In [77]:
import os
import time
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Masking, Bidirectional, LSTM, TimeDistributed, Dense, Activation

In [285]:
BATCH_SIZE = 128
VALIDATION_RATIO = 0.1

PIECE_LEN = 128
n_feature = 42
n_hidden = 200
n_pitch = 52
learning_rate = 0.001

## Build dataset

In [286]:
def load_npy_data(x_path, y_path, offset):
    x = np.load(x_path)
    y = np.load(y_path)
    if x.shape[0] >= offset+PIECE_LEN:
        return x[offset:offset+PIECE_LEN].astype(np.float64), y[offset:offset+PIECE_LEN].astype(np.float64)
    else:
        pad_count = offset + PIECE_LEN - x.shape[0]
        x = np.pad(x[offset:], ((0, pad_count), (0, 0)), 'constant', constant_values=-1).astype(np.float64)
        y = np.pad(y[offset:], ((0, pad_count), (0, 0)), 'constant', constant_values=-1).astype(np.float64)
        return x, y

def generate_dataset(input_dir: str):
    # using tf.data.Dataset API to create dataset
    x_paths = [] # input path
    y_paths = [] # ans file path
    offsets = [] # starting point of a piece
    for file_name in sorted(os.listdir(input_dir)):
        if file_name.endswith(".ans.npy"):
            y_path = str(os.path.join(input_dir, file_name))
            x_path = str(os.path.join(input_dir, file_name.removesuffix(".ans.npy") + ".npy"))
            assert os.path.exists(x_path), f"corresponding input file {x_path} doesn't exist"

            # split and pad data into PIECE_LEN
            y_content = np.load(y_path)
            for offset in range(0, y_content.shape[0], PIECE_LEN):
                y_paths.append(y_path)
                x_paths.append(x_path)
                offsets.append(offset)

    
    train_dataset = tf.data.Dataset.from_tensor_slices((x_paths, y_paths, offsets)).shuffle(100000)
    train_dataset = train_dataset.map(lambda x_path, y_path, offset: tf.numpy_function(load_npy_data, [x_path, y_path, offset], [tf.float64, tf.float64]))
    train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(-1).cache()
    
    return train_dataset


train_dataset = generate_dataset("test_input")

## build model

In [335]:
input = tf.keras.Input(shape=(PIECE_LEN, n_feature))  
x = Masking(mask_value=-1, input_shape=(PIECE_LEN, n_feature))(input) # Ignore Padded Data
x = Bidirectional(LSTM(units=n_hidden, input_shape=(1, n_feature), return_sequences=True))(x)
x = TimeDistributed(Dense(n_pitch, activation="softmax"))(x)
model = tf.keras.Model(inputs=input, outputs=x)

print(model.summary())

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 128, 42)]         0         
                                                                 
 masking_19 (Masking)        (None, 128, 42)           0         
                                                                 
 bidirectional_14 (Bidirecti  (None, 128, 400)         388800    
 onal)                                                           
                                                                 
 time_distributed_13 (TimeDi  (None, 128, 52)          20852     
 stributed)                                                      
                                                                 
Total params: 409,652
Trainable params: 409,652
Non-trainable params: 0
_________________________________________________________________
None


## loss/accuracy function

In [367]:
def masked_loss_function(y_true, y_pred):
    mask = tf.math.not_equal(tf.reduce_sum(y_true, axis=2), -1*n_pitch)  # false if it is a padding time step
    loss = tf.losses.categorical_crossentropy(y_true, y_pred)
    mask = tf.cast(mask, loss.dtype)
    loss *= mask
    return tf.reduce_mean(loss)

def masked_accuracy(y_true, y_pred):
    mask = tf.math.not_equal(tf.reduce_sum(y_true, axis=2), -1*n_pitch)  # false if it is a padding time step
    acc = tf.metrics.categorical_accuracy(y_true, y_pred)
    mask = tf.cast(mask, acc.dtype)
    acc *= mask
    return tf.reduce_mean(acc)

## training

In [369]:
@tf.function
def train_step(x, y):
    loss = 0
    with tf.GradientTape() as tape:
        pred = model(x)
        
        loss = masked_loss_function(y, pred)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    acc = masked_accuracy(y, pred)

    return loss, acc

In [375]:
EPOCHS = 10

for epoch in range(EPOCHS):
    start = time.time()

    total_loss = 0
    total_acc = 0
    steps_per_epoch = 0

    for x, y in train_dataset:
        batch_loss, batch_acc = train_step(x, y)
        total_loss += batch_loss
        total_acc += batch_acc
        steps_per_epoch += 1
    
    print(f'Epoch {epoch+1} Loss {total_loss / steps_per_epoch:.4f} Acc {total_acc / steps_per_epoch:.4f}')
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Loss 0.2939 Acc 0.8665
Time taken for 1 epoch 3.017658233642578 sec

Epoch 2 Loss 0.2861 Acc 0.8689
Time taken for 1 epoch 3.299320697784424 sec

Epoch 3 Loss 0.2786 Acc 0.8709
Time taken for 1 epoch 2.4503231048583984 sec

Epoch 4 Loss 0.2710 Acc 0.8735
Time taken for 1 epoch 3.1006157398223877 sec

Epoch 5 Loss 0.2638 Acc 0.8756
Time taken for 1 epoch 2.3538990020751953 sec

Epoch 6 Loss 0.2570 Acc 0.8785
Time taken for 1 epoch 2.1065120697021484 sec

Epoch 7 Loss 0.2502 Acc 0.8811
Time taken for 1 epoch 2.371340036392212 sec

Epoch 8 Loss 0.2437 Acc 0.8833
Time taken for 1 epoch 2.2087249755859375 sec

Epoch 9 Loss 0.2374 Acc 0.8855
Time taken for 1 epoch 2.0335919857025146 sec

Epoch 10 Loss 0.2314 Acc 0.8873
Time taken for 1 epoch 2.0522079467773438 sec



## predict

In [380]:
def slice_per_step(a, step=PIECE_LEN):
    # add -1 to the end of each sample to make them the same length per step(piece_length)
    pad_count = step - (a.shape[0] % step)
    print(a.shape, end=' ')
    if pad_count!=step : 
        print('pad by',pad_count, end=' ')
        a = np.pad(a, ((0, pad_count), (0, 0)), 'constant', constant_values=-1)
    # reshape into per step
    a = np.reshape(a, (-1, step, a.shape[1]))
    print('to',a.shape)
    return a

In [382]:
import midi_np_translation.output2midi as output2midi
PATH = "test_input"
# load np file
test_file = np.load("preprocessed_dataset/irealpro_midi/Autumn Leaves_o0.mid.npy")
# test_file_truth = np.load(PATH + "/" + "4on6.mid.ans.npy")
# output2midi.output_to_midi(bass_ndarr=test_file_truth, output_path="4on6_truth.mid")
test_result = model.predict(slice_per_step(test_file))
# test_result = np.argmax(test_result, axis=2)
output2midi.output_to_midi(bass_ndarr=test_result.reshape(-1,52), ref_midi_path="input_midi/irealpro_transposed/Autumn Leaves_o0.mid", output_path="yo_al.mid")

