In [None]:
! pip3 install tensorflow-gpu
import tensorflow as tf
print(tf.__version__)

import os

import numpy as np

! pip3 install tqdm
from tqdm import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
2.11.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **Parameters**

In [None]:
# Mount Google Drive (Colab only) so the project folder under
# /content/drive/MyDrive is reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Original data info.
n_pitch = 53      # width of the pitch slice at the start of every target frame
n_feature = 43    # width of every encoder input frame

# Model parameters
input_size = n_feature
# +4 extra target slots: onset flag, start correction, end correction, velocity
target_size = n_pitch + 4

# Training parameters
BATCH_SIZE = 32          # examples per gradient step
VALIDATION_RATIO = 0.1   # fraction of the input data reserved for validation
learning_rate = 0.001    # optimizer learning rate
n_hidden = 2             # number of units in the encoder/decoder LSTMs

# Data parameters
PIECE_LEN = 200          # time steps per piece; songs are split/padded to this length
# Column indices of the extra target slots. They are derived from n_pitch so
# they cannot drift out of sync with target_size if n_pitch ever changes.
is_onset_index = n_pitch
start_correction_index = n_pitch + 1
end_correction_index = n_pitch + 2
velocity_index = n_pitch + 3

# Project folder on the mounted Google Drive.
ROOT = r'/content/drive/MyDrive/ML'

In [None]:
# Work from the project folder so the relative paths used later resolve
# against it. ROOT is used instead of repeating the literal path.
os.chdir(ROOT)
print(os.getcwd())

/content/drive/MyDrive/ML


In [None]:
from zipfile import ZipFile
# Extract the dataset archive once. If a previous extraction was interrupted,
# delete the incomplete folder and run this cell again.
# The existence check, the folder creation and the extraction all use the same
# absolute path, so the cell no longer creates a stray folder in the parent
# directory and can be re-run safely.
DATASET_DIR = os.path.join(ROOT, 'irealpro_dataset_v2')
if not os.path.isdir(DATASET_DIR):
    print('Creating Folder \'irealpro_dataset_v2\'')
    os.makedirs(DATASET_DIR, exist_ok=True)
    print('Extract to Folder \'irealpro_dataset_v2\'')
    with ZipFile(os.path.join(ROOT, 'irealpro_dataset_v2.zip'), 'r') as zObject:
        zObject.extractall(path=DATASET_DIR)

In [None]:
# Seed NumPy's global RNG; generate_dataset draws from it when shuffling.
np.random.seed(10000)

# **Data Preparation**

In [None]:
# Decoder seed for pieces that begin at offset 0: an all-zero frame with the
# start-correction slot set to 1.
dec_init_input = np.zeros((1, target_size))
dec_init_input[0, start_correction_index] = 1.

# Module-level counters updated by load_npy_data and printed after loading.
count_pad = 0  # pieces that had to be padded up to PIECE_LEN
count_spilled = 0  # pieces with offset > 0 (seeded from the preceding answer frame)
max_data = 50  # file-count limit read by generate_dataset while scanning
def load_npy_data(x_path, y_path, offset):
    """Load one fixed-length piece from a feature/answer ``.npy`` pair.

    Args:
        x_path: path to the input feature array, indexed ``[time, feature]``.
        y_path: path to the matching answer array, indexed ``[time, target]``.
        offset: index of the first time step of the piece.

    Returns:
        ``(X_, Z_, Y_)`` as float32 arrays of shape ``(PIECE_LEN, input_size)``,
        ``(1, target_size)`` and ``(PIECE_LEN, target_size)``. ``Z_`` is the
        decoder seed: ``dec_init_input`` for ``offset == 0``, otherwise the
        answer frame immediately before ``offset``. Pieces that run past the
        end of the file are padded with ``-1``.

    Raises:
        ValueError: if any of the three arrays does not have the expected shape.

    Side effects:
        Increments the module-level counters ``count_pad`` and ``count_spilled``.
    """
    global count_pad, count_spilled
    x = np.load(x_path)
    y = np.load(y_path)

    if offset == 0:
        # astype() copies, so callers never alias the shared seed array, and the
        # seed gets the same float32 dtype as every other piece.
        Z_ = dec_init_input.astype(np.float32)
    else:
        Z_ = np.expand_dims(y[offset - 1].astype(np.float32), axis=0)
        count_spilled += 1

    if x.shape[0] >= offset + PIECE_LEN:
        X_ = x[offset:offset + PIECE_LEN].astype(np.float32)
        Y_ = y[offset:offset + PIECE_LEN].astype(np.float32)
    else:
        # Last piece of the file: pad the time axis with -1 up to PIECE_LEN.
        pad_count = offset + PIECE_LEN - x.shape[0]
        X_ = np.pad(x[offset:], ((0, pad_count), (0, 0)), 'constant', constant_values=-1).astype(np.float32)
        Y_ = np.pad(y[offset:], ((0, pad_count), (0, 0)), 'constant', constant_values=-1).astype(np.float32)
        count_pad += 1

    # Explicit check instead of assert + bare except: asserts vanish under -O and
    # a bare except would also swallow KeyboardInterrupt.
    shapes_ok = (
        X_.shape == (PIECE_LEN, input_size)
        and Z_.shape == (1, target_size)
        and Y_.shape == (PIECE_LEN, target_size)
    )
    if not shapes_ok:
        raise ValueError(
            f"Unexpected piece shapes for {x_path!r} / {y_path!r} at offset {offset}: "
            f"You got {X_.shape} {Z_.shape} {Y_.shape}, expected "
            f"{(PIECE_LEN, input_size)} {(1, target_size)} {(PIECE_LEN, target_size)}"
        )

    return X_, Z_, Y_

def generate_dataset(input_dir: str):
    """Build shuffled training arrays from every ``*.ans.npy`` file in a folder.

    Each answer file ``<name>.ans.npy`` must sit next to its feature file
    ``<name>.npy``. Every file is cut into consecutive pieces of ``PIECE_LEN``
    time steps, and each piece is loaded through ``load_npy_data``.

    Args:
        input_dir: folder containing the ``.npy`` / ``.ans.npy`` pairs.

    Returns:
        ``(encoder_input_data, decoder_input_data, decoder_target_data)``,
        three arrays that share the same shuffled first axis (one row per piece).

    Raises:
        AssertionError: if a feature file for an answer file is missing.
        ValueError: if no piece could be collected from ``input_dir``.
    """
    answer_suffix = ".ans.npy"
    x_paths = []  # input path of each piece
    y_paths = []  # answer path of each piece
    offsets = []  # first time step of each piece
    data_cnt = 0
    # os.listdir order is arbitrary; sorting makes the selected files (and thus
    # the seeded shuffle below) reproducible across runs and machines.
    for file_name in sorted(os.listdir(input_dir)):  # 'Scan Files'
        if data_cnt >= max_data:  # use exactly max_data files, not max_data - 1
            break
        if not file_name.endswith(answer_suffix):
            continue
        data_cnt += 1
        y_path = str(os.path.join(input_dir, file_name))
        x_path = str(os.path.join(input_dir, file_name[:-len(answer_suffix)] + ".npy"))
        assert os.path.exists(x_path), f"corresponding input file {x_path} doesn't exist"

        # One entry per PIECE_LEN-sized window; the last one is padded on load.
        n_steps = np.load(y_path).shape[0]
        for offset in range(0, n_steps, PIECE_LEN):
            y_paths.append(y_path)
            x_paths.append(x_path)
            offsets.append(offset)

    if not offsets:
        raise ValueError(f"no '{answer_suffix}' data found in {input_dir!r}")

    pieces = [load_npy_data(*piece_args) for piece_args in zip(x_paths, y_paths, offsets)]  # 'Read Files'
    print(len(pieces))

    encoder_pieces, seed_pieces, target_pieces = zip(*pieces)
    encoder_input_data = np.array(encoder_pieces)
    decoder_input_data = np.array(seed_pieces)
    decoder_target_data = np.array(target_pieces)
    print(encoder_input_data.shape)
    print(decoder_input_data.shape)
    print(decoder_target_data.shape)

    # np.random.shuffle works in place and returns None; indexing with None only
    # adds a leading axis instead of shuffling. Draw a real permutation instead.
    perm_id = np.random.permutation(encoder_input_data.shape[0])
    encoder_input_data = encoder_input_data[perm_id]
    decoder_input_data = decoder_input_data[perm_id]
    decoder_target_data = decoder_target_data[perm_id]

    return encoder_input_data, decoder_input_data, decoder_target_data

# Build the training arrays from the extracted dataset folder.
encoder_input_data, decoder_input_data, decoder_target_data = generate_dataset(ROOT+"/irealpro_dataset_v2")



# Report the counters that load_npy_data accumulated while loading.
print('Data Padded:', count_pad)
print('Data with Non Start Decoder Input:', count_spilled)
print('Data PIECE_LEN:', PIECE_LEN)

449
449
449
449
(449, 200, 43)
(449, 1, 57)
(449, 200, 57)
Data Padded: 49
Data with Non Start Decoder Input: 400
Data PIECE_LEN: 200


In [None]:
# np.save(ROOT + '/encoder_input_data.npy', encoder_input_data)
# np.save(ROOT + '/decoder_input_data.npy', decoder_input_data)
# np.save(ROOT + '/decoder_target_data.npy', decoder_target_data)

In [None]:
# encoder_input_data = np.load(ROOT + '/encoder_input_data.npy', allow_pickle=True)
# decoder_input_data = np.load(ROOT + '/decoder_input_data.npy', allow_pickle=True)
# decoder_target_data = np.load(ROOT + '/decoder_target_data.npy', allow_pickle=True)

# **Model**

## *Define Loss, Accuracy, and Optimizer*

In [None]:
# Define Loss, Accuracy, and Optimizer
def masked_loss_function(y_true, y_pred):
    """Sum of the per-slot losses, averaged over non-padded time steps only.

    The last axis of both tensors is split as: ``[:n_pitch]`` pitch
    (categorical cross-entropy), ``n_pitch`` onset (binary cross-entropy),
    ``n_pitch+1`` start correction, ``n_pitch+2`` end correction and
    ``n_pitch+3`` velocity (squared error each).

    A time step counts as padding when its pitch slice sums to ``-n_pitch``,
    i.e. every pitch entry is -1.

    Args:
        y_true: target tensor indexed ``[batch, time, target]``.
        y_pred: prediction tensor with the same layout.

    Returns:
        Scalar loss: the masked per-step loss summed and divided by the number
        of non-padded steps (at least 1, so an all-padding batch yields 0).
    """
    pitch_true = y_true[:, :, :n_pitch]
    mask = tf.math.not_equal(tf.reduce_sum(pitch_true, axis=2), -1 * n_pitch)  # False on padding steps

    pitch_loss = tf.losses.categorical_crossentropy(pitch_true, y_pred[:, :, :n_pitch])
    onset_loss = tf.losses.binary_crossentropy(y_true[:, :, n_pitch:n_pitch + 1], y_pred[:, :, n_pitch:n_pitch + 1])
    start_loss = tf.square(y_true[:, :, n_pitch + 1] - y_pred[:, :, n_pitch + 1])
    end_loss = tf.square(y_true[:, :, n_pitch + 2] - y_pred[:, :, n_pitch + 2])
    velocity_loss = tf.square(y_true[:, :, n_pitch + 3] - y_pred[:, :, n_pitch + 3])

    total_loss = tf.add_n([pitch_loss, onset_loss, start_loss, end_loss, velocity_loss])
    mask = tf.cast(mask, total_loss.dtype)

    # Divide by the number of real steps rather than all steps; otherwise the
    # loss shrinks with the amount of padding in the batch.
    n_valid = tf.maximum(tf.reduce_sum(mask), 1.0)
    return tf.reduce_sum(total_loss * mask) / n_valid

def masked_accuracy(y_true, y_pred):
    """Per-slot quality figures, averaged over non-padded time steps only.

    A time step counts as padding when its pitch slice sums to ``-n_pitch``.

    Args:
        y_true: target tensor indexed ``[batch, time, target]``.
        y_pred: prediction tensor with the same layout.

    Returns:
        ``(pitch_acc, onset_acc, start_loss, end_loss, velocity_loss)``:
        categorical accuracy of the pitch slice, binary accuracy of the onset
        slot, and mean squared error of the start-correction, end-correction
        and velocity slots. Each is a scalar taken over non-padded steps.
    """
    mask = tf.math.not_equal(tf.reduce_sum(y_true[:, :, :n_pitch], axis=2), -1 * n_pitch)  # False on padding steps
    mask = tf.cast(mask, tf.float32)
    # At least 1 so a batch made only of padding yields 0 instead of NaN.
    n_valid = tf.maximum(tf.reduce_sum(mask), 1.0)

    def masked_mean(per_step):
        # Average over real steps only; padded steps must not dilute the value.
        return tf.reduce_sum(mask * tf.cast(per_step, tf.float32)) / n_valid

    pitch_acc = masked_mean(tf.metrics.categorical_accuracy(y_true[:, :, :n_pitch], y_pred[:, :, :n_pitch]))
    onset_acc = masked_mean(tf.metrics.binary_accuracy(y_true[:, :, n_pitch:n_pitch + 1], y_pred[:, :, n_pitch:n_pitch + 1]))
    start_loss = masked_mean(tf.square(y_true[:, :, n_pitch + 1] - y_pred[:, :, n_pitch + 1]))
    end_loss = masked_mean(tf.square(y_true[:, :, n_pitch + 2] - y_pred[:, :, n_pitch + 2]))
    velocity_loss = masked_mean(tf.square(y_true[:, :, n_pitch + 3] - y_pred[:, :, n_pitch + 3]))

    return pitch_acc, onset_acc, start_loss, end_loss, velocity_loss

# Use the learning rate from the parameter cell instead of the implicit default.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
# Checkpoint callback: writes weights only, keeping the best value of the
# monitored quantity. The path is relative to the current working directory.
# NOTE(review): the visible model.compile() call registers no metrics and the
# visible model.fit() call passes neither validation data nor callbacks, so
# 'val_accuracy' does not appear to be produced and this callback does not
# appear to be used - confirm before relying on the checkpoints.
checkpoint_filepath = './checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    )

## Reinjection Technique

In [None]:
from keras import Model
from keras import backend as K
from keras.layers import Input, Concatenate, Dense, LSTM, Embedding, GRU, Attention, Lambda, Permute, Flatten

# Encoder: one LSTM over the whole piece; keeps the per-step outputs (for the
# attention layer) and the final hidden/cell state (to seed the decoder).
encoder_inputs = Input(shape=(PIECE_LEN, input_size))
encoder = LSTM(n_hidden, return_sequences=True, return_state=True)

encoder_outputs, state_h, state_c = encoder(encoder_inputs)
states = [state_h, state_c]
# Attention
# NOTE(review): n_hidden is passed positionally here. Keras' Attention layer
# presumably takes `use_scale` as its first parameter rather than a unit
# count - confirm this argument does what is intended.
attention = Attention(n_hidden)

# Decoder: a single LSTM cell applied one step at a time, followed by a dense
# projection to target_size.
# NOTE(review): the softmax spans all target_size outputs, so the pitch slots
# and the four extra slots share one distribution, whereas
# masked_loss_function scores those slots separately - confirm this is intended.
decoder_inputs = Input(shape=(1, target_size))
decoder_lstm = LSTM(n_hidden, return_sequences=True, return_state=True)
decoder_dense = Dense(target_size, activation='softmax')

all_outputs = []
all_attention = []
inputs = decoder_inputs
# Unroll PIECE_LEN decoder steps in the graph, reusing the same layers and
# feeding each step's prediction back in as the next step's input.
for _ in tqdm(range(PIECE_LEN)):
    # Attention Update
    # print(states.shape) 
    # print(encoder_outputs.shape)
    # Query is the stacked [state_h, state_c]; values are the encoder outputs.
    context_vector, attention_weights = attention([tf.expand_dims([state_h, state_c],1), encoder_outputs], return_attention_scores=True)
    all_attention.append(attention_weights)
    # context_vector = Permute((2, 1))(context_vector)
    # context_vector = Flatten()(context_vector)
    # print(inputs.shape)
    # print(context_vector.shape)
    context_vector_h, context_vector_c = Lambda(lambda x: x[:,0,:,:], output_shape=(1,) + context_vector.shape[2:])(context_vector)
    # print(context_vector_h.shape)
    # print(context_vector_c.shape)
    # NOTE(review): the two concatenated states below are overwritten by the
    # decoder_lstm call that follows, and that call receives `states`, which was
    # built before the concatenation. As written the attention context does not
    # reach the decoder - confirm whether `initial_state` should use these values.
    state_h = Concatenate(axis=-1)([state_h,context_vector_h])
    state_c = Concatenate(axis=-1)([state_c,context_vector_c])
    # Decoder Reinject
    outputs, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    # Output
    outputs = decoder_dense(outputs)
    all_outputs.append(outputs)
    # Update Decoder Input & State
    inputs = outputs
    states = [state_h, state_c]

# Concatenate all predictions along the time axis (axis 1).
decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)

# Define and compile model as previously
# NOTE(review): compiled with 'rmsprop' and plain categorical cross-entropy;
# the module-level `optimizer`, masked_loss_function and masked_accuracy are
# not passed here.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# masked_loss_function , masked_accuracy

100%|██████████| 200/200 [01:16<00:00,  2.61it/s]


In [None]:
# Train model as previously
print(encoder_input_data.shape)
print(decoder_input_data.shape)
print(decoder_target_data.shape)

# Collapse any extra leading axis so that the first axis always indexes pieces;
# from_tensor_slices then yields exactly one piece per element.
encoder_train = encoder_input_data.reshape(-1, PIECE_LEN, input_size)
decoder_seed_train = decoder_input_data.reshape(-1, 1, target_size)
decoder_target_train = decoder_target_data.reshape(-1, PIECE_LEN, target_size)

inputz = tf.data.Dataset.zip((
    tf.data.Dataset.from_tensor_slices(encoder_train),
    tf.data.Dataset.from_tensor_slices(decoder_seed_train),
))
outputz = tf.data.Dataset.from_tensor_slices(decoder_target_train)
# A tf.data.Dataset must be batched by the pipeline itself; `batch_size` is not
# a valid way to batch dataset inputs in Model.fit.
dataset = tf.data.Dataset.zip((inputz, outputz)).batch(BATCH_SIZE)

model.fit(dataset,
          epochs=10,
          verbose=1,
        )

(1, 449, 200, 43)
(1, 449, 1, 57)
(1, 449, 200, 57)
Epoch 1/10


### Predict Function

In [None]:
def predict(test_file):
    """Run the trained model on one preprocessed feature array.

    Args:
        test_file: feature array, either ``(n_steps, input_size)`` (it is split
            into ``PIECE_LEN``-step pieces and the last piece is padded with
            ``-1``) or already split as ``(n_pieces, PIECE_LEN, input_size)``.

    Returns:
        The output of ``model.predict`` with one row per piece.

    Raises:
        ValueError: if ``test_file`` is empty or does not have one of the two
            supported layouts.
    """
    features = np.asarray(test_file, dtype=np.float32)

    if features.ndim == 2:
        n_steps, n_cols = features.shape
        if n_steps == 0 or n_cols != input_size:
            raise ValueError(
                f"expected a non-empty (n_steps, {input_size}) array, got shape {features.shape}"
            )
        n_pieces = -(-n_steps // PIECE_LEN)  # ceiling division
        pad_count = n_pieces * PIECE_LEN - n_steps
        features = np.pad(features, ((0, pad_count), (0, 0)), 'constant', constant_values=-1)
        encoder_test_data = features.reshape(n_pieces, PIECE_LEN, input_size)
    elif features.ndim == 3 and features.shape[1:] == (PIECE_LEN, input_size):
        encoder_test_data = features
    else:
        raise ValueError(
            f"expected shape (n_steps, {input_size}) or (n_pieces, {PIECE_LEN}, {input_size}), "
            f"got {features.shape}"
        )

    # One decoder seed per piece, so both model inputs share the same batch axis.
    # NOTE(review): the seed is all zeros, whereas training seeds pieces with
    # dec_init_input or the preceding answer frame - confirm which is intended.
    decoder_test_data = np.zeros((encoder_test_data.shape[0], 1, target_size), dtype=np.float32)
    return model.predict([encoder_test_data, decoder_test_data], batch_size=BATCH_SIZE)

# **MIDI Format Output**

In [None]:
# NOTE(review): PATH is not referenced anywhere else in the visible notebook.
PATH = "test_input"
# Preprocessed features of the test song; the relative path is resolved against
# the current working directory.
test_file = np.load("preprocessed_dataset/irealpro_midi/Autumn Leaves_o0.mid.npy")

In [None]:
import midi_np_translation.output2midi as output2midi

# Predict on the loaded test features and hand the result to the MIDI writer.
test_result = predict(test_file)
# NOTE(review): the model's last axis has target_size (= n_pitch + 4 = 57)
# entries, but the result is reshaped to rows of 52 here - confirm the layout
# output_to_midi expects.
output2midi.output_to_midi(bass_ndarr=test_result.reshape(-1,52), ref_midi_path="input_midi/irealpro_transposed/Autumn Leaves_o0.mid", output_path="yo_al.mid")