# First learning attempts
### Using the Beethoven dataset
* 29 pieces + transpositions across 2 octaves
* ~70h of music (2.7h per transposition)
* 0.025s resolution (40fps)

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The data and model paths used by this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [1]:
# When True, only the first 128 columns of every frame are used further down
# (the velocity columns are dropped).
IGNORE_NOTE_VELOCITY = True

# loading data files names
import os

path = '/content/drive/My Drive/datasets/beethoven/'

# File types the loader understands. Matching on the real extension avoids
# accepting names that merely contain the text (e.g. 'track.npz.bak').
DATA_EXTENSIONS = ('.npz', '.npy', '.csv')

# os.listdir returns entries in arbitrary order; sorting keeps the track order
# (and therefore everything derived from it) reproducible between runs.
file_names = sorted(fn for fn in os.listdir(path) if fn.endswith(DATA_EXTENSIONS))
assert len(file_names) > 0, 'Data not found'

f'Found {len(file_names)} files'

'Found 29 files'

In [2]:
# loading data files
def read_numpy_midi(input_path):
    """Load one track from disk as a dense NumPy array.

    The loader is selected from the file extension (case-insensitive):

    * ``.csv`` -- comma-separated text, returned as ``int32``
    * ``.npy`` -- NumPy binary array, returned as ``float32``
    * ``.npz`` -- SciPy sparse matrix, densified and returned as ``float32``

    Args:
        input_path: path of the file to load.

    Returns:
        numpy.ndarray with the file's content.

    Raises:
        ValueError: if the extension is none of the supported ones.
    """
    import os
    import numpy as np
    from scipy import sparse

    # Dispatch on the real extension: a substring test on the whole path
    # misfires on paths such as '/data/csv_exports/track.npy'.
    extension = os.path.splitext(input_path)[1].lower()
    if extension == '.csv':
        return np.loadtxt(input_path, delimiter=",", dtype=np.int32)
    if extension == '.npy':
        return np.load(input_path).astype(np.float32)
    if extension == '.npz':
        sparse_numpy = sparse.load_npz(input_path)
        return sparse_numpy.toarray().astype(np.float32)
    # Fail loudly instead of silently returning None for unknown file types.
    raise ValueError(f'Unsupported file type: {input_path!r}')

# Build the paths with os.path.join: `path` already ends with '/', and a
# hard-coded backslash is only a separator on Windows -- on Linux/Colab it
# would become part of the file name and the files would not be found.
file_paths = [os.path.join(path, fn) for fn in file_names]
tracks = [read_numpy_midi(fp) for fp in file_paths]

### Creating inputs (x) and targets (y)

In [3]:
# Next-frame prediction: the input is every frame except the last one,
# the target is the same track shifted one frame ahead.
data_x = [track[:-1] for track in tracks]
data_y = [track[1:] for track in tracks]

if IGNORE_NOTE_VELOCITY:
    # keep only the first 128 columns of every frame
    data_x = [frames[:, :128] for frames in data_x]
    data_y = [frames[:, :128] for frames in data_y]

# release this list's references to the full tracks
tracks = []

### Processing data

In [4]:
# cut every track into consecutive chunks of nearly equal length
import numpy as np
CHUNK_LENGTH = 200 # 200 frames * 0.025s per frame = 5s


def flatten(l):
    """Concatenate the items of every sub-list of `l` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


def _split_into_chunks(track):
    """Split `track` along axis 0 into `len(track) // CHUNK_LENGTH + 1` parts."""
    n_chunks = len(track) // CHUNK_LENGTH + 1
    return np.array_split(track, n_chunks)


data_x = flatten([_split_into_chunks(track) for track in data_x])
data_y = flatten([_split_into_chunks(track) for track in data_y])

len(data_x), len(data_y)

437325


(2201, 2201)

In [5]:
# pad smaller chunks to CHUNK_LENGTH
def pad_chunk_sequence(chunk, goal_seq):
    """Zero-pad `chunk` at the end of its first axis up to `goal_seq` entries.

    Works for arrays of any rank >= 1; only axis 0 is padded, all other axes
    are left untouched.

    Args:
        chunk: NumPy array whose first axis is the sequence axis.
        goal_seq: required length of the first axis after padding.

    Returns:
        A new array with ``shape[0] == goal_seq``; the added entries are zeros.

    Raises:
        ValueError: if `chunk` is already longer than `goal_seq`.
    """
    d_len = goal_seq - chunk.shape[0]
    if d_len < 0:
        raise ValueError(
            f'chunk has {chunk.shape[0]} frames, more than goal_seq={goal_seq}'
        )
    # pad only after the end of axis 0; every remaining axis gets (0, 0)
    npad = [(0, d_len)] + [(0, 0)] * (chunk.ndim - 1)
    return np.pad(chunk, npad, 'constant')
    
data_x = [pad_chunk_sequence(chunk, CHUNK_LENGTH) for chunk in data_x]
data_y = [pad_chunk_sequence(chunk, CHUNK_LENGTH) for chunk in data_y]

# sanity check: inputs and targets must pair up one-to-one, and every chunk
# of both lists must have the full length
assert len(data_x) == len(data_y), 'x/y chunk count mismatch'
for chunk in data_x + data_y:
    assert len(chunk) == CHUNK_LENGTH, 'failed to pad'

In [6]:
# convert the lists of per-chunk matrices into single higher-dimensional arrays
# (np.stack adds a new leading axis with one entry per chunk)
data_x = np.stack(data_x)
data_y = np.stack(data_y)

In [14]:
# dataset generator
def data_gen(batch_size):
    """Yield random (x, y) batches forever.

    Reads the module-level `data_x` and `data_y` arrays. Each batch is built
    from `batch_size` indices drawn with `np.random.randint`, so the same
    sample may appear more than once in a batch. Both yielded arrays are
    indexed with the same indices, which keeps inputs and targets aligned
    for many-to-many training.
    """
    sample_count = len(data_x)
    while True:
        picks = np.random.randint(0, sample_count, size=batch_size)
        batch_x = data_x[picks]
        batch_y = data_y[picks]
        yield batch_x, batch_y

## Setting up model

In [15]:
from tensorflow import keras

# Width of one frame: 128 columns when velocity is ignored, otherwise 256.
INPUT_SIZE = 128 if IGNORE_NOTE_VELOCITY else 256
# Number of units in the LSTM layer.
HIDDEN_SIZE = 1024
# The model outputs frames of the same width as its input frames.
OUTPUT_SIZE = INPUT_SIZE

# Number of sequences per training batch (passed to the data generator below).
BATCH_SIZE = 10
SEQUENCE_LENGTH = CHUNK_LENGTH

# None leaves the sequence-length axis unspecified.
INPUT_SHAPE = (None, INPUT_SIZE)
# could be INPUT_SHAPE = (SEQUENCE_LENGTH, INPUT_SIZE)
# however predicting would have to have same seq length

# One LSTM layer that returns its output at every time step, followed by a
# Dense layer with sigmoid activation producing OUTPUT_SIZE values per step.
model = keras.models.Sequential([
    keras.layers.LSTM(HIDDEN_SIZE, input_shape=INPUT_SHAPE, return_sequences=True),
    keras.layers.Dense(OUTPUT_SIZE, activation='sigmoid')
])

# NOTE(review): 'categorical_accuracy' compares the argmax of target and
# prediction; if several outputs can be active in the same frame this metric
# may not be meaningful -- confirm the intended metric (and loss) for this data.
model.compile(
    loss='mean_squared_error', 
    optimizer='adam', 
    metrics=['categorical_accuracy']
)

# Endless batch generator; STEPS_PER_EPOCH defines how many batches count as
# one epoch (number of chunks divided by the batch size, rounded down).
gen = data_gen(BATCH_SIZE)
STEPS_PER_EPOCH = len(data_x) // BATCH_SIZE
EPOCHS = 10

In [16]:
# stat accumulators (total epochs and minutes trained) that are incremented
# each time the training cell runs; re-running THIS cell resets them to zero
from time import time
epochs_elapsed = 0
minutes_elapsed = 0

### Running model

In [17]:
start_time = time()

# Model.fit accepts Python generators directly; Model.fit_generator is
# deprecated in TensorFlow 2.x and is not available in newer releases.
model.fit(gen, steps_per_epoch=STEPS_PER_EPOCH, epochs=EPOCHS)

# accumulate totals so the cell can be re-run to continue training
minutes_elapsed += (time() - start_time) / 60
epochs_elapsed += EPOCHS



### Saving model

In [None]:
base_path = '/content/drive/My Drive/datasets/'
keywords = '_'.join(['beth', 'notransp'])
# minutes_elapsed is a float: round it so the file name does not carry a long
# decimal tail (and an extra dot) such as '..._23.456789012m.h5'
file_name = f'{keywords}_{HIDDEN_SIZE}_{epochs_elapsed}epochs_{round(minutes_elapsed)}m.h5'

# the .h5 suffix makes Keras write the model in HDF5 format
keras.models.save_model(model, base_path + file_name)