In [1]:
# Environment switch read by the following cells: True -> run on Google Colab
# (mount Drive, clone the repo, use /content/... paths), False -> local checkout.
google_colab_env = False

In [2]:
# Colab-only bootstrap: mount Google Drive under /content/drive and clone the
# project repository into the current working directory.
# NOTE(review): re-running this cell makes `git clone` fail because the target
# directory already exists; that failure is harmless.
if google_colab_env:
    from google.colab import drive
    drive.mount('/content/drive')
    !git clone https://github.com/GrzegorzKazana/artificial-music.git

## importing dataset, splitting tracks

In [3]:
import os
import sys
import json
import numpy as np
from scipy import sparse

# Base locations differ between Google Colab (Drive mount + fresh clone) and a
# local checkout (paths relative to this notebook).
proj_base_path = ('/content/artificial-music'
                  if google_colab_env else '../../../')

data_base_path = ('/content/drive/My Drive/artificial-music/datasets'
                  if google_colab_env else '../../../datasets')

models_base_path = ('/content/drive/My Drive/artificial-music/pretrained_models'
                    if google_colab_env else '../../../pretrained_models')

# Make the project's `src` package importable. abspath resolves a relative
# path against the current working directory; the membership test keeps
# sys.path free of duplicates when the cell is re-run.
proj_abs_path = os.path.abspath(proj_base_path)
if proj_abs_path not in sys.path:
    sys.path.append(proj_abs_path)

dataset_path = 'numpy/pokemon_clustered_time'
word_vectors_file = 'meta/_word_vectors_11000_ignore_ratio=0.05.wv'
duration_dict_file = 'meta/durations_dict.json'

word_vectors_path = os.path.join(data_base_path, dataset_path, word_vectors_file)
with open(os.path.join(data_base_path, dataset_path, duration_dict_file)) as fp:
    duration_dict = json.load(fp)

tracks_path = os.path.join(data_base_path, dataset_path)
# os.listdir returns entries in arbitrary order; sorting keeps the track order
# (and therefore any seeded sampling over `tracks`) reproducible between runs.
track_paths = sorted(
    os.path.join(tracks_path, f)
    for f in os.listdir(tracks_path)
    if f.endswith('.npz')
)

# every track is a scipy sparse matrix; CSR supports the row/column slicing used later
tracks = [sparse.load_npz(p).tocsr() for p in track_paths]

for t in tracks:
    print(t.shape, end='')

(253, 152)(343, 152)(487, 152)(121, 152)(917, 152)(121, 152)(1002, 152)(1002, 152)(121, 152)(917, 152)(487, 152)(343, 152)(253, 152)(551, 152)(551, 152)(487, 152)(317, 152)(121, 152)(917, 152)(121, 152)(1002, 152)(1002, 152)(917, 152)(121, 152)(121, 152)(317, 152)(487, 152)(551, 152)(228, 152)(487, 152)(121, 152)(121, 152)(917, 152)(917, 152)(1002, 152)(1002, 152)(317, 152)(917, 152)(917, 152)(121, 152)(917, 152)(121, 152)(487, 152)(228, 152)(487, 152)(917, 152)(121, 152)(121, 152)(317, 152)(317, 152)(1002, 152)(917, 152)(121, 152)(917, 152)(121, 152)(228, 152)(487, 152)(487, 152)(389, 152)(228, 152)(317, 152)(389, 152)(343, 152)(389, 152)(317, 152)(228, 152)(389, 152)(487, 152)(228, 152)(389, 152)(228, 152)(253, 152)(343, 152)(343, 152)(389, 152)(253, 152)(228, 152)(389, 152)(487, 152)(228, 152)(121, 152)(487, 152)(551, 152)(253, 152)(389, 152)(228, 152)(487, 152)(343, 152)(343, 152)(1002, 152)(389, 152)(1002, 152)(389, 152)(343, 152)(228, 152)(317, 152)(228, 152)(389, 152)(551, 152)(

## loading embedding + encoding dataset

In [4]:
from gensim.models import KeyedVectors

# Load the stored gensim KeyedVectors from `word_vectors_path`;
# mmap='r' memory-maps the stored arrays read-only instead of reading them fully into RAM.
wv = KeyedVectors.load(word_vectors_path, mmap='r')

In [5]:
from src.data_processing.sparse_notes_classified_time.embed_adapt import decode_note_vector_track, encode_frames, append_track_end

# Split every track column-wise: the first 128 columns stay sparse (note part),
# the remaining columns are densified (duration part).
note_vecs, durations = zip(*(
    (track[:, :128], track[:, 128:].toarray())
    for track in tracks
))

# Encode the note part of each track with the loaded word vectors
# (presumably one embedding vector per frame -- TODO confirm in encode_frames).
note_vecs = [encode_frames(sparse_notes, wv) for sparse_notes in note_vecs]

# Re-join encoded notes and durations frame by frame (feature axis = 1).
tracks_embedded = [
    np.concatenate(notes_and_durations, axis=1)
    for notes_and_durations in zip(note_vecs, durations)
]

len(tracks), tracks[0].shape

(250, (253, 152))

In [6]:
# from src.data_processing.embedding_sparse_notes.common import TRACK_END, UNKNOWN_FRAME

# track_split_points = np.where(track == TRACK_END)[0]

# # + 1, so split happens after <TRACK_END>, [:-1] to skip last, empty partition
# tracks = [t.tolist() for t in np.split(track, track_split_points + 1)][:-1]
# # tracks is now a list of lists of frames

### prepare dataset generator

In [8]:
def dataset_gen(tracks, window_size_range=(20, 300), batch_size=16):
    """
    Endlessly yield random (x, y) next-frame training batches cut out of `tracks`.

    tracks - list of np.arrays of shape (track_length, frame_size)
    window_size_range - (low, high) bounds of the sequence length; a new length
        is drawn for every batch from [low, min(high, shortest_track - 3)),
        i.e. the upper bound is exclusive and capped by the shortest track
    batch_size - number of sequences in batch

    Yields (x, y), both of shape (batch_size, window_size, frame_size), where
    y is x shifted forward by one frame.

    Raises ValueError when `tracks` is empty or the shortest track cannot hold
    the smallest requested window. Because this is a generator, the error
    surfaces on the first `next()` call, not at construction time.
    """
    if len(tracks) == 0:
        raise ValueError('dataset_gen needs at least one track')

    shortest_track = min(len(t) for t in tracks)
    min_window_size = window_size_range[0]
    # cap by the shortest track so that any drawn window fits into any track
    max_window_size = min(shortest_track - 3, window_size_range[1])
    if min_window_size >= max_window_size:
        raise ValueError(
            f'shortest track ({shortest_track} frames) is too short for '
            f'window_size_range={tuple(window_size_range)}; the exclusive upper '
            f'window bound would be {max_window_size}'
        )

    while True:
        # NOTE: the order of the random draws (window, tracks, offsets) is kept
        # stable so seeded runs stay reproducible.
        window_size = np.random.randint(min_window_size, max_window_size)
        # select #batch_size tracks (with replacement)
        selected_track_indices = [np.random.randint(0, len(tracks)) for _ in range(batch_size)]
        # select sequence starting point for each track; the margin of 2 keeps
        # the shifted target window inside the track
        sequence_starts = [np.random.randint(0, len(tracks[track_idx]) - window_size - 2)
                           for track_idx in selected_track_indices]

        # cut window_size + 1 frames once, then derive input and shifted target from it
        windows = [tracks[track_idx][start:start + window_size + 1]
                   for track_idx, start in zip(selected_track_indices, sequence_starts)]

        x = np.stack([w[:-1] for w in windows])
        y = np.stack([w[1:] for w in windows])

        yield x, y
        
# Smoke test: draw a single batch of 5 sequences with window_size_range (10, 50)
# from the embedded tracks and display the input / target shapes.
x, y = next(dataset_gen(tracks_embedded, (10, 50), 5))
x.shape, y.shape

((5, 21, 40), (5, 21, 40))

## Setting up model

In [9]:
from tensorflow import keras as K

# Feature widths of the two model inputs.
# DURATION_INPUT_SIZE matches the 24 columns left after the first 128 of each
# 152-column track; NOTES_INPUT_SIZE is presumably the embedding width
# produced by encode_frames -- TODO confirm.
NOTES_INPUT_SIZE = 16
DURATION_INPUT_SIZE = 24

# width of the Dense layer applied to the duration input before concatenation
DURATION_INPUT_DENSE_SIZE = 16

# shared trunk
MAIN_LSTM_SIZE = 256
MAIN_DENSE_SIZE = 128

# per-head recurrent layers
NOTES_LSTM_SIZE = 64
DURATION_LSTM_SIZE = 32

# per-head hidden Dense layers
NOTES_OUTPUT_DENSE = 32
DURATION_OUTPUT_DENSE = 32

# each head predicts a vector of the same width as the corresponding input
NOTES_OUTPUT_SIZE = NOTES_INPUT_SIZE
DURATION_OUTPUT_SIZE = DURATION_INPUT_SIZE

# batch generator settings passed to dataset_gen
BATCH_SIZE = 16
WINDOW_SIZE_RANGE = (20, 300)

NOTES_INPUT_SHAPE = (None, NOTES_INPUT_SIZE)
DURATION_INPUT_SHAPE = (None, DURATION_INPUT_SIZE)
# None allows for variable seq_length between batches

# layer names; the output names double as keys of the loss / metric dicts
NOTES_INPUT_NAME = 'notes_input'
DURATION_INPUT_NAME = 'duration_input'
NOTES_OUTPUT_NAME = 'notes_output'
DURATION_OUTPUT_NAME = 'duration_output'

#### load existing model

In [None]:
# or load saved model
model_path = 'lstm_lstm/embedded_16_128_stacked_32/embedded_16_128_stacked_32md_e200_t2019-10-09T09_59_31_cpu.h5'
model = K.models.load_model(os.path.join(models_base_path, model_path))

#### or create new one

In [19]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Input, Concatenate

try:
    from tensorflow.keras.layers import CuDNNLSTM
except ImportError:
    # TensorFlow 2.x no longer exposes CuDNNLSTM under tensorflow.keras.layers;
    # there the regular LSTM layer selects the cuDNN kernel on its own when
    # its arguments allow it, so falling back keeps this cell runnable.
    CuDNNLSTM = LSTM

# GPU-only layer on Colab, portable LSTM everywhere else
rnn_layer = CuDNNLSTM if google_colab_env else LSTM

# --- inputs: note vectors and duration vectors, variable sequence length ---
x1 = Input(NOTES_INPUT_SHAPE, name=NOTES_INPUT_NAME)
x2 = Input(DURATION_INPUT_SHAPE, name=DURATION_INPUT_NAME)
# project the duration vector through a Dense layer before merging
x3 = Dense(DURATION_INPUT_DENSE_SIZE)(x2)

# --- shared trunk ---
h1 = Concatenate()([x1, x3])
h2 = rnn_layer(MAIN_LSTM_SIZE, return_sequences=True)(h1)
h3 = Dense(MAIN_DENSE_SIZE)(h2)

# --- notes head: no activation on the output layer (trained with 'mse') ---
g1 = rnn_layer(NOTES_LSTM_SIZE, return_sequences=True)(h3)
g2 = Dense(NOTES_OUTPUT_DENSE)(g1)
y1 = Dense(NOTES_OUTPUT_SIZE, name=NOTES_OUTPUT_NAME)(g2)

# --- duration head: softmax over the duration classes ---
i1 = rnn_layer(DURATION_LSTM_SIZE, return_sequences=True)(h3)
i2 = Dense(DURATION_OUTPUT_DENSE)(i1)
y2 = Dense(DURATION_OUTPUT_SIZE, name=DURATION_OUTPUT_NAME, activation='softmax')(i2)

model = Model(inputs=[x1, x2], outputs=[y1, y2])

model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
duration_input (InputLayer)     [(None, None, 24)]   0                                            
__________________________________________________________________________________________________
notes_input (InputLayer)        [(None, None, 16)]   0                                            
__________________________________________________________________________________________________
dense_28 (Dense)                (None, None, 16)     400         duration_input[0][0]             
__________________________________________________________________________________________________
concatenate_8 (Concatenate)     (None, None, 32)     0           notes_input[0][0]                
                                                                 dense_28[0][0]             

In [21]:
# One loss per output head, keyed by the output layer name:
# 'mse' for the notes output, categorical cross-entropy for the softmax duration output.
losses = {
    NOTES_OUTPUT_NAME: 'mse',
    DURATION_OUTPUT_NAME: 'categorical_crossentropy',
}

# Metrics reported per head during training, keyed the same way.
metrics = {
    NOTES_OUTPUT_NAME: ['mse'],
    DURATION_OUTPUT_NAME: ['categorical_accuracy'],
}

# TODO: consider different loss weights for the two outputs
# (no loss weights are passed below)

model.compile(
    loss=losses,
    optimizer='adam', 
    metrics=metrics,
)

#### define training callbacks

In [None]:
from src.training.common.training_callbacks import ModelAndLogSavingCallback, GeneratingAndPlottingCallback

# --- logging callback ---
logging_path = 'lstm'
experiment_name = f'clustered_time_{MAIN_LSTM_SIZE}'
experiment_path = os.path.join(models_base_path, logging_path, experiment_name)
os.makedirs(experiment_path, exist_ok=True)
print(f'saving checkpoints and logs to {experiment_path}')

# save_log_only=True: presumably this callback then writes only the training
# log and leaves model files alone -- TODO confirm in ModelAndLogSavingCallback
log_callback = ModelAndLogSavingCallback(model, experiment_path, save_log_only=True)

# --- generating callback ---
from src.generating.generating import recurrent_generate
from src.generating.embedded_clustered_time_generating_seeds import seed_generators
from src.data_processing.common.helpers import pipe
from src.data_processing.sparse_notes_classified_time.np2mid import np2sparse
from src.data_processing.sparse_notes_classified_time.embed_adapt import decode_note_vector_track

SEED_LENGTH = 10
GENERATED_SEQ_LENGTH = 100
GENERATING_WINDOW_SIZE = 25
METHOD = 'const_frame_seed_noise'


def seed_generator():
    """Create a batch of seed sequences using the seed strategy named by METHOD."""
    return seed_generators[METHOD](
        SEED_LENGTH, NOTES_INPUT_SIZE, word_vectors=wv, batch_size=BATCH_SIZE)


def sample_generator(model, seed):
    """Generate sequences from `model`, starting from `seed`, via recurrent_generate.

    The generator works on frames with note and duration features concatenated
    on the last axis; the two transforms convert between that layout and the
    model's separate (notes, durations) inputs / outputs.
    """
    return recurrent_generate(
        model,
        seed,
        GENERATED_SEQ_LENGTH,
        GENERATING_WINDOW_SIZE,
        is_binary=False,
        # split concatenated frames into the model's two inputs
        # (the closing parenthesis of this tuple was missing, a SyntaxError)
        transform_input=lambda x: (x[:, :, :NOTES_INPUT_SIZE], x[:, :, NOTES_INPUT_SIZE:]),
        # merge the model's two outputs back into concatenated frames
        transform_output=lambda args: np.concatenate(args, axis=2),
    )


def sparse_sample_generator(model, seed):
    """Generate samples and convert each one with decode_note_vector_track + np2sparse."""
    # NOTE(review): each `s` is indexed with three axes. If sample_generator
    # returns one (batch, time, features) array, iterating it yields 2-D items
    # and this indexing would raise IndexError -- verify against
    # recurrent_generate's return value.
    return pipe(
        sample_generator(model, seed),
        lambda batch_of_samples: [
            np2sparse(
                decode_note_vector_track(s[:, :, :NOTES_INPUT_SIZE], wv),
                s[:, :, NOTES_INPUT_SIZE:],
                duration_dict,
            ) for s in batch_of_samples]
    )


print(f'generating sequences of {GENERATED_SEQ_LENGTH} using {METHOD}')

gen_callback = GeneratingAndPlottingCallback(model, sparse_sample_generator, seed_generator)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Write a checkpoint into the experiment directory only when the validation
# loss improves; epoch number and val_loss are formatted into the file name.
mc = ModelCheckpoint(
    filepath=os.path.join(experiment_path, 'm_e_{epoch:03d}-l_{val_loss:.2f}.h5'),
    save_best_only=True,
    mode='min',
    monitor='val_loss',
)

# Stop training once val_loss has failed to improve by at least 0.01
# for 10 consecutive epochs.
es = EarlyStopping(patience=10, min_delta=1e-2, monitor='val_loss')

#### training

In [None]:
# pre training code
from time import time
epochs_elapsed = 0
minutes_elapsed = 0

data_gen = dataset_gen(vectorized_tracks, WINDOW_SIZE_RANGE, BATCH_SIZE)
test_gen = dataset_gen(vectorized_tracks, WINDOW_SIZE_RANGE, BATCH_SIZE)

In [None]:
# Length of one run of this cell; re-run the cell to keep training.
EPOCHS = 1
STEPS_PER_EPOCH = 1
TEST_STEPS = 100

start_time = time()
model.fit_generator(
    generator=data_gen,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=TEST_STEPS,
    validation_data=test_gen,
    callbacks=[log_callback, gen_callback, mc, es],
)

# Bookkeeping across runs: whole minutes spent in this run (floored) and epochs done.
minutes_elapsed = minutes_elapsed + (time() - start_time) // 60
epochs_elapsed = epochs_elapsed + EPOCHS