# Train Model on Large Training Set

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

import music_generator.sequence_utils as sequence_utils
import music_generator.utils as utils
from music_generator.midi_converter import DiscreteTimeMidiConverter
from music_generator.model import MusicModel

# Record the TensorFlow build this notebook was executed with.
tf_version = tf.__version__
print('TensorFlow version:', tf_version)

TensorFlow version: 2.2.0-dev20200327


# Make sure CUDA and GPU are working

In [2]:
# Run the project's CUDA/GPU check and echo every message it reports,
# one per line.
gpu_status_messages = utils.check_cuda_and_gpu()
for status_line in gpu_status_messages:
    print(status_line)

CUDA and GPU Available...


# Create Model

In [3]:
# Run label: reused for the checkpoint directory, the log directory and
# (later in the notebook) the folder that receives generated samples.
description = 'paper_hparams_384classes_64embed_128batch_adam'

# Per-run output locations, both keyed by the run label.
checkpoint_path = './training_checkpoints/{}'.format(description)
log_path = './logs/{}'.format(description)

# Network hyperparameters. The summary printed below shows an embedding of
# 384 * 64 = 24576 parameters followed by 256-unit LSTM layers, each with
# dropout and batch normalization.
# NOTE(review): dense_size=None and init_lr=None presumably select defaults
# inside MusicModel -- confirm against music_generator.model.
hyperparams = dict(
    n_classes=384,
    embed_dims=64,
    rnn_size=256,
    rnn_layers=3,
    dense_size=None,
    dense_layers=1,
    dropout_rate=0.3,
    batch_norm=True,
    init_lr=None,
    dense_activation='relu',
)

model = MusicModel(ckpt_dir=checkpoint_path, log_dir=log_path, **hyperparams)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 64)          24576     
_________________________________________________________________
lstm (LSTM)                  (None, None, 256)         328704    
_________________________________________________________________
dropout (Dropout)            (None, None, 256)         0         
_________________________________________________________________
batch_normalization (BatchNo (None, None, 256)         1024      
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 256)         525312    
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 256)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 256)         1

# Create Dataset from Bach Chorales

In [4]:
data_path = './bach_chorales_midi/chorale_set_full/'
batch_size = 128
# Fixed seed for the train/validation split so that a fresh
# "Restart & Run All" reproduces the same partition.
# NOTE(review): this only pins the split itself; it assumes convert_folder
# returns the pieces in a stable order -- confirm.
split_seed = 42

# Convert every MIDI file in the folder, then cut the result into
# (input, label) pairs using a window of 128.
midi_converter = DiscreteTimeMidiConverter()
sequences = midi_converter.convert_folder(data_path)
sequences, labels = sequence_utils.window(sequences, window_size=128)

# Hold out 10% of the windows for validation.
# NOTE(review): the split happens after windowing, so windows cut from the
# same piece can land in both sets; if sequence_utils.window produces
# overlapping windows the validation loss will be optimistic. Consider
# splitting by piece before windowing.
x_train, x_val, y_train, y_val = train_test_split(
    sequences,
    labels,
    test_size=0.1,
    shuffle=True,
    random_state=split_seed,
)
print('Training Sequences: {}\nValidation Sequences: {}'.format(len(x_train), len(x_val)))

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))

# drop_remainder=True discards the final partial batch, so every batch has
# exactly batch_size examples.
# NOTE(review): the training pipeline is not reshuffled between epochs;
# the only shuffling is the one-off shuffle done by train_test_split.
dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)
dataset_val = dataset_val.batch(batch_size, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

Training Sequences: 283227
Validation Sequences: 31470


# Train Model on Dataset

In [5]:
# Train for a fixed number of epochs, handing the held-out batches to the
# model as val_data; the object returned by fit is kept in `history`.
epochs = 100
fit_options = dict(val_data=dataset_val, epochs=epochs, verbose=1)
history = model.fit(dataset, **fit_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79

# Sample from Model

In [6]:
# Generate 5 MIDI samples from the model, each 1000 events long and each
# seeded with a single middle-C note, and write them under a folder named
# after the run.
output_dir = './generated files/{}'.format(description)
for i in range(5):
    # Build a fresh seed list on every pass: generate_sequence might modify
    # the list it is given, so it is not shared across iterations.
    seed = [60]
    generated_sequence = model.generate_sequence(1000, seed)
    file_name = 'sample{}.mid'.format(i)
    midi_converter.convert_to_file(generated_sequence, output_dir, file_name)