In [1]:
# Mount Google Drive at /content/drive; every dataset, vocabulary, checkpoint
# and output path used below lives under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import tensorflow_datasets as tfds
import tensorflow as tf

import time
import numpy as np

import pretty_midi
import glob
import os 

import pickle

In [0]:
# Switch on eager execution through the TF1 compatibility API; the generation
# code further down calls .numpy() on sampled tensors.
tf.compat.v1.enable_eager_execution()

Make training data

In [0]:
def get_notes(file_path):
    """Read a single-instrument MIDI file and express its note timing in ticks.

    Args:
        file_path: Path of the MIDI file handed to ``pretty_midi.PrettyMIDI``.

    Returns:
        A list with one entry per instrument, each entry being that
        instrument's note list after every note's ``start`` and ``end`` were
        overwritten with ``time_to_tick`` of their previous values.
        ``None`` (after printing a message) when the file contains more than
        one instrument.
    """
    midi = pretty_midi.PrettyMIDI(file_path)

    # Guard clause: only single-instrument files are accepted.
    if len(midi.instruments) > 1:
        print(file_path, 'is not single channel')
        return None

    tracks = []
    for instrument in midi.instruments:
        for n in instrument.notes:
            # Both conversions read the untouched attribute values first.
            n.start, n.end = midi.time_to_tick(n.start), midi.time_to_tick(n.end)
        tracks.append(instrument.notes)
    return tracks

In [5]:
data_dir = '/content/drive/My Drive/CarolComposer/carol_dataset'

# Collect every .mid file below data_dir, nested folders included.
midi_datas = []
for dir_path, _dir_names, _file_names in os.walk(data_dir):
    midi_datas.extend(glob.glob(os.path.join(dir_path, '*.mid')))

print('total_file_num:', len(midi_datas))

# One token list per track. A token is 'start_tick/end_tick/pitch' and each
# track is terminated by the '0/0/0' end-of-song marker.
all_notes = []
for midi_data in midi_datas:
    midi_notes = get_notes(midi_data)
    if midi_notes is None:
        # get_notes returns None for multi-channel files; iterating over
        # None would raise a TypeError and abort the whole load.
        continue
    for notes in midi_notes:
        notes_to_strings = [
            str(note.start) + '/' + str(note.end) + '/' + str(note.pitch)
            for note in notes
        ]
        notes_to_strings.append('0/0/0')
        # Append per track, inside the loop: a file that yields no track must
        # not re-append the list left over from the previous file.
        all_notes.append(notes_to_strings)

print(len(all_notes))

total_file_num: 37
37


In [0]:
# Total number of note tokens across all tracks (end-of-song markers included).
total_note_len = sum(len(notes) for notes in all_notes)

In [0]:
vocab_path = '/content/drive/My Drive/CarolComposer/vocabs'

# Distinct note tokens. sorted() gives a deterministic order: a plain
# list(set(...)) of strings can change order between kernel sessions, which
# would silently change every token id and invalidate saved checkpoints.
vocabs = sorted({note for notes in all_notes for note in notes})

# save musical vocabs
with open(vocab_path, 'wb') as fp:
    pickle.dump(vocabs, fp)

# load saved musical vocabs
# NOTE(review): pickle.load can execute arbitrary code; this is acceptable
# only because the file was written by this same cell. Do not point
# vocab_path at an untrusted file.
with open(vocab_path, 'rb') as fp:
    loaded_vocab = pickle.load(fp)

In [0]:
# Lookup tables between note tokens and integer ids; a token's id is its
# position in loaded_vocab.
idx2note = np.array(loaded_vocab)
note2idx = dict(zip(loaded_vocab, range(len(loaded_vocab))))

print('note2idx:', note2idx)
print('idx2note:', idx2note)

# Encode all tracks, in order, into one flat id sequence. Tokens that are not
# in the vocabulary are skipped.
encoded = []
for song in all_notes:
    encoded.extend(note2idx[token] for token in song if token in note2idx)

song_as_int = np.array(encoded)

In [9]:
# Sanity check: show the encoded id sequence (numpy abbreviates long arrays).
print(song_as_int)

[9210 5835 3206 ...  471  844 4050]


In [0]:
# Length of each model input; chunks are cut at seq_length+1 so the target can
# be the same chunk shifted by one note.
seq_length = 20
# Number of whole (seq_length+1)-sized chunks in the corpus. Not referenced by
# the other cells shown in this notebook.
examples_per_epoch = total_note_len//(seq_length+1)

# Dataset that yields the encoded notes one integer id at a time.
note_dataset = tf.data.Dataset.from_tensor_slices(song_as_int)

In [0]:
# Group consecutive ids into chunks of seq_length+1; drop_remainder=True
# discards a final chunk that would be shorter.
sequences = note_dataset.batch(seq_length+1, drop_remainder=True)

In [0]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    Args:
        chunk: A sliceable sequence of note ids.

    Returns:
        A tuple ``(chunk[:-1], chunk[1:])``: everything but the last element
        as the input, everything but the first element as the target.
    """
    return chunk[:-1], chunk[1:]

# Convert every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [0]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000
# Rebuild the pipeline from `sequences` rather than from `dataset` itself, so
# re-running this cell does not shuffle and batch data that is already batched.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

Build model

In [0]:
# Number of distinct note tokens; passed to build_model as the vocabulary size.
vocab_size = len(vocabs)
# Width of the embedding vector learned for each token.
embedding_dim = 256
# Number of units in the GRU layer.
rnn_units = 512

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> GRU -> Dense next-note model.

    Args:
        vocab_size: Number of distinct tokens; used as the embedding input
            size and as the width of the final Dense layer.
        embedding_dim: Size of each token embedding.
        rnn_units: Number of GRU units.
        batch_size: Fixed batch dimension of the input (the GRU is created
            with ``stateful=True``).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [0]:
# Training model. Uses the shared vocab_size constant (as the inference build
# does) instead of recomputing len(vocabs) in a second place.
model = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

In [17]:
# Print the layer-by-layer shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           2476288   
_________________________________________________________________
gru (GRU)                    (64, None, 512)           1181184   
_________________________________________________________________
dense (Dense)                (64, None, 9673)          4962249   
Total params: 8,619,721
Trainable params: 8,619,721
Non-trainable params: 0
_________________________________________________________________


In [0]:
def loss(labels, logits):
    """Sparse categorical cross-entropy on unnormalised model outputs.

    Args:
        labels: Target note ids.
        logits: Raw model outputs; ``from_logits=True`` is passed because the
            final Dense layer of the model has no activation.

    Returns:
        The result of ``tf.keras.losses.sparse_categorical_crossentropy``.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

In [0]:
# `learning_rate` is the supported argument name; `lr` is only kept by
# tf.keras as a deprecated alias. All other settings are unchanged.
my_adam = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-6, amsgrad=False)
model.compile(optimizer=my_adam, loss=loss)

In [0]:
# Number of passes over the training dataset.
EPOCHS=20

In [0]:
# Directory on Drive that receives the training checkpoints.
checkpoint_dir = '/content/drive/My Drive/CarolComposer/model_output'
# Checkpoint file name template; "{epoch}" is a placeholder left for the
# ModelCheckpoint callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# save_weights_only=True: store the weights only, not the full model.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [22]:
# Train for EPOCHS epochs; the callback writes a weights checkpoint during training.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Kept here so this cell can run in a fresh session without the training
# cells; the checkpoint callback itself is only needed for training.
checkpoint_dir = '/content/drive/My Drive/CarolComposer/model_output'

# The GRU is stateful, so the batch dimension is fixed when the model is
# built. Rebuild it with batch_size=1 to generate one sequence at a time.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# latest_checkpoint returns None when the directory holds no checkpoint;
# fail with a clear message instead of passing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError('No checkpoint found in ' + checkpoint_dir)
model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [0]:
def generate_midi_tick(model, num_generate=1000, temperature=1.0):
    """Sample a sequence of note tokens from the trained model.

    Generation is seeded with the '0/0/0' end-of-song marker and stops when
    the model samples that marker again or after ``num_generate`` steps.

    Args:
        model: Stateful model built with batch size 1; called on a
            ``[1, 1]`` tensor of token ids at every step.
        num_generate: Maximum number of tokens to sample.
        temperature: Divisor applied to the logits before sampling. Values
            below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        List of generated 'start_tick/end_tick/pitch' tokens, without the
        terminating '0/0/0' marker.

    Raises:
        ValueError: If ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Shape [1, 1]: a batch of one sequence holding the seed token.
    input_eval = tf.expand_dims([note2idx['0/0/0']], 0)

    generated_music = []

    # Clear recurrent state left over from any previous call.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temperature
        # Sample from the logits of the last time step.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the new token back in; the stateful GRU carries the context.
        input_eval = tf.expand_dims([predicted_id], 0)
        if idx2note[predicted_id] == '0/0/0':
            break
        generated_music.append(idx2note[predicted_id])

    return generated_music

In [0]:
# Sample one piece as a list of 'start_tick/end_tick/pitch' tokens.
genereated_midis = generate_midi_tick(model)

In [71]:
# Number of tokens that were generated.
print(len(genereated_midis))

252


In [0]:
# Empty score that will hold the generated notes on a single piano track.
new_music = pretty_midi.PrettyMIDI(midi_file=None, resolution=256, initial_tempo=80.0)
piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
piano = pretty_midi.Instrument(program=piano_program)

# Each token is 'start_tick/end_tick/pitch'; ticks are converted to seconds
# using the timing of the new score.
for token in genereated_midis:
    fields = token.split('/')
    piano.notes.append(
        pretty_midi.Note(
            velocity=100,
            pitch=int(fields[2]),
            start=new_music.tick_to_time(int(fields[0])),
            end=new_music.tick_to_time(int(fields[1])),
        )
    )

new_music.instruments.append(piano)

In [0]:
# Save the generated piece as a MIDI file on Drive.
new_music.write('/content/drive/My Drive/CarolComposer/output_music.mid')