In [1]:
import numpy as np
import tensorflow as tf
from music21 import *
import os
import time
from tensorflow.keras.utils import to_categorical
from tqdm.notebook import tqdm

In [2]:
path = os.getcwd()
filenames = os.listdir("./data")

In [3]:
def _parseMidi(list_filenames):
    print("Loading samples.....")
    samples = [converter.parse(path + "/data/" + str(file)) for file in tqdm(list_filenames)]
    sam_mono = [sample.chordify() for sample in tqdm(samples) if len(instrument.partitionByInstrument(sample).parts) == 1]
    print("Samples loaded and chordified.....")
    list_chords = [[] for _ in sam_mono]
    list_durations = [[] for _ in sam_mono]
    list_keys = [[]]
    print("Preparing data....")
    for i, song in enumerate(sam_mono):
        list_keys.append(song.analyze("key"))
        for element in song:
            if isinstance(element, note.Note):
                list_chords[i].append(element.pitch)
                list_durations[i].append(element.duration.quarterLength)
            elif isinstance(element, chord.Chord):
                list_chords[i].append(".".join(str(n) for n in element.pitches))
                list_durations[i].append(element.duration.quarterLength)
    return list_chords, list_durations, list_keys

In [4]:
list_chords, list_durations, list_keys = _parseMidi(filenames)

Loading samples.....


HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))


Samples loaded and chordified.....
Preparing data....


In [192]:
#Find the number of unique Chords
unique_chords = np.unique([i for s in list_chords for i in s])
print(len(unique_chords))
chord_to_int = dict(zip(unique_chords, range(0, len(unique_chords))))
#Find the number of unique durations
unique_durations = np.unique([i for s in list_durations for i in s])
print(len(unique_durations))
duration_to_int = dict(zip(unique_durations, range(0, len(unique_durations))))

12632
25


In [193]:
#Create the reverse dict
int_to_chord = {i:j for j, i in chord_to_int.items()}
int_to_durations = {i:j for j, i in duration_to_int.items()}

In [197]:
# Define the sequence length
sequence_length = 32

# Define the empty lists
train_chords = []
train_durations = []
target_chords = []
target_durations = []

#Genrate the train and the target data
for i in range(len(list_chords)):
    chordList = [chord_to_int[chord] for chord in list_chords[i]]
    durationList = [duration_to_int[chord] for chord in list_durations[i]]
    for j in range(len(chordList) - sequence_length - 1):
        train_chords.append(chordList[j:j + sequence_length])
        train_durations.append(durationList[j:j + sequence_length])
        target_chords.append(chordList[j + sequence_length + 1])
        target_durations.append(durationList[j + sequence_length + 1])

In [198]:
train_chords = np.asarray(train_chords)
train_durations = np.asarray(train_durations)
target_chords = np.asarray(target_chords)
target_durations = np.asarray(target_durations)

In [199]:
print(train_chords.shape)
print(train_durations.shape)
print(target_chords.shape)
print(target_durations.shape)

(59365, 32)
(59365, 32)
(59365,)
(59365,)


In [201]:
embed_dim = 64

In [202]:
target_chords = to_categorical(target_chords)
target_durations = to_categorical(target_durations)

In [203]:
# Input layers
chord_input = tf.keras.layers.Input((None,))
duration_input = tf.keras.layers.Input((None,))

# Embedding layers
embedding_chord = tf.keras.layers.Embedding(input_dim=len(chord_to_int), output_dim=embed_dim,
                                            input_length=sequence_length)(chord_input)
embedding_durations = tf.keras.layers.Embedding(input_dim=len(duration_to_int), output_dim=embed_dim, 
                                               input_length=sequence_length)(duration_input)

# Concat these 2 layers
concat = tf.keras.layers.Concatenate(axis = 1)([embedding_chord, embedding_durations])

# Define the single LSTM layer with 512 units
lstm_layer = tf.keras.layers.LSTM(512, return_sequences=True)(concat)

lstm_layer2 = tf.keras.layers.LSTM(512)(lstm_layer)

flatten = tf.keras.layers.Flatten()(lstm_layer2)

# Define the intermediate dense layer
dense = tf.keras.layers.Dense(256)(flatten)

# Define the final output layers
dense1 = tf.keras.layers.Dense(len(chord_to_int), activation = "softmax")(dense)
dense2 = tf.keras.layers.Dense(len(duration_to_int), activation = "softmax")(dense)

# Define the model 
model = tf.keras.models.Model([chord_input, duration_input], [dense1, dense2])

In [204]:
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_33 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
input_34 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_32 (Embedding)        (None, None, 64)     808448      input_33[0][0]                   
__________________________________________________________________________________________________
embedding_33 (Embedding)        (None, None, 64)     1600        input_34[0][0]                   
____________________________________________________________________________________________

In [205]:
model.compile(loss = "categorical_crossentropy", optimizer = "adam")
model.fit([train_chords, train_durations], [target_chords, target_durations], batch_size=64, epochs = 20)

Train on 59365 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x2663220ec50>

In [208]:
def generate_samples(n_samples):
    def predict_next(chord_seq, dur_seq):
        out_chord, out_dur = model.predict([chord_seq, dur_seq])
        return out_chord, out_dur
    
    def make_predictions(num_steps, init_chord, init_dur, model):
        for i in range(num_steps):
            chord, dur = predict_next(np.asarray(init_chord[-31:]).reshape(1,-1), np.asarray(init_dur[-31:]).reshape(1,-1))
            chord_out = np.argmax(chord)
            dur_out = np.argmax(dur)
            init_chord.append(chord_out)
            init_dur.append(dur_out)
        return init_chord[32:], init_dur[32:]
    
    for n in tqdm(range(n_samples)):
        seed = np.random.randint(low = 0, high = train_chords.shape[0])
        CHORD, DURATION = make_predictions(100, train_chords[seed].tolist(), train_durations[seed].tolist(), model)
        CHORD = [int_to_chord[c] for c in CHORD]
        DURATION = [int_to_durations[d] for d in DURATION]
        generated_stream = stream.Stream()
        generated_stream.append(instrument.Piano())
        for i in range(len(CHORD)):
            try:
                generated_stream.append(note.Note(CHORD[i].replace(".", " "), quaterType = DURATION[i]))
            except:
                generated_stream.append(chord.Chord(CHORD[i].replace(".", " "), quaterType = DURATION[i]))
        generated_stream.write('midi', fp=path+'/generated/song{0}.mid'.format(n))

In [209]:
generate_samples(10)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


