In [1]:
import glob
import os

# running on a vm, no gpu
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import numpy as np
import tensorflow as tf
from music21 import *
import random
from matplotlib import pyplot as plt
from fractions import Fraction
import json
import pandas as pd
import pretty_midi
from typing import Optional

# constants
NUM_PIANO_KEYS = 88
A0_MIDI_OFFSET = 21

2024-11-17 04:52:41.119139: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-17 04:52:41.606718: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-17 04:52:42.023795: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731819162.335000   17138 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731819162.428556   17138 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-17 04:52:43.267622: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [24]:
# Parse every MIDI file into three parallel lists: one-hot pitch vectors,
# duration labels (strings) and offsets relative to the previous note/chord.
notes = []
offsets = []
durations = []

# Files containing a pitch outside the 88-key range are deleted from disk.
# Set this to False to only skip them.
REMOVE_OUT_OF_RANGE_FILES = True


def _encode_pitch(midi_pitch):
    """Return the one-hot piano-key vector for `midi_pitch`, or None if off-piano.

    The range is checked explicitly: a pitch below A0 yields a negative index,
    which NumPy would silently wrap around to the top keys instead of raising
    IndexError.
    """
    key_index = midi_pitch - A0_MIDI_OFFSET
    if not 0 <= key_index < NUM_PIANO_KEYS:
        return None
    one_hot = np.zeros(NUM_PIANO_KEYS)
    one_hot[key_index] = 1
    return one_hot


# change path to read in different midi files
for file in glob.glob('./*.mid', recursive=True):
    try:
        mid = converter.parse(file)
    except Exception as err:
        # Skip unreadable files. Falling through would re-process the previous
        # file's stream (or raise NameError on the very first file).
        print(file, err)
        continue

    # Collect per file so a rejected file contributes nothing to the dataset.
    file_notes, file_durations, file_offsets = [], [], []
    prev_offset = 0
    in_range = True

    for element in mid.flatten().notes:
        if isinstance(element, note.Note):
            members = [element]
        elif isinstance(element, chord.Chord):
            # if an element is a chord, encode each note separately
            members = list(element)
        else:
            continue
        if not members:
            continue

        encoded = [_encode_pitch(member.pitch.midi) for member in members]
        if any(vec is None for vec in encoded):
            in_range = False
            break

        for position, (member, vec) in enumerate(zip(members, encoded)):
            file_notes.append(vec)
            file_durations.append(str(member.quarterLength))
            # offset of first note is the element offset, offset of other
            # chord notes is 0
            if position == 0:
                file_offsets.append(round(float(element.offset - prev_offset), 3))
            else:
                file_offsets.append(float(0))
        prev_offset = element.offset

    if not in_range:
        # removes files that had note outside the range of a piano
        print(f"{file}: pitch outside the {NUM_PIANO_KEYS}-key piano range, skipping")
        if REMOVE_OUT_OF_RANGE_FILES:
            try:
                os.remove(file)
            except FileNotFoundError:
                pass
        continue

    notes.extend(file_notes)
    durations.extend(file_durations)
    offsets.extend(file_offsets)

notes = np.asarray(notes)
len(notes)

6445

In [25]:
# Lookup tables for one-hot encoding (value -> class index) and decoding
# (class index -> value). Duration labels are strings, so they are ordered
# lexicographically rather than numerically.
duration_labels = sorted(set(durations))
offset_values = sorted(set(offsets))

duration_map = dict(zip(duration_labels, range(len(duration_labels))))
reverse_duration = dict(enumerate(duration_labels))
offset_map = dict(zip(offset_values, range(len(offset_values))))
reverse_offset = dict(enumerate(offset_values))

In [26]:
# Display the duration-label -> class-index table.
duration_map

{'0.25': 0,
 '0.5': 1,
 '0.75': 2,
 '1.0': 3,
 '1.5': 4,
 '2.0': 5,
 '2/3': 6,
 '3.0': 7}

In [27]:
# Display the offset -> class-index table.
offset_map

{0.0: 0, 0.5: 1, 1.0: 2, 1.5: 3, 2.0: 4, 3.0: 5}

In [28]:
# One-hot encode every duration label by picking the matching row of an
# identity matrix; `durations` is rebound from a list of labels to an array.
duration_identity = np.eye(len(duration_map))
durations = np.asarray(
    [duration_identity[duration_map[label]] for label in durations]
)

In [29]:
# One-hot encode every offset; `offsets` is rebound from a list of floats to
# an array with one row per note.
offset_classes = len(offset_map)
offset_rows = []
for value in offsets:
    class_index = offset_map[value]
    # Fail with a readable message instead of appending an all-zero row when
    # the map holds an index that does not fit the vector width.
    if not 0 <= class_index < offset_classes:
        raise IndexError(
            f"offset {value!r} maps to class {class_index}, "
            f"but only {offset_classes} classes exist"
        )
    row = np.zeros(offset_classes)
    row[class_index] = 1
    offset_rows.append(row)
offsets = np.asarray(offset_rows)

In [30]:
# combines the three vectors per note into a single one
train_notes = np.concatenate([notes, durations, offsets], axis = 1)

# creates groupings of `seq_length` input notes for each output note for training
seq_length = 50
num_windows = len(train_notes) - seq_length
if num_windows <= 0:
    raise ValueError(
        f"need more than {seq_length} notes to build training windows, "
        f"got {len(train_notes)}"
    )

# Window i holds notes [i, i + seq_length); its target is note i + seq_length.
inputs = np.stack(
    [train_notes[start : start + seq_length] for start in range(num_windows)]
)
input_size = len(inputs)
# np.stack already produces (windows, seq_length, features); no reshape needed.
assert inputs.shape == (input_size, seq_length, train_notes.shape[1])

note_outputs = np.asarray(notes[seq_length:])
duration_outputs = np.asarray(durations[seq_length:])
offset_outputs = np.asarray(offsets[seq_length:])

In [31]:
# Single-LSTM network with three softmax heads, one per note attribute.
input_shape = inputs[0].shape
learning_rate = 0.005

inp = tf.keras.Input(input_shape)
lstm = tf.keras.layers.LSTM(512)(inp)
drop = tf.keras.layers.Dropout(0.5)(lstm)
# No activation is passed, so this layer is a linear projection.
dense = tf.keras.layers.Dense(256)(drop)
out = {
    "pitch": tf.keras.layers.Dense(NUM_PIANO_KEYS, name = "pitch", activation = "softmax")(dense),
    "duration": tf.keras.layers.Dense(len(duration_map), name = "duration", activation = "softmax")(dense),
    "offset": tf.keras.layers.Dense(len(offset_map), name = "offset", activation = "softmax")(dense),
}

model = tf.keras.Model(inp, out)

model.compile(
    loss = "categorical_crossentropy",
    # Keys must match the output names declared in `out`.
    loss_weights = {
        'pitch': 1.0,
        'offset': 1.0,
        'duration': 1.0,
    },
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate),
    # Per-output dict so each head gets exactly one accuracy metric,
    # independent of how a flat list would be matched to outputs.
    metrics = {
        "pitch": ["accuracy"],
        "duration": ["accuracy"],
        "offset": ["accuracy"],
    },
)

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 50, 102)]            0         []                            
                                                                                                  
 lstm_1 (LSTM)               (None, 512)                  1259520   ['input_2[0][0]']             
                                                                                                  
 dropout_1 (Dropout)         (None, 512)                  0         ['lstm_1[0][0]']              
                                                                                                  
 dense_1 (Dense)             (None, 256)                  131328    ['dropout_1[0][0]']           
                                                                                            

In [32]:
# Restore previously saved weights into the model built above.
# NOTE(review): the recorded run of this cell failed with a ValueError (the
# LSTM cell received 0 variables) - presumably the checkpoint was written by a
# different architecture or Keras version; confirm before relying on it.
model.load_weights("./weights/single_piece/weights.weights.h5")

ValueError: Layer 'lstm_cell' expected 3 variables, but received 0 variables during loading. Expected: ['lstm_1/lstm_cell/kernel:0', 'lstm_1/lstm_cell/recurrent_kernel:0', 'lstm_1/lstm_cell/bias:0']

In [None]:
%%time
epochs = 15

# Stops when the training loss has not improved for `patience` epochs and
# rolls back to the best weights seen. With patience 10 and 15 epochs this
# can only trigger in the last few epochs.
# NOTE(review): this monitors the training loss although a validation split
# is available - confirm 'loss' rather than 'val_loss' is intended.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'loss',
    patience = 10,
    verbose = 1,
    restore_best_weights = True)

history = model.fit(
    x = inputs,
    # target keys match the model's output names
    y = {"pitch": note_outputs, "duration": duration_outputs, "offset": offset_outputs},
    epochs = epochs,
    # `callbacks` is documented as a list of Callback instances
    callbacks = [early_stopping],
    validation_split = 0.2
)

2024-11-17 04:56:27.038130: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 506612400 exceeds 10% of free system memory.


Epoch 1/15
[1m179/644[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m4:21[0m 562ms/step - duration_accuracy: 0.4732 - duration_loss: 1.8730 - loss: 7.5003 - offset_accuracy: 0.5096 - offset_loss: 1.5183 - pitch_accuracy: 0.0402 - pitch_loss: 4.1089

In [None]:
# Total loss per epoch for the training data and the validation split.
loss_curves = (
    ('loss', 'total train loss'),
    ('val_loss', 'total validation loss'),
)
for history_key, curve_label in loss_curves:
    plt.plot(history.epoch, history.history[history_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Per-head accuracy per epoch: one train curve and one validation curve for
# each of the three outputs, drawn in the order duration, offset, pitch.
for head in ('duration', 'offset', 'pitch'):
    train_series = history.history[f'{head}_accuracy']
    validation_series = history.history[f'val_{head}_accuracy']
    plt.plot(history.epoch, train_series, label=f'train {head} accuracy')
    plt.plot(history.epoch, validation_series, label=f'validation {head} accuracy')

plt.legend()
plt.show()

In [51]:
# creates `seq_length` random notes to prime the generator; the count must
# equal the window length the model was built with
def _random_one_hot(width):
    """Return a length-`width` vector with a single 1 at a uniformly random index."""
    vec = np.zeros(width)
    vec[random.randint(0, width - 1)] = 1
    return vec


# Each row is pitch | duration | offset, the same layout as the training rows.
seed_notes = np.asarray([
    np.concatenate([
        _random_one_hot(NUM_PIANO_KEYS),
        _random_one_hot(len(duration_map)),
        _random_one_hot(len(offset_map)),
    ])
    for _ in range(seq_length)
])

In [52]:
# select seed notes from training music
# seed_notes = inputs[0]

# turns the seed note vectors back into actual music21 notes
chord_builder = []
note_stream = []
duration_end = NUM_PIANO_KEYS + len(duration_map)

# Walk backwards: notes with offset 0 sound together with the note before
# them, so they are buffered until the note that opens the chord (the one
# carrying the non-zero offset) is reached.
for g in reversed(seed_notes):
    n = note.Note(int(np.argmax(g[:NUM_PIANO_KEYS])) + A0_MIDI_OFFSET)
    str_duration = reverse_duration[int(np.argmax(g[NUM_PIANO_KEYS:duration_end]))]
    try:
        n.quarterLength = float(str_duration)
    except ValueError:
        # labels such as '2/3' are not valid float literals
        n.quarterLength = Fraction(str_duration)
    # if offset is 0, stores it so that it can be added to a chord
    offset = reverse_offset[int(np.argmax(g[-1 * len(offset_map):]))]
    if offset == 0:
        chord_builder.append(n)
    elif len(chord_builder) == 0:
        note_stream.append((n, offset))
    else:
        # The current note opens the buffered chord, so it belongs in it too.
        chord_builder.append(n)
        chord_builder.reverse()  # back to source order
        note_stream.append((chord.Chord(chord_builder), offset))
        chord_builder = []

# Notes left in the buffer all had offset 0 at the very start of the sequence.
if len(chord_builder) == 1:
    note_stream.append((chord_builder[0], 0))
elif len(chord_builder) > 1:
    chord_builder.reverse()
    note_stream.append((chord.Chord(chord_builder), 0))
note_stream.reverse()

# Offsets are relative to the previous element; accumulate to absolute time.
seed_stream = stream.Stream()
previous_offset = 0
for n, off in note_stream:
    previous_offset += off
    seed_stream.insert(previous_offset, n)
seed_stream.show("midi")

In [53]:
# Autoregressive generation: predict one note from the current window, record
# it, then slide the window forward by one row.
# NOTE(review): the row fed back is the raw softmax output of each head, not a
# one-hot vector like the training rows - confirm this is intended.
num_notes = 100
generated_notes = []
for _ in range(num_notes):
    prediction = model.predict(tf.expand_dims(seed_notes, 0))
    generated_notes.append(prediction)
    next_row = np.concatenate(
        [prediction[head] for head in ("pitch", "duration", "offset")],
        axis = 1,
    )
    # drop the oldest row and append the prediction as the newest one
    seed_notes = np.concatenate([seed_notes[1:], next_row], axis = 0)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [54]:
# turns the generated notes into music21 notes
chord_builder = []
note_stream = []
# Pitch probabilities are raised to this power and renormalised before
# sampling, so values above 1 sharpen the distribution and values below 1
# flatten it.
temperature = 1.5

# Walk backwards: notes with offset 0 sound together with the note before
# them, so they are buffered until the note that opens the chord (the one
# carrying the non-zero offset) is reached.
for g in reversed(generated_notes):
    probs = g["pitch"].reshape(-1).astype(np.float64)
    weights = probs ** temperature
    pitch_idx = np.random.choice(len(probs), p=weights / np.sum(weights))

    n = note.Note(int(pitch_idx) + A0_MIDI_OFFSET)
    duration_label = reverse_duration[int(np.argmax(g["duration"]))]
    try:
        n.quarterLength = float(duration_label)
    except ValueError:
        # labels such as '2/3' are not valid float literals
        n.quarterLength = Fraction(duration_label)
    # if offset is 0, stores it so that it can be added to a chord
    offset = reverse_offset[int(np.argmax(g["offset"]))]
    if offset == 0:
        chord_builder.append(n)
    elif len(chord_builder) == 0:
        note_stream.append((n, offset))
    else:
        # The current note opens the buffered chord, so it belongs in it too.
        chord_builder.append(n)
        chord_builder.reverse()  # back to generation order
        note_stream.append((chord.Chord(chord_builder), offset))
        chord_builder = []

# Notes left in the buffer all had offset 0 at the very start of the sequence.
if len(chord_builder) == 1:
    note_stream.append((chord_builder[0], 0))
elif len(chord_builder) > 1:
    chord_builder.reverse()
    note_stream.append((chord.Chord(chord_builder), 0))
note_stream.reverse()

# Offsets are relative to the previous element; accumulate to absolute time.
s = stream.Stream()
previous_offset = 0
for n, off in note_stream:
    previous_offset += off
    s.insert(previous_offset, n)
s.show("midi")

# writes the midi file of the output
s.write("midi", "output.mid")

In [55]:
# Writes the generated stream `s` to output.mid, overwriting any existing file.
# NOTE(review): the decoding cell above already ends with this same write, so
# this cell only matters when re-exporting without re-sampling.
s.write("midi", "output.mid")

'output.mid'

In [58]:
# Persist the current model weights. The target directory is created first so
# the save does not fail on a fresh checkout.
weights_path = "./weights/multiple_pieces/weights.weights.h5"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
model.save_weights(weights_path)

In [80]:
# Display the offset -> class-index table currently held by the kernel.
# NOTE(review): the recorded output of this cell differs from the earlier
# display of offset_map, so it presumably comes from a different run or
# dataset - re-run the notebook top to bottom before trusting it.
offset_map

{0.0: 0, 0.5: 1, 1.0: 2, 2.0: 3, 3.0: 4, 6.0: 5}

In [88]:
# Display the duration-label -> class-index table currently held by the kernel.
duration_map

{'0.25': 0,
 '0.5': 1,
 '0.75': 2,
 '1.0': 3,
 '1.5': 4,
 '2.0': 5,
 '2/3': 6,
 '3.0': 7}

In [87]:
# Save the offset and duration maps as plain text, one "key:index" pair per line.
def _dump_map(path, mapping):
    """Write every entry of `mapping` to `path` as a `key:index` line."""
    with open(path, "w") as handle:
        handle.writelines(f"{key}:{index}\n" for key, index in mapping.items())


_dump_map('weights/multiple_pieces/offset_map.txt', offset_map)
_dump_map('weights/multiple_pieces/duration_map.txt', duration_map)

In [92]:
# Reload the encoding maps that were saved next to a set of weights, so that
# class indices line up with the model those weights belong to.
def _read_index_map(path, parse_key):
    """Read `key:index` lines from `path` into a dict.

    `parse_key` converts the textual key; the index is always parsed as int.
    Blank lines are ignored, and only the last ':' separates key from index.
    """
    mapping = dict()
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, value = line.rsplit(":", 1)
            mapping[parse_key(key)] = int(value)
    return mapping


def _parse_offset_key(text):
    """Parse an offset key as a float, falling back to Fraction for 'a/b' text."""
    try:
        return float(text)
    except ValueError:
        return Fraction(text)


offset_map = _read_index_map("weights/single_piece/offset_map.txt", _parse_offset_key)
reverse_offset = {index: key for key, index in offset_map.items()}

# Duration keys stay strings: the duration labels are `str(quarterLength)`
# values (e.g. '0.25', '2/3'), and the decoders convert them with
# float()/Fraction() themselves.
duration_map = _read_index_map("weights/single_piece/duration_map.txt", str)
reverse_duration = {index: key for key, index in duration_map.items()}

In [None]:
# Reads in previously saved dictionaries; used for transfer learning
# so that the encodings stay consistent between models.
# offset_map = dict()
# with open('mappings/duration_map.txt') as json_file:
#     duration_map = json.load(json_file)  
# reverse_duration = dict()
# with open('mappings/reverse_duration.txt') as f:
#     for line in f:
#         key, value = line.strip().split(":")
#         reverse_duration[int(key)] = value
# reverse_offset = dict()
# with open('mappings/reverse_offset.txt') as f:
#     for line in f:
#         key, value = line.strip().split(":")
#         reverse_offset[int(key)] = float(value)