In [13]:
import pretty_midi
import pandas as pd
import os

# Extract necessary data crom each note in each midi file
def parse_midi_file(file_path):
    midi_data = pretty_midi.PrettyMIDI(file_path)
    note_sequences = [] 

    for note in midi_data.instruments[0].notes:
        note_sequences.append({
            'Filename': filename,
            'pitch': note.pitch,
            'start': note.start,
            'duration': note.end - note.start
        })

    return note_sequences

midi_directory = "/Users/cameronherbert/dissertation/dissertation midis"
dfs = []

# Parse each midi file and save data to data frame
for filename in os.listdir(midi_directory):
    if filename.endswith(".mid"):
        file_path = os.path.join(midi_directory, filename)
        parsed_sequences = parse_midi_file(file_path)
        df = pd.DataFrame(parsed_sequences, columns=['Filename', 'pitch', 'start', 'duration'])     
        dfs.append(df)

df =  pd.concat(dfs, ignore_index=True)
df



Unnamed: 0,Filename,pitch,start,duration
0,Cymatics - Python MIDI 5 - C Min.mid,55,0.000000,0.500000
1,Cymatics - Python MIDI 5 - C Min.mid,48,0.005208,0.593750
2,Cymatics - Python MIDI 5 - C Min.mid,67,0.500000,0.104167
3,Cymatics - Python MIDI 5 - C Min.mid,51,0.010417,0.614583
4,Cymatics - Python MIDI 5 - C Min.mid,60,0.000000,0.625000
...,...,...,...,...
49339,Rogi-140bpm-Cheat-Code.mid,74,6.428565,0.214285
49340,Rogi-140bpm-Cheat-Code.mid,45,5.999994,0.857142
49341,Rogi-140bpm-Cheat-Code.mid,54,6.214280,0.642856
49342,Rogi-140bpm-Cheat-Code.mid,74,6.642851,0.214285


In [14]:
# Group df by filename and create a dictionary to store the DataFrames
dfs_dict = {}
for i, (filename, df) in enumerate(df.groupby('Filename')):
    df = df.drop(columns=['Filename'])
    dfs_dict['df_{}'.format(i)] = df

# Print the first 5 rows of each DataFrame
for name, df in dfs_dict.items():
    print(f"DataFrame: {name}")
    print(df.head())
    print("---------------------")

DataFrame: df_0
      pitch     start  duration
6777     51  0.000000  0.193015
6778     63  0.000000  0.220588
6779     70  0.220588  0.220588
6780     77  0.441176  0.220588
6781     51  0.441176  0.220588
---------------------
DataFrame: df_1
       pitch     start  duration
44578     61  0.027174  0.217392
44579     73  0.217392  0.217392
44580     61  0.434783  0.217392
44581     73  0.652175  0.217391
44582     61  0.869566  0.217391
---------------------
DataFrame: df_2
      pitch     start  duration
1936     74  0.000000  1.764704
1937     54  0.000000  1.764704
1938     66  0.000000  1.764704
1939     59  0.000000  1.792277
1940     61  1.764704  1.764704
---------------------
DataFrame: df_3
       pitch     start  duration
10328     59  0.000000  0.579044
10329     55  0.000000  0.579044
10330     43  0.000000  0.579044
10331     69  0.110294  0.579044
10332     62  0.193015  0.579044
---------------------
DataFrame: df_4
       pitch     start  duration
41569     76  0.028

In [26]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

sequence_length = 20
inputs, targets = [], []

# Sliding window approach to create inputs and targets
for df in dfs_dict.values():
    for i in range(len(df) - sequence_length):
        inputs.append(df.iloc[i:i+sequence_length])
        targets.append(df.iloc[i+sequence_length])

inputs = np.array(inputs)
targets = np.array(targets)

# Build the model
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(inputs.shape[1], inputs.shape[2])))
model.add(tf.keras.layers.Flatten())
model.add(Dense(3))
model.add(Dense(3))
model.add(Dense(3))

model.compile(loss='mean_squared_error', optimizer='adam')
print(model.summary())

# Train the model 
model.fit(inputs, targets, epochs=10, batch_size=64)

      pitch     start  duration
6777     51  0.000000  0.193015
6778     63  0.000000  0.220588
6779     70  0.220588  0.220588
6780     77  0.441176  0.220588
6781     51  0.441176  0.220588
6782     46  0.634190  0.248162
6783     58  0.661764  0.220588
6784     65  0.882352  0.220588
6785     46  1.075367  0.248161
6786     77  1.102940  0.220588
6787     46  1.323528  0.027573
6788     41  1.737131  0.027573
6789     53  1.764704  0.220588
6790     41  1.764704  0.220588
6791     70  1.985292  0.220588
6792     41  2.178307  0.027573
6793     77  2.205880  0.220588
6794     41  2.205880  0.248162
6795     43  2.398894  0.248162
6796     55  2.426468  0.220588
6797     72  2.702203  0.110294
6798     74  2.647056  0.220588
6799     72  2.922791  0.110294
6800     43  2.840070  0.248162
6801     74  2.867644  0.220588
6802     43  3.088232  0.027573
6803     72  3.143379  0.110294
6804     74  3.088232  0.220588
6805     70  3.308820  0.220588
6806     51  3.529408  0.193014
6807    

<keras.src.callbacks.History at 0x17ae52dd0>

In [27]:
# Generate a new sequence
seed = inputs[5]  
output = []

for i in range(50):  
    prediction = model.predict(seed.reshape(1, sequence_length, 3))
    output.append(prediction)
    seed = np.vstack((seed[1:], prediction))

output = np.array(output)



In [28]:
print(output)

[[[58.04838     3.4510233   0.5509974 ]]

 [[56.899345    3.5890808   0.6607543 ]]

 [[54.732807    3.9043021   0.4545265 ]]

 [[57.10874     3.82821     0.7833215 ]]

 [[54.565414    4.0499787   0.5362793 ]]

 [[55.315353    4.372584    0.51339877]]

 [[55.704796    4.516183    0.62947047]]

 [[57.83039     4.726902    0.818905  ]]

 [[55.50477     5.0764484   0.72319424]]

 [[56.763863    5.4703526   0.7388555 ]]

 [[57.120956    5.5245957   0.9784266 ]]

 [[58.727806    5.8428636   0.8931285 ]]

 [[57.15808     6.0115075   0.879717  ]]

 [[59.127716    6.242504    1.0572182 ]]

 [[59.554127    6.481705    0.9904095 ]]

 [[59.697193    6.707251    1.0532023 ]]

 [[58.846245    6.928325    1.1892658 ]]

 [[58.0973      7.2116675   1.2755443 ]]

 [[57.123756    7.4076014   1.348817  ]]

 [[58.314846    7.6840897   1.4221212 ]]

 [[58.094368    7.9351735   1.4676086 ]]

 [[58.261337    8.208346    1.5045377 ]]

 [[58.341255    8.47827     1.4849321 ]]

 [[58.710438    8.752302    1.5437

In [29]:
import pretty_midi

# Initialize a PrettyMIDI object
midi = pretty_midi.PrettyMIDI()

# Initialize an Instrument instance for a piano (instrument number 0)
piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
piano = pretty_midi.Instrument(program=piano_program)

# Reshape output
output = output.squeeze()

# Iterate over note sequence
for row in output:
    pitch, start_time, duration = row
    # Create a Note instance for this note
    note = pretty_midi.Note(
        velocity= 100,
        pitch=int(pitch),
        start=start_time,
        end=start_time+duration
    )
    print(note)
    # Add it to our piano instrument
    piano.notes.append(note)

# Add the piano instrument to the PrettyMIDI object
midi.instruments.append(piano)

# Write out the MIDI data
midi.write('AI.mid')

Note(start=3.451023, end=4.002021, pitch=58, velocity=100)
Note(start=3.589081, end=4.249835, pitch=56, velocity=100)
Note(start=3.904302, end=4.358829, pitch=54, velocity=100)
Note(start=3.828210, end=4.611532, pitch=57, velocity=100)
Note(start=4.049979, end=4.586258, pitch=54, velocity=100)
Note(start=4.372584, end=4.885983, pitch=55, velocity=100)
Note(start=4.516183, end=5.145653, pitch=55, velocity=100)
Note(start=4.726902, end=5.545807, pitch=57, velocity=100)
Note(start=5.076448, end=5.799643, pitch=55, velocity=100)
Note(start=5.470353, end=6.209208, pitch=56, velocity=100)
Note(start=5.524596, end=6.503022, pitch=57, velocity=100)
Note(start=5.842864, end=6.735992, pitch=58, velocity=100)
Note(start=6.011508, end=6.891224, pitch=57, velocity=100)
Note(start=6.242504, end=7.299722, pitch=59, velocity=100)
Note(start=6.481705, end=7.472115, pitch=59, velocity=100)
Note(start=6.707251, end=7.760453, pitch=59, velocity=100)
Note(start=6.928325, end=8.117591, pitch=58, velocity=10