In [1]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
import collections
import glob
import numpy as np
from numpy import expand_dims 
import pandas as pd
import pathlib
import pretty_midi
from sklearn.model_selection import train_test_split 
from collections import Counter

dataset

In [2]:
data_dir = pathlib.Path('data/maestro-v2.0.0')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'maestro-v2.0.0-midi.zip',
      origin='https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )

drum notation is differnet and needs it's own set of instructions

In [3]:
is_drum = False
data_dir = pathlib.Path('data/maestro-v2.0.0')
filenames = glob.glob(str(data_dir/'**/*.mid*'))
print('Number of files:', len(filenames))

Number of files: 1282


read in the notes

In [4]:
def notes_in(notes,instrument,track_end,is_drum):
# Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    prev_start = sorted_notes[0].start

    for note in sorted_notes:
        start = note.start
        end = note.end
        pitch = note.pitch
        notes['pitch'].append(pitch)
        notes['start'].append(float(start)+track_end)
        if is_drum == False:
            notes['duration'].append(end - start)
        else:
            notes['duration'].append(1/4)
        if note == sorted_notes[-1]:
            track_end = note.end + track_end
            
    return notes,track_end

read in the files

In [5]:
# Extracting the notes from the sample MIDI file

def midi_to_notes(filenames: str,is_drum) -> pd.DataFrame:
    instrument = None
    track_end = 0
    notes = collections.defaultdict(list)
    #for midi_file in glob.glob(f'{midi_file}/*.mid'):
    for midi_file in filenames:
        print(midi_file)
        pm = pretty_midi.PrettyMIDI(midi_file)
        for instrument in pm.instruments:
            if is_drum == False and instrument.is_drum == False:
                notes,track_end = notes_in(notes,instrument,track_end,is_drum)
            if is_drum == True and instrument.is_drum == True:
                notes,track_end = notes_in(notes,instrument,track_end,is_drum)

    return pd.DataFrame({name: np.array(value) for name, value in notes.items()})


raw_notes = midi_to_notes(filenames[:5],is_drum)
raw_notes.head(10)

data\maestro-v2.0.0\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi
data\maestro-v2.0.0\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.midi
data\maestro-v2.0.0\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.midi
data\maestro-v2.0.0\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_10_Track10_wav.midi
data\maestro-v2.0.0\2004\MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_02_Track02_wav.midi


Unnamed: 0,pitch,start,duration
0,71,1.092708,0.096875
1,55,1.279167,0.217708
2,71,1.288542,0.505208
3,59,1.463542,0.167708
4,62,1.633333,0.119792
5,72,1.786458,0.041667
6,67,1.803125,0.196875
7,74,1.983333,0.114583
8,57,1.983333,0.539583
9,72,2.0375,0.06875


In [6]:
raw_notes.tail(10)

Unnamed: 0,pitch,start,duration
37231,54,3145.107292,0.5875
37232,49,3145.110417,0.73125
37233,42,3145.113542,0.935417
37234,83,3146.541667,1.726042
37235,78,3146.546875,1.709375
37236,47,3146.547917,1.664583
37237,74,3146.547917,1.702083
37238,35,3146.551042,1.680208
37239,42,3146.551042,1.683333
37240,71,3146.552083,1.695833


output notes to midi

In [7]:
def notes_to_midi(notes: pd.DataFrame, out_file: str, instrument_program,drums,
                  velocity: int = 100) -> pretty_midi.PrettyMIDI:

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
      program=instrument_program,is_drum=drums)

    for _, note in notes.iterrows():
        start = float(note['start'])
        if is_drum == False:
          end = float(start + note['duration'])
        else:
           end = float(start + 1/4)
        pitch = int(note['pitch'])
        note = pretty_midi.Note(velocity=velocity, pitch=pitch,
                                start=start, end=end)
        instrument.notes.append(note)

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm

# piano roll
# test data preprocessing by recreeating input data
piano = 0
example_file = f'test_drums_{is_drum}.midi'
example_pm = notes_to_midi(raw_notes,example_file,piano,is_drum)

normalize pitch + whatever else & save normalization constants into max scalars

In [8]:
max_scalars = []
note_parameters = ['pitch', 'start', 'duration']
if is_drum == True:
    note_parameters.remove('duration')
print(note_parameters)
note_scales = ['pitch','start','duration']
for i in note_scales:
    param_max = max(raw_notes[i])
    max_scalars.append(param_max)
    raw_notes[i] = raw_notes[i]/param_max

['pitch', 'start', 'duration']


In [9]:
raw_notes.tail(10)

Unnamed: 0,pitch,start,duration
37231,0.514286,0.999541,0.057323
37232,0.466667,0.999542,0.071349
37233,0.4,0.999543,0.091269
37234,0.790476,0.999997,0.168411
37235,0.742857,0.999998,0.166785
37236,0.447619,0.999999,0.162415
37237,0.704762,0.999999,0.166074
37238,0.333333,1.0,0.163939
37239,0.4,1.0,0.164244
37240,0.67619,1.0,0.165464


make lists of data

In [10]:
lists = {}
for parameter in note_parameters:
    lists[parameter] = raw_notes[parameter]

train test split

In [11]:
# train test split
test_size = 0.1
pitch_train,pitch_test,start_train,start_test = train_test_split(lists['pitch'],lists['start'],test_size=test_size,shuffle=False)
if is_drum == False:
    duration_train,duration_test = train_test_split(lists['duration'], test_size=test_size,shuffle=False)

Create sequences for LSTM

In [12]:
def create_sequences(data,input_dim,output_dim):
    x = []
    y = []
    for i in range(len(data)-input_dim-output_dim):
        x.append(data[i:i+input_dim])
        y.append(data[i:i+input_dim:i+input_dim+output_dim])
    return np.array(x),np.array(y)

In [13]:
input_dim = 32
output_dim = 1
X_pitrain, X_potrain = create_sequences(pitch_train,input_dim,output_dim)
X_sitrain, X_sotrain = create_sequences(start_train,input_dim,output_dim)
X_ditrain, X_dotrain = create_sequences(duration_train,input_dim,output_dim)

double check data formatting 

In [14]:
print(X_potrain)
print(type(X_potrain))

[[0.67619048]
 [0.52380952]
 [0.67619048]
 ...
 [0.80952381]
 [0.8952381 ]
 [0.4       ]]
<class 'numpy.ndarray'>


models for pitch, start time and duration

In [15]:
# Define the model architecture
model = Sequential()
model.add(LSTM(128, input_shape=(input_dim, 1)))  # Input shape: (sequence_length, features)
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='MSE', optimizer='adam')

# Train the model
model.fit(X_pitrain, X_potrain, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x295ad1df2e0>

In [64]:
# Define the model2 architecture
model2 = Sequential()
model2.add(LSTM(32, input_shape=(input_dim, 1)))  # Input shape: (sequence_length, features)
model2.add(Dense(32, activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

# Compile the model2
model2.compile(loss='MSE', optimizer='adam')

# Train the model2
model2.fit(X_sitrain, X_sotrain, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x295acc7ba60>

In [56]:
# Define the model3 architecture
model3 = Sequential()
model3.add(LSTM(32, input_shape=(input_dim, 1)))  # Input shape: (sequence_length, features)
model3.add(Dense(32, activation='relu'))
model3.add(Dense(32, activation='relu'))
model3.add(Dense(32, activation='relu'))
model3.add(Dense(1, activation='sigmoid'))

# Compile the model3
model3.compile(loss='MSE', optimizer='adam')

# Train the model3
model3.fit(X_ditrain, X_dotrain, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x295a7c44ca0>

In [95]:
# Generate additional notes
num_additional_notes = 250  # Number of additional notes to generate

# Select the last sequence from `x_train`
last_psequence = X_pitrain[-1]
last_ssequence = X_sitrain[-1]
last_dsequence = X_ditrain[-1]

# Reshape the sequence to match the model's input shape
last_psequence = last_psequence.reshape((1, 32, 1))
last_ssequence = last_ssequence.reshape((1, 32, 1))
last_dsequence = last_dsequence.reshape((1, 32, 1))

pred_pnotes = []
pred_snotes = []
pred_dnotes = []

# Generate additional notes by repeatedly predicting the next note
for _ in range(num_additional_notes):

    predicted_pnote = model.predict(last_psequence)
    predicted_snote = model2.predict(last_ssequence)
    predicted_dnote = model3.predict(last_dsequence)

    pred_pnotes.append(predicted_pnote[0,0])
    pred_snotes.append(predicted_snote[0,0])
    pred_dnotes.append(predicted_dnote[0,0])

    # Append the predicted note to the last sequence
    last_psequence = np.concatenate([last_psequence[:, 1:, :], predicted_pnote.reshape((1, 1, 1))], axis=1)
    last_ssequence = np.concatenate([last_psequence[:, 1:, :], predicted_snote.reshape((1, 1, 1))], axis=1)
    last_dsequence = np.concatenate([last_psequence[:, 1:, :], predicted_dnote.reshape((1, 1, 1))], axis=1)




In [51]:
print(max_scalars)

[105, 3146.552083333333, 10.248958333333348]


In [96]:
print(type(pred_pnotes))
pred_ppnotes = np.array(pred_pnotes)*105
pred_ssnotes = np.array(pred_snotes)*314
pred_ddnotes = np.array(pred_dnotes)*3

<class 'list'>


In [97]:
print(pred_ssnotes)
print(pred_ddnotes)

[  1.2387595 164.02385   197.14487   198.3685    196.10431   198.51033
 198.97336   202.4997    197.85435   199.7636    203.38776   205.78276
 205.27193   203.86456   204.88052   202.74907   204.25105   207.06168
 202.57605   198.97583   197.92978   197.93327   200.04688   203.64963
 208.88004   204.57416   208.83856   212.32452   207.63858   211.27953
 213.28802   207.75291   211.24886   204.23851   204.1456    199.44696
 196.11034   198.29416   198.71765   202.21936   197.60281   199.50076
 203.11818   205.48848   204.96457   203.54805   204.55812   202.40874
 203.91396   206.72095   202.25789   198.65518   197.60269   197.60432
 199.722     203.34113   208.59552   204.32101   208.61557   212.10216
 207.3948    211.05432   213.05075   207.54178   211.07414   204.08926
 203.97003   199.26224   195.89555   198.0476    198.46223   201.9408
 197.355     199.24693   202.85435   205.20056   204.6613    203.23459
 204.23903   202.07047   203.57971   206.38062   201.94241   198.33736
 197.27

In [98]:
pred_notes = {'pitch':pred_ppnotes,
              'start':pred_ssnotes,
              'duration':pred_ddnotes}

df = pd.DataFrame(pred_notes)
notes_to_midi(df,'test.midi',piano,False)

<pretty_midi.pretty_midi.PrettyMIDI at 0x295acfdc9a0>