In [3]:
# Mount Google Drive (Colab only) so that the MIDI files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
#import statements
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from music21 import converter, instrument, note, chord, stream, duration
import glob
import os
from itertools import chain
import copy
import numpy as np
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras.layers import CuDNNLSTM,Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from numpy import argmax
import gc

In [2]:
def data_extractor(directory):
  """
  Summary:
  Parses every MIDI file matched by a glob pattern and flattens the notes and
  chords of all files into two parallel lists.

  Parameters:
  directory: glob pattern (string) selecting the midi files, e.g. "midi/*.mid"

  Returns:
  list: [notes, offsets]. notes holds one string per element (the pitch name for
  a note, the '.'-joined normal-order pitch classes for a chord). offsets holds,
  for the same element, its offset minus the offset of the previous element,
  rounded to 3 decimals. The previous offset restarts at 0 for every file.
  """
  notes = []
  offsets = []
  # glob returns files in arbitrary order; sorting makes repeated runs see the
  # same sequence of songs.
  for file in sorted(glob.glob(directory)):
    mid = converter.parse(file)

    try:
      # Prefer the first instrument part when the score can be partitioned.
      s2 = instrument.partitionByInstrument(mid)
      notes_to_parse = s2.parts[0].recurse()
    except Exception:
      # Partitioning failed or yielded no usable part: fall back to the flat
      # note list. Catching Exception (instead of a bare except) still lets
      # KeyboardInterrupt / SystemExit stop a long extraction run.
      # NOTE(review): newer music21 releases prefer mid.flatten() over
      # mid.flat - confirm against the installed version.
      notes_to_parse = mid.flat.notes

    prev_offset = 0
    for element in notes_to_parse:
      if isinstance(element, note.Note):
        token = str(element.pitch)
      elif isinstance(element, chord.Chord):
        token = '.'.join(str(n) for n in element.normalOrder)
      else:
        # Anything that is neither a Note nor a Chord is skipped.
        continue

      notes.append(token)
      offsets.append(round(float(element.offset - prev_offset), 3))
      prev_offset = element.offset

  return [notes, offsets]

In [3]:
# Extract the whole training corpus once; `data` is [notes, offsets].
data = data_extractor("/content/drive/MyDrive/midi_files/*.mid")
note_data, offset_data = data

# Sorted vocabularies: the position of a value in these lists is the index
# used for it by the one-hot helpers.
unique_notes = sorted(set(note_data))
unique_note_number = len(unique_notes)
unique_offsets = sorted(set(offset_data))
unique_offset_number = len(unique_offsets)

In [13]:
def test_data_pipeline(directory = "/content/drive/MyDrive/midi_short/*.mid"):
  """
  Summary:
  Round-trip check of the offset pipeline: extract -> one-hot segments -> decode.
  The second input window returned by make_segments is decoded back to offsets
  and must equal the extracted offsets shifted by one position.

  Parameters:
  directory: glob pattern of the midi files used for the check

  Returns:
  None. Prints 'pipeline working' or 'pipeline not working'.
  """
  # Check the files loaded here. Previously they were parsed and then ignored in
  # favour of the notebook-level offset_data / unique_offsets, so the check
  # depended on earlier cells and the parse was wasted work.
  offsets = data_extractor(directory)[1]
  vocabulary = sorted(set(offsets))
  seg = make_segments(data_array = offsets, unique_values = vocabulary, segment_type = 'offset')

  # seg[0] holds the input windows; window 1 starts at the second element.
  second_window = seg[0][1]
  check_list = [one_hot_decode(step, vocabulary)[0] for step in second_window]

  if check_list == offsets[1:len(second_window)+1]:
    print('pipeline working')
  else:
    print('pipeline not working')

# NOTE: this call needs make_segments and one_hot_decode, which are defined in
# cells further down; on a fresh kernel run those cells before this one.
test_data_pipeline()

  


pipeline working


In [5]:
def to_midi(notes,offsets):
  """
  Summary:
  Builds a Music21 stream from parallel note and offset lists. The stream can
  then easily be converted into a midi file.

  Parameters:
  notes: list of strings. A string starting with a letter is used as a pitch
         name for a single note; any other string is split on '.' into integers
         that form a chord.
  offsets: list of numbers. offsets[i] is the distance between element i and the
           element before it (the first one is measured from 0).

  Returns:
  Music21 stream object. Element i is inserted at offsets[0] + ... + offsets[i].
  If the two lists differ in length, the extra items of the longer one are ignored.
  """
  s = stream.Stream()
  position = 0
  for ele, step in zip(notes, offsets):
    # Advance BEFORE inserting: each delta belongs to the element it precedes.
    # The old code started at offsets[0] and added offsets[i] after inserting,
    # which counted offsets[0] twice and never used the last delta.
    position += step
    if ele[0].isalpha():
      element = note.Note(ele)
    else:
      element = chord.Chord(list(map(int, ele.split('.'))))
    s.insert(position, element)
  return s

In [6]:
def one_hot_encode(vector, all_values):
  """
  Summary:
  One-hot encodes every item of a sequence.

  Parameters:
  vector: sequence of values to encode
  all_values: list of every possible value; the index of a value in this list
              is the position of the 1 in its encoding

  Returns:
  list: one list of 0/1 integers per item of vector, each len(all_values) long.
  An item that is missing from all_values raises KeyError.
  """
  position_of = {value: idx for idx, value in enumerate(all_values)}

  def _row(hot):
    # zeros before the hot position, the 1, then zeros up to the full width
    return [0] * hot + [1] + [0] * (len(all_values) - 1 - hot)

  return [_row(position_of[item]) for item in vector]

In [7]:
from numpy import argmax
def one_hot_decode(vector,all_values):
  """
  Summary:
  Maps a single one-hot (or score) vector back to the value it stands for.

  Parameters:
  vector: sequence of numbers; the position of its largest entry is decoded
  all_values: iterable of every possible value, in encoding order

  Returns:
  list: a one-element list holding the decoded value
  """
  value_at = dict(enumerate(all_values))
  hottest = argmax(vector)
  return [value_at[hottest]]

In [8]:
import keras.utils
segment_length = 64
def make_segments(data_array,unique_values, seq_length = segment_length,segment_type = 'note'):
  """
  Summary:
  One-hot encodes a sequence and cuts it into overlapping training windows:
  every run of seq_length consecutive items is an input, and the item right
  after that run is its target.

  Parameters:
  data_array: sequence of values (notes or offsets) to segment
  unique_values: list of every possible value, passed to one_hot_encode
  seq_length: number of items per input window
  segment_type: label of the stream being segmented; not used by the
                computation, kept for existing callers

  Returns:
  tuple: (input_seq, output_seq) numpy arrays of shape
  (n, seq_length, len(unique_values)) and (n, len(unique_values)),
  with n = len(data_array) - seq_length.

  Raises:
  ValueError: when data_array has no more than seq_length items, so that not a
  single (window, target) pair exists.
  """
  # Convert once to an array so the windows below are cheap slices instead of
  # nested Python lists.
  encoded = np.array(one_hot_encode(data_array, unique_values))

  n_segments = len(encoded) - seq_length
  if n_segments <= 0:
    raise ValueError(
        "need more than seq_length=%d items to build a segment, got %d"
        % (seq_length, len(encoded)))

  # np.stack requires a real sequence: passing a generator has been deprecated
  # since NumPy 1.16 and is an error in recent releases.
  input_seq = np.stack([encoded[i:i + seq_length] for i in range(n_segments)])
  # The target of window i is item seq_length + i; copy so the result does not
  # keep the whole encoded array alive.
  output_seq = encoded[seq_length:].copy()

  return input_seq, output_seq

In [9]:
# One (inputs, targets) pair of one-hot arrays per stream: notes and offsets.
note_model_data = make_segments(
    data_array=note_data,
    unique_values=unique_notes,
    segment_type='note',
)
offset_model_data = make_segments(
    data_array=offset_data,
    unique_values=unique_offsets,
    segment_type='offset',
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [10]:
# Split notes and offsets in ONE call so that both streams receive the same
# shuffle: row i of X_test_note and row i of X_test_off then come from the same
# position in the corpus. The generation cell relies on this when it seeds both
# models with the same random row index; two independent calls shuffled the two
# streams differently. A fixed seed keeps the split reproducible.
SPLIT_SEED = 42
(X_train_note, X_test_note,
 X_train_off, X_test_off,
 y_train_note, y_test_note,
 y_train_off, y_test_off) = train_test_split(
    note_model_data[0], offset_model_data[0],
    note_model_data[1], offset_model_data[1],
    test_size=0.2, random_state=SPLIT_SEED)

In [11]:
# Only the split arrays are used from here on; release the segment tuples.
del note_model_data, offset_model_data
gc.collect()

65

In [12]:
def make_model(input_shape,output_shape):
  """
  Summary:
  Builds and compiles the next-step classifier used for both streams:
  LSTM(512) -> Dropout(0.5) -> Dense(256) -> Dropout(0.25) -> Dense(softmax).

  Parameters:
  input_shape: shape of one input sample, passed to the LSTM layer
               (the notebook passes (segment_length, vocabulary size))
  output_shape: number of units of the softmax output layer
                (the notebook passes the vocabulary size)

  Returns:
  Compiled Sequential model (categorical cross-entropy, Adam with learning
  rate 0.001, accuracy metric).
  """
  model = Sequential()
  # tensorflow.keras LSTM instead of the legacy GPU-only CuDNNLSTM from the
  # standalone keras package: left at its default arguments it uses the cuDNN
  # kernel on GPU, it also runs on CPU, and it avoids mixing keras and
  # tensorflow.keras layers in one model.
  model.add(LSTM(512,input_shape=input_shape,return_sequences=False))
  model.add(Dropout(0.5))
  # NOTE(review): no activation is given, so this layer is linear - confirm
  # that this is intended.
  model.add(Dense(256))
  model.add(Dropout(0.25))
  model.add(Dense(output_shape,activation = 'softmax'))

  model.compile(
      loss='categorical_crossentropy',
      optimizer=Adam(learning_rate = .001),
      metrics=['accuracy']
  )
  return model

In [14]:
# One network per stream: inputs are (segment_length, vocabulary size) windows,
# outputs one softmax unit per vocabulary entry.
model_notes = make_model(
    input_shape=(segment_length, unique_note_number),
    output_shape=unique_note_number,
)
model_offsets = make_model(
    input_shape=(segment_length, unique_offset_number),
    output_shape=unique_offset_number,
)

In [15]:
# Stop once the monitored validation loss has not improved for 10 epochs.
# restore_best_weights=True rolls the model back to its best epoch; without it
# the model keeps the weights from `patience` epochs after the best one.
my_callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
]

In [16]:
# Train the note model; 20% of the training rows are held out for validation,
# which the early-stopping callback monitors.
history_note = model_notes.fit(
    X_train_note,
    y_train_note,
    epochs=150,
    validation_split=0.2,
    callbacks=my_callbacks,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150


In [17]:
# Train the offset model with the same settings as the note model.
history_offset = model_offsets.fit(
    X_train_off,
    y_train_off,
    epochs=150,
    validation_split=0.2,
    callbacks=my_callbacks,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150


In [73]:
def sample(preds, temperature=1.0):
    """Helper function to sample an index from a probability array.

    Parameters:
    preds: sequence of probabilities (e.g. one softmax output row)
    temperature: values below 1 sharpen the distribution, values above 1
                 flatten it; 0 means greedy, i.e. the most likely index

    Returns:
    Index of the sampled entry (numpy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the tempered distribution; avoids a division by zero.
        return np.argmax(preds)

    # log(0) = -inf is the wanted result for impossible entries; silence the
    # warning instead of perturbing the probabilities.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating so that small temperatures do
    # not underflow every weight to 0, which made the probabilities NaN.
    logits -= np.max(logits)
    weights = np.exp(logits)
    probs = weights / np.sum(weights)
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [99]:
def music_maker(seed_vec,model_type,number_unique,unique,num_notes = 64,diversity = 0.7):
  """
  Summary:
  Generates new items autoregressively. Starting from a one-hot seed window,
  the model predicts the next item, one index is sampled from that prediction,
  and the sampled item is appended to the sliding window for the next step.

  Parameters:
  seed_vec: one-hot seed window with number_unique columns
  model_type: object whose predict(x, verbose=0) returns one row of
              probabilities per input window
  number_unique: size of the vocabulary (width of every one-hot row)
  unique: list of vocabulary values, indexed by the sampled position
  num_notes: number of items to generate
  diversity: sampling temperature handed to sample()

  Returns:
  list: the num_notes generated values, in generation order
  """
  seed_len = len(seed_vec)
  # Buffer holding the seed followed by room for every generated one-hot row.
  arr = np.zeros((seed_len + num_notes, number_unique))
  if seed_len:
    arr[:seed_len] = seed_vec

  music = []
  for i in range(num_notes):
    # The window always spans the last seed_len rows known so far.
    window = arr[i:seed_len + i]
    pred = model_type.predict(np.reshape(window, (1, seed_len, number_unique)), verbose = 0)[0]
    choice = sample(pred, diversity)
    music.append(unique[choice])
    arr[seed_len + i, choice] = 1

  return music

In [100]:
import random
# Pick one held-out window and use the same row index to seed both models.
# NOTE(review): this assumes that row i of X_test_note and row i of X_test_off
# come from the same position in the corpus - verify that the train/test split
# keeps the two streams aligned.
randnum = random.randrange(len(X_test_note))
test_note, test_offset = X_test_note[randnum], X_test_off[randnum]

generated_music_note = music_maker(
    test_note, model_notes, unique_note_number, unique_notes)
generated_music_offset = music_maker(
    test_offset, model_offsets, unique_offset_number, unique_offsets)

In [101]:
# Decode the one-hot seed windows back to note names / offset values so the
# seed can be written out next to the generated piece.
seed_notes = [one_hot_decode(step, unique_notes)[0] for step in test_note]
seed_offsets = [one_hot_decode(step, unique_offsets)[0] for step in test_offset]

In [102]:
# Write the generated piece and the seed it was grown from as midi files.
to_midi(generated_music_note, generated_music_offset).write(
    fmt='midi', fp="generated_classical.mid")
to_midi(seed_notes, seed_offsets).write(
    fmt='midi', fp="seed_classical.mid")


'seed_classical.mid'