<a href="https://colab.research.google.com/github/NolanSmyth/deepMIDI/blob/master/midiMusic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dependences

In [2]:
# Import Tensorflow 2.0
%tensorflow_version 2.x
import tensorflow as tf 

import numpy as np
import os
import glob
from tqdm import tqdm
from keras.layers import Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from music21 import *

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [53]:
path = '/content/drive/My Drive/midi/*.mid'
def get_notes(path):

  ''' extracts all notes from midi files stored in drive given
  path (str) - path location 

  returns
  notes - list of all notes from all midi files
  '''

  notes = []
  midfs = glob.glob(path)
  for midf in midfs:
    notes_to_parse = None
    midi = converter.parse(midf)
    try:
      s2 = instrument.partitionByInstrument(midi)
      notes_to_parse = s2.parts[0].recurse()
    except:
      notes_to_parse = midi.flat.notes
    for element in notes_to_parse:
      if isinstance(element, note.Note):
        notes.append(str(element.pitch))
      elif isinstance(element, chord.Chord):
        notes.append('.'.join(str(n) for n in element.normalOrder))

  return notes  
notes = get_notes(path)

In [54]:
# create a dictionary mapping each name to a unique integer
pitch_names = sorted(set(notes))
note_to_int = {note:i for i, note in enumerate(pitch_names)}

# create mapping from indices back to note name
int_to_note = np.array(pitch_names)

num_unique_notes = len(pitch_names)
print('There are', num_unique_notes, 'unique characters in the dataset')

There are 125 unique characters in the dataset


In [7]:
def vectorize_notes(notes):
  # get names of all pitches used
  
  vectorized_notes = np.array([note_to_int[note] for note in notes])
  return vectorized_notes
vectorized_notes = vectorize_notes(notes)

In [97]:
print ('{} ---- characters mapped to int ----> {}'.format(repr(notes[:10]), vectorized_notes[:10]))

['5.8.10', '2.5.8.10', '3.7.10', '3.7.10', '5.8.10', '5.8.10', '7.10.1', '7.10.1', '7.10.1', '7.10.1'] ---- characters mapped to int ----> [46 25 35 35 46 46 57 57 57 57]


In [8]:
def get_batch(vectorized_notes, seq_length, batch_size):

  ''' create training exmaples for LSTM given
  vectorized_notes - a list ints representing all the notes from inputed midi files
  seq_length - length of each training sequence
  batch_size - size of each batch

  returns:
  x_batch - inputs
  y_batch - true labels for training
  '''

  # length of vectorized notes
  n = vectorized_notes.shape[0] - 1
  # randomly choose starting indices for the examples in training batch
  idx = np.random.choice(n - seq_length, batch_size)

  # create input and output sequences to feed into neural net
  input_batch = [vectorized_notes[i : i+seq_length] for i in idx]
  output_batch = [vectorized_notes[i+1 : i+seq_length+1] for i in idx]
  
  x_batch = np.reshape(input_batch, [batch_size, seq_length])
  y_batch = np.reshape(output_batch, [batch_size, seq_length])

  return x_batch, y_batch



In [9]:
x_batch, y_batch = get_batch(vectorized_notes, seq_length=5, batch_size=1)

for i, (input_idx, target_idx) in enumerate(zip(np.squeeze(x_batch), np.squeeze(y_batch))):
    print("Step {:3d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(int_to_note[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(int_to_note[target_idx])))

Step   0
  input: 61 ('8.0.3')
  expected output: 61 ('8.0.3')
Step   1
  input: 61 ('8.0.3')
  expected output: 61 ('8.0.3')
Step   2
  input: 61 ('8.0.3')
  expected output: 68 ('8.11.2')
Step   3
  input: 68 ('8.11.2')
  expected output: 68 ('8.11.2')
Step   4
  input: 68 ('8.11.2')
  expected output: 61 ('8.0.3')


In [10]:
def LSTM(rnn_units): 
  return tf.keras.layers.LSTM(
    rnn_units, 
    return_sequences=True, 
    recurrent_initializer='glorot_uniform',
    recurrent_activation='sigmoid',
    stateful=True,
  )

In [11]:
def create_network(num_unique_notes, embedding_dim, rnn_units, batch_size):
  ''' create the structure of the neural network given
  net_in - input for LSTM
  net_out - output for LSTM
  '''

  model = tf.keras.Sequential([
  tf.keras.layers.Embedding(num_unique_notes, embedding_dim, batch_input_shape=[batch_size, None]),
  LSTM(rnn_units),
  tf.keras.layers.Dropout(0.2),
  LSTM(rnn_units),
  tf.keras.layers.Dense(num_unique_notes)
  ])

  return model

In [55]:
model = create_network(num_unique_notes, embedding_dim=256, rnn_units=512, batch_size=32)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (32, None, 256)           32000     
_________________________________________________________________
lstm_10 (LSTM)               (32, None, 512)           1574912   
_________________________________________________________________
dropout_5 (Dropout)          (32, None, 512)           0         
_________________________________________________________________
lstm_11 (LSTM)               (32, None, 512)           2099200   
_________________________________________________________________
dense_5 (Dense)              (32, None, 125)           64125     
Total params: 3,770,237
Trainable params: 3,770,237
Non-trainable params: 0
_________________________________________________________________


In [56]:
x, y = get_batch(vectorized_notes, seq_length=10, batch_size=32)
pred = model(x)
print("Input shape:      ", x.shape, " # (batch_size, sequence_length)")
print("Prediction shape: ", pred.shape, "# (batch_size, sequence_length, vocab_size)")


Input shape:       (32, 10)  # (batch_size, sequence_length)
Prediction shape:  (32, 10, 125) # (batch_size, sequence_length, vocab_size)


In [14]:
sampled_indices = tf.random.categorical(pred[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices


array([ 2, 38, 18, 14, 53, 67, 42, 48, 13, 70])

In [57]:
print("Input: \n", repr("".join(int_to_note[x[0]])))
print()
print("Next Char Predictions: \n", repr("".join(int_to_note[sampled_indices])))

Input: 
 '101010101010105.8.102.5.8.1010.3'

Next Char Predictions: 
 '0.3.64.5.7.810.2106.88.11.15.115.8.111.78.11.3'


In [58]:
def compute_loss(labels, logits):
  loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
  return loss

example_batch_loss = compute_loss(y, pred)
# example_batch_loss = compute_loss('''TODO''', '''TODO''') # TODO

print("Prediction shape: ", pred.shape, " # (batch_size, sequence_length, vocab_size)") 
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (32, 10, 125)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.828505


In [59]:
### Hyperparameter setting and optimization ###

# Optimization parameters:
num_training_iterations = 1000  # Increase this to train longer
batch_size = 50  # Experiment between 1 and 64
seq_length = 50  # Experiment between 50 and 500
learning_rate = 5e-3  # Experiment between 1e-5 and 1e-1

# Model parameters: 
vocab_size = len(pitch_names)
embedding_dim = 64 
rnn_units = 512  # Experiment between 1 and 2048

# Checkpoint location: 
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

In [60]:
model = create_network(vocab_size, embedding_dim, rnn_units, batch_size)

optimizer = tf.keras.optimizers.Adam(learning_rate)

@tf.function
def train_step(x, y): 
  # Use tf.GradientTape()
  with tf.GradientTape() as tape:
  
    y_hat = model(x) 
  
    loss = compute_loss(y, y_hat) 

  # Now, compute the gradients 
  grads = tape.gradient(loss, model.trainable_variables) # TODO
  
  # Apply the gradients to the optimizer so it can update the model accordingly
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss

##################
# Begin training!#
##################

history = []
if hasattr(tqdm, '_instances'): tqdm._instances.clear() # clear if it exists

for iter in tqdm(range(num_training_iterations)):

  # Grab a batch and propagate it through the network
  x_batch, y_batch = get_batch(vectorized_notes, seq_length, batch_size)
  loss = train_step(x_batch, y_batch)

  # Update the progress bar
  history.append(loss.numpy().mean())

  # Update the model with the changed weights!
  if iter % 100 == 0:     
    model.save_weights(checkpoint_prefix)
    
# Save the trained model and the weights
model.save_weights(checkpoint_prefix)

100%|██████████| 1000/1000 [00:26<00:00, 38.43it/s]


In [61]:
model = create_network(vocab_size, embedding_dim, rnn_units, batch_size=1) 

# Restore the model weights for the last checkpoint after training
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (1, None, 64)             8000      
_________________________________________________________________
lstm_14 (LSTM)               (1, None, 512)            1181696   
_________________________________________________________________
dropout_7 (Dropout)          (1, None, 512)            0         
_________________________________________________________________
lstm_15 (LSTM)               (1, None, 512)            2099200   
_________________________________________________________________
dense_7 (Dense)              (1, None, 125)            64125     
Total params: 3,353,021
Trainable params: 3,353,021
Non-trainable params: 0
_________________________________________________________________


In [62]:
def generate_text(model, start_string, generation_length=100):

  input_eval = [note_to_int[s] for s in start_string.split()]
  input_eval = tf.expand_dims(input_eval,0)

  text_generated = []

  model.reset_states()
  tqdm._instances.clear()

  for i in tqdm(range(generation_length)):
      predictions = model(input_eval)
      
      # Remove the batch dimension
      predictions = tf.squeeze(predictions, 0)
      
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
      
      # Pass the prediction along with the previous hidden state
      #   as the next inputs to the model
      input_eval = tf.expand_dims([predicted_id], 0)
      
      text_generated.append(int_to_note[predicted_id]) # TODO 
      # text_generated.append('''TODO''')
    
  return (start_string + ' '.join(text_generated))

In [68]:
np.random.choice(notes)

'F#2'

In [75]:
generated_text = generate_text(model, start_string=np.random.choice(notes), generation_length=50)

100%|██████████| 50/50 [00:00<00:00, 139.29it/s]


In [45]:
def create_midi(generated_text):
  '''Converts the output from the prediction into notes and then creates a midi 
  file from the notes

  returns Stream
  '''

  offset = 0 #may change this later
  output_notes = []

  for pattern in generated_text:
      if ('.' in pattern) or pattern.isdigit():
        notes_in_chord = pattern.split('.')
        notes = []
        for current_note in notes_in_chord:
          new_note = note.Note(int(current_note))
          new_note.storedInstrument = instrument.Piano()
          notes.append(new_note)
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
      else:
        new_note = note.Note(pattern)
        new_note.offset = offset
        output_notes.append(new_note)
        
      offset += .5

  midi_stream = stream.Stream(output_notes)
  return midi_stream

In [25]:
#To play midi in notebook
!apt install fluidsynth
!cp /usr/share/sounds/sf2/FluidR3_GM.sf2 ./font.sf2
!pip install midi2audio
from midi2audio import FluidSynth
from IPython.display import Audio

Reading package lists... Done
Building dependency tree       
Reading state information... Done
fluidsynth is already the newest version (1.1.9-1).
0 upgraded, 0 newly installed, 0 to remove and 11 not upgraded.


In [46]:
#from first original midi file loaded
midi_orig_test = create_midi(notes[0:100])
midi_orig_test.write('midi', fp='orig_output.mid')
FluidSynth("font.sf2").midi_to_audio('orig_output.mid', 'test_orig.wav')
Audio("test_orig.wav")

In [76]:
#output
midi_stream = create_midi(generated_text.split())
midi_stream.write('midi', fp='test_output.mid')
FluidSynth("font.sf2").midi_to_audio('test_output.mid', 'test.wav')
Audio("test.wav")