CHECKING SYSTEM HARDWARE AND IMPORTS

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if not (tf.config.list_physical_devices('TPU') or tf.config.list_physical_devices('GPU') ):
    print("No GPU was detected. Neural nets can be very slow without a GPU.")
    if "google.colab" in sys.modules:
        print("Go to Runtime > Change runtime and select a GPU hardware "
              "accelerator.")
    if "kaggle_secrets" in sys.modules:
        print("Go to Settings > Accelerator and select GPU.")
elif tf.config.list_physical_devices("TPU"):
  print("LEZZZZZZ GOOOO  TPU") #ALL GOOD
else:
  print("Using GPU WHOOHOO")
physical_devices = tf.config.list_physical_devices()
for device in physical_devices:
    print("Device name:", device.name)


Using GPU WHOOHOO
Device name: /physical_device:CPU:0
Device name: /physical_device:GPU:0


Imports for the Project

In [3]:
import glob
import pickle
from music21 import converter, instrument, note, chord

## GET NOTES AND CHORDS FROM THE MIDI FILES

In [4]:
def get_notes():

  notes = []

  for midi_file in glob.glob("/content/drive/MyDrive/midi_songs/*.mid"): # all .mid files in midi_songs
    midi = converter.parse(midi_file) #WHAT DOES THIS DO EXCATLY?

    print("Parsing %s" % midi_file)

    notes_to_parse = None

    try:
      s2 = instrument.partitionByInstrument(midi)
      notes_to_parse = s2.parts[0].recurse() # WHAT DOES THIS DO EXACTLY?
    except:
      notes_to_parse = midi.flat.notes # file has notes in a flat structure

    for element in notes_to_parse:
      if isinstance(element, note.Note):
        notes.append(str(element.pitch))
      elif isinstance(element, chord.Chord):
        notes.append(".".join(str(n) for n in element.normalOrder)) # Since only piano composer it is only chords and notes
        """
        1. here when song ends and another song begins it hinders generation, example ending in pianisomo will affect the start of next song
        2. need all songs of same nature ie counts/beats, same genre else bias training when ending one song will impact generation of next song
         so something like zero padding in convnets to make all songs of same length and then convert to original length
        """
    """
    See how other midi files in hip hop and pop and clssical and metal and rock have different music21.("notes", "chords", "stuff etc")
    """

  # with open('notes', 'wb') as filepath:
  #   pickle.dump(notes, filepath) # when saving notes already saved and preprocessed so just use that

  return notes



I haven't even understood the data to design a input/output sequence will come back to this later

## Preparing the input/output sequence
input is sequence, output is a character or note

In [12]:
def prepare_sequences(notes, n_vocab, sequence_length=100):#tune this hyper parameter
  """
  Prepare the sequences used by the Neural Network
  #FUTURE IMPROVEMENTS
  Try to set this hyper parameter correctly use domain knowledge or supervised pretrainning
    1. Domain Knowledge, pair up similar songs with similar tempos and verse length so that it easily learn that specific catefory
    2. unsupervised does essentially the same thing and trains differnt models/networks for each and first classifies or validates input
      from user for type of song and then does music generation
  """

  # get all pitch names
  pitchnames = sorted(set( item for item in notes))

  #create a dictionary to map pitches to integers
  note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

  network_input = []
  network_output = []

  #create input sequences and the corresponding outputs
  for i in range (0, len(notes) - sequence_length, 1):
    sequence_in = notes[i:i + sequence_length] #this is input sequence
    sequence_out = notes[i+sequence_length] #generating note instead of sequence
    network_input.append([note_to_int[char] for char in sequence_in]) # creates a list of input sequence (notes->integers)
    network_output.append(note_to_int[sequence_out])

  n_patterns = len(network_input) # no of input patterns

  #EVERYONE HATES INPUT SHAPE ERROR LOL
  network_input = np.reshape(network_input, (n_patterns,sequence_length, 1))

  #normalize input , use standard scaler, batch normalization?
  # this is standard 0-1 i guess like pixel/255
  network_input = network_input / float(n_vocab)

  network_output = tf.one_hot(network_output, n_vocab) # one hot encode the output

  """
  one hot encoding a whole vocabulary in NLP is also not widely popular, see and check out some transferable methods instead
  of one-hot encoding the whole vocablulary of notes.
  In piano it is fine, but when you go all the instruments, beats, and other instruments with violin and everthing in between you need a better
  categorical represenetaion.
  """

  return (network_input, network_output)


## Creating a Network Model/Architecture

In [15]:
def create_model(network_input, n_vocab):
  model = tf.keras.models.Sequential([
      #ADD CONV1D if you can
      tf.keras.layers.LSTM(512, input_shape=(network_input.shape[1], network_input.shape[2]),
                          recurrent_dropout=0.3,
                          return_sequences=True),
      tf.keras.layers.LSTM(511, return_sequences=True, recurrent_dropout=0.3),
      tf.keras.layers.LSTM(512),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.Dropout(0.3),
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.Dropout(0.3),
      tf.keras.layers.Dense(n_vocab, activation="softmax")
  ])
  model.compile(loss='categorical_crossentropy', optimizer='adam')

  return model

In [7]:
def train(model, network_input, network_output):
  filepath = "weights_improvement--{epoch:02d}-{loss:.4f} - bigger.hdf5"
  checkpoint = tf.keras.callbacks.ModelCheckpoint(
      filepath,
      monitor="loss",
      verbose=0,
      save_best_only=True,
      mode="min"
  )
  early_stopping = tf.keras.callbacks.EarlyStopping(patience=10,
                                                    restore_best_weights=True)
  callbacks_list=[checkpoint, early_stopping]
  model.fit(network_input, network_output, epochs=5, batch_size=32, callbacks=callbacks_list)


## DRIVER FUNCTION/CALL

In [8]:
def driver_function():
  notes = get_notes()

  n_vocab = len(set(notes))
  network_input, network_output = prepare_sequences(notes, n_vocab) #can also set sequence length as an argument, default = 100

  model = create_model(network_input, n_vocab)
  train(model, network_input, network_output)


In [16]:
import timeit
exec_time = timeit.timeit(driver_function, number=1)
print("DONE----"*8)
print(exec_time)

Parsing /content/drive/MyDrive/midi_songs/Eternal_Harvest.mid
Parsing /content/drive/MyDrive/midi_songs/Cids.mid
Parsing /content/drive/MyDrive/midi_songs/FF3_Third_Phase_Final_(Piano).mid
Parsing /content/drive/MyDrive/midi_songs/AT.mid
Parsing /content/drive/MyDrive/midi_songs/FF3_Battle_(Piano).mid
Parsing /content/drive/MyDrive/midi_songs/BlueStone_LastDungeon.mid
Parsing /content/drive/MyDrive/midi_songs/DOS.mid
Parsing /content/drive/MyDrive/midi_songs/EyesOnMePiano.mid
Parsing /content/drive/MyDrive/midi_songs/0fithos.mid
Parsing /content/drive/MyDrive/midi_songs/8.mid
Parsing /content/drive/MyDrive/midi_songs/Ff4-BattleLust.mid
Parsing /content/drive/MyDrive/midi_songs/Fiend_Battle_(Piano).mid
Parsing /content/drive/MyDrive/midi_songs/FF4.mid
Parsing /content/drive/MyDrive/midi_songs/FFVII_BATTLE.mid
Parsing /content/drive/MyDrive/midi_songs/Ff7-Jenova_Absolute.mid
Parsing /content/drive/MyDrive/midi_songs/FF8_Shuffle_or_boogie_pc.mid
Parsing /content/drive/MyDrive/midi_songs/F



Epoch 1/5

  saving_api.save_model(


Epoch 2/5



Epoch 3/5



Epoch 4/5



Epoch 5/5



DONE----DONE----DONE----DONE----DONE----DONE----DONE----DONE----
4134.900314979


In [19]:
4134.900314979/60 #minutes

68.91500524965

Things to improve:-
- use tf data api to get data
- build efficient datapipeline
- use prefetch and create a tf.data.Dataset
- see if you can implement windows
-  create val dataset for early stopping
-