In [None]:
%%capture
# Colab setup cell: installs the FluidSynth system package and the Python
# packages needed for MIDI loading and audio synthesis further down.
# %%capture (which must stay on the first line of the cell) hides the
# installer output.
!sudo apt install -y fluidsynth
!pip install --upgrade pyfluidsynth
!pip install pretty_midi

In [56]:
import tensorflow as tf
from tensorflow import keras
import keras.utils

import pandas as pd
import random
import numpy as np

import fluidsynth
import pretty_midi
from IPython import display

from music21 import converter, instrument, note, chord

In [None]:
# Report the library versions this notebook was executed with.
print(f"TF Version: {tf.__version__}")
print(f"Keras Version: {keras.__version__}")

TF Version: 2.7.0
Keras Version: 2.7.0


In [None]:
from google.colab import drive
# Mount Google Drive so the dataset folders below can be read from this
# Colab runtime.
drive.mount("/content/gdrive/")

# Root data folder of the project inside the mounted Drive
ALLDATA_DIR = "/content/gdrive/MyDrive/Github/sm-caltech-stem-machine-learning/data/"

# MAESTRO v3.0.0 folder. The trailing slash matters: file names are appended
# to this value with plain string concatenation elsewhere in the notebook.
DATA_DIR = "/content/gdrive/MyDrive/Github/sm-caltech-stem-machine-learning/data/maestro-v3.0.0/"

Mounted at /content/gdrive/


In [16]:
# Load the MAESTRO metadata table from the dataset's CSV index.
metadata_full = pd.read_csv(DATA_DIR + "maestro-v3.0.0.csv")

# Development shortcut: only the first MAX_FILES rows are kept so the cells
# that parse MIDI files stay fast. Set MAX_FILES to None to use every row.
MAX_FILES = 5
metadata = metadata_full if MAX_FILES is None else metadata_full.head(MAX_FILES)
metadata.head()

Unnamed: 0,canonical_composer,canonical_title,split,year,midi_filename,audio_filename,duration
0,Alban Berg,Sonata Op. 1,train,2018,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,698.66116
1,Alban Berg,Sonata Op. 1,train,2008,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,759.518471
2,Alban Berg,Sonata Op. 1,train,2017,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,464.649433
3,Alexander Scriabin,"24 Preludes Op. 11, No. 13-24",train,2004,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,872.640588
4,Alexander Scriabin,"3 Etudes, Op. 65",validation,2006,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,397.857508


In [None]:
# Sampling rate for audio playback
_SAMPLING_RATE = 16000

def random_midi(df):
  """Return the full path of a randomly chosen MIDI file listed in `df`.

  Args:
    df: DataFrame with a "midi_filename" column whose values are paths
      relative to DATA_DIR.

  Returns:
    DATA_DIR concatenated with the "midi_filename" of the chosen row.

  Raises:
    ValueError: if `df` has no rows.
  """
  if len(df) == 0:
    raise ValueError("random_midi() needs a DataFrame with at least one row")
  # randrange() excludes the upper bound, so the position is always valid for
  # iloc (randint(0, n) could return n and overflow the frame).
  row = random.randrange(len(df))
  # Index the frame that was passed in rather than the global `metadata`.
  return DATA_DIR + df.iloc[row]["midi_filename"]

def display_audio(seconds=30):
  """Build an audio player for the start of a randomly chosen MIDI file.

  Args:
    seconds: Length of the excerpt in seconds. Fractional values are
      accepted; the resulting sample count is truncated to an integer.

  Returns:
    An IPython `display.Audio` object holding the first `seconds` seconds
    (or less, if the piece is shorter) of the synthesized waveform, at
    `_SAMPLING_RATE`.
  """
  midi_file = random_midi(metadata)
  pm = pretty_midi.PrettyMIDI(midi_file)
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

  # Take a sample of the generated waveform to mitigate kernel resets.
  # Slice bounds must be integers, so convert explicitly to support
  # non-integer durations such as 2.5.
  n_samples = int(seconds * _SAMPLING_RATE)
  waveform_short = waveform[:n_samples]
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)
  
# Play the first minute of a randomly picked recording
display_audio(seconds=60)

Output hidden; open in https://colab.research.google.com to view.

In [58]:
# `notes` collects the notes/chords of ALL the MIDI files listed in
# `metadata`, in file order, as strings:
#   - single note -> str() of its pitch (ex 'F-2')
#   - chord       -> the pitch classes of its normal order joined by '.'
#                    (C major triad ---> '0.4.7')
# Pitch classes run from 0 (C) to 11 (B).
notes = []

for file in metadata["midi_filename"]:
  midi = converter.parse(DATA_DIR + file)

  # Group the elements by instrument. partitionByInstrument() can return None
  # when no instrument information is found (behaviour depends on the music21
  # version), so that case is handled like a single-part file.
  score = instrument.partitionByInstrument(midi)
  if score is None or len(score.parts) == 1:
    # NOTE: newer music21 releases spell this `midi.flatten().notes`; `.flat`
    # is kept so that older installs keep working.
    notes_to_parse = midi.flat.notes
  else:
    # Several instrument parts: only the first one is used.
    notes_to_parse = score.parts[0].recurse()

  for sound in notes_to_parse:
    if isinstance(sound, note.Note):
      # String of pitch name added to notes
      notes.append(str(sound.pitch))
    elif isinstance(sound, chord.Chord):
      # Pitch class of each note in the chord, separated by periods
      notes.append('.'.join(str(n) for n in sound.normalOrder))

  

"\nPitch ID:\n\nC to B ---> 1 to 12\nC Major Triad ---> '0.4.7'\n"

In [57]:
# Number of consecutive notes given to the network to predict the next one
sequence_length = 100

# Sorted vocabulary of every distinct note/chord string in the data
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)

# dictionary to map pitches to integers (position in `pitchnames`)
note_to_int = {name: index for index, name in enumerate(pitchnames)}

# Fail early with a clear message instead of an IndexError further down.
if len(notes) <= sequence_length:
  raise ValueError(
      "Need more than %d notes to build a training window, got %d"
      % (sequence_length, len(notes)))

# NOTE: `input` shadows the Python builtin of the same name. The names
# `input` and `output` are kept because other cells may refer to them.
input = []
output = []

# Sliding window over the note list: each window of `sequence_length` notes
# is one training pattern, and the note right after it is its target.
for i in range(0, len(notes) - sequence_length):
  input_seq = notes[i:i + sequence_length]  # notes at index i .. i + sequence_length - 1
  output_seq = notes[i + sequence_length]   # note at index i + sequence_length

  input.append([note_to_int[item] for item in input_seq])  # window as a list of integer ids
  output.append(note_to_int[output_seq])                   # target as a single integer id

# reshaping and normalizing
n_patterns = len(input)
input = np.reshape(input, (n_patterns, sequence_length))  # (n_patterns) rows and (sequence_length) columns
input = input / float(n_vocab)  # scale the integer ids into [0, 1)

# one-hot encoding
# tf.keras.utils.to_categorical is the public entry point (the np_utils
# submodule is not available in every Keras release). num_classes is passed
# explicitly so the width is always n_vocab, even if the highest id never
# occurs as a target.
output = tf.keras.utils.to_categorical(output, num_classes=n_vocab)

print(input[0])


[0.99109792 0.85163205 0.9347181  0.81008902 0.82789318 0.99109792
 0.99406528 0.99406528 0.84866469 0.79228487 0.99109792 0.89317507
 0.89317507 0.77448071 0.80712166 0.99109792 0.81008902 0.87240356
 0.91097923 0.97032641 0.78931751 0.99109792 0.99109792 0.83086053
 0.99109792 0.83086053 0.92581602 0.93175074 0.80118694 0.8694362
 0.93175074 0.92581602 0.8041543  0.87240356 0.8694362  0.79228487
 0.93175074 0.84866469 0.95548961 0.82789318 0.95252226 0.77448071
 0.91394659 0.91394659 0.79228487 0.93175074 0.8694362  0.60830861
 0.95548961 0.91394659 0.89317507 0.89020772 0.79228487 0.93175074
 0.79228487 0.80712166 0.77744807 0.89317507 0.87240356 0.92878338
 0.83086053 0.95252226 0.83086053 0.79525223 0.52225519 0.9347181
 0.87240356 0.59940653 0.83382789 0.59940653 0.77744807 0.64391691
 0.05934718 0.4925816  0.77744807 0.71513353 0.77448071 0.83086053
 0.89317507 0.59940653 0.77744807 0.50445104 0.77151335 0.81008902
 0.35905045 0.77744807 0.115727   0.99109792 0.23738872 0.302670