In [2]:
from IPython import display
import collections
import datetime
import fluidsynth
import glob
import numpy as np
import pathlib
import pandas as pd
import pretty_midi
import seaborn as sns
import tensorflow as tf

from matplotlib import pyplot as plt
from typing import Dict, List, Optional, Sequence, Tuple

In [3]:
# Seed TensorFlow and NumPy so runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Sampling rate for audio playback
_SAMPLING_RATE = 16000

# Kept for reference: download of the full maestro dataset (1282 files).
# data_dir = pathlib.Path('data/maestro-v2.0.0')
# if not data_dir.exists():
#   tf.keras.utils.get_file(
#       'maestro-v2.0.0-midi.zip',
#       origin='https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip',
#       extract=True,
#       cache_dir='.', cache_subdir='data',
#   )

# NOTE: despite its name, `data_dir` is the path of a single MIDI file.
# The local copy is preferred; this absolute path only exists on the author's
# machine, so everywhere else the download branch below is taken.
data_dir = pathlib.Path('/Volumes/MAGIC1/CS50/myMusicGen/data/mj.mid')
if not data_dir.exists():
  # get_file() saves the download under ./data/ and returns its location.
  # Point `data_dir` at that location so the file loaded below is the one that
  # was just fetched (previously the missing absolute path was still used).
  # `extract=True` was dropped: a .mid file is not an archive.
  data_dir = pathlib.Path(tf.keras.utils.get_file(
      'mj.mid',
      origin='https://www.midiworld.com/download/658',
      cache_dir='.', cache_subdir='data',
  ))

# A single path string (not a list of files), kept under the original name.
filenames = str(data_dir)

# Process a MIDI file
sample_file = filenames
print(sample_file)

# generate a prettyMIDI object for the sample MIDI file
pm = pretty_midi.PrettyMIDI(sample_file)

/Volumes/MAGIC1/CS50/myMusicGen/data/mj.mid


In [4]:
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=90):
    """Synthesize `pm` and return an inline audio player for its beginning.

    Args:
        pm: Object whose `fluidsynth(fs=...)` method renders the waveform.
        seconds: Length of the excerpt to keep, in seconds. May be fractional
            (e.g. 2.5); the resulting sample count is truncated to an integer.

    Returns:
        The `display.Audio` object built from the truncated waveform at
        `_SAMPLING_RATE`.
    """
    waveform = pm.fluidsynth(fs=float(_SAMPLING_RATE))
    # Slice bounds must be integers; without int() a float `seconds` raised
    # TypeError. For integer `seconds` the value is unchanged.
    n_samples = int(seconds * _SAMPLING_RATE)
    # Take a sample of the generated waveform to mitigate kernel resets
    waveform_short = waveform[:n_samples]
    return display.Audio(waveform_short, rate=_SAMPLING_RATE)

In [5]:
# Cell output: audio player for the loaded MIDI file (default excerpt length
# of display_audio, i.e. seconds=90).
display_audio(pm)

In [6]:
# Overview of the loaded file: the instrument count, then for every instrument
# its program name and a preview of its first ten notes (pitch number, note
# name, and length = end - start).
print('Number of instruments:', len(pm.instruments))
for instrument in pm.instruments:
    instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
    print('Instrument name:', instrument_name)
    for i, note in enumerate(instrument.notes[:10]):
        duration = note.end - note.start
        note_name = pretty_midi.note_number_to_name(note.pitch)
        print(f'{i}: pitch={note.pitch}, note_name={note_name}, duration={duration:.4f}')

Number of instruments: 13
Instrument name: Acoustic Grand Piano
0: pitch=42, note_name=F#2, duration=0.0603
1: pitch=42, note_name=F#2, duration=0.0603
2: pitch=42, note_name=F#2, duration=0.0603
3: pitch=42, note_name=F#2, duration=0.0603
4: pitch=36, note_name=C2, duration=0.0603
5: pitch=40, note_name=E2, duration=0.0603
6: pitch=40, note_name=E2, duration=0.0603
7: pitch=40, note_name=E2, duration=0.0603
8: pitch=40, note_name=E2, duration=0.0603
9: pitch=36, note_name=C2, duration=0.0603
Instrument name: Electric Guitar (clean)
0: pitch=61, note_name=C#4, duration=0.0548
1: pitch=70, note_name=A#4, duration=0.0548
2: pitch=66, note_name=F#4, duration=0.0713
3: pitch=66, note_name=F#4, duration=0.0658
4: pitch=70, note_name=A#4, duration=0.0713
5: pitch=61, note_name=C#4, duration=0.0713
6: pitch=61, note_name=C#4, duration=0.0384
7: pitch=70, note_name=A#4, duration=0.0384
8: pitch=66, note_name=F#4, duration=0.0548
9: pitch=58, note_name=A#3, duration=0.0713
Instrument name: Elec

In [7]:
def midi_to_notes(midi_file: str) -> Tuple[pd.DataFrame, List[str]]:
  """Flatten every note of a MIDI file into one table.

  Instruments are visited in the order of `pm.instruments`. Within each
  instrument the notes are sorted by start time and appended to the table, so
  the rows of all instruments end up one after another in a single frame (no
  column records which instrument a row came from).

  Args:
    midi_file: Path handed to `pretty_midi.PrettyMIDI`.

  Returns:
    A `(notes, instrument_names)` tuple:
      * `notes`: DataFrame with columns `pitch`, `start`, `end`, `step`
        (start minus the start of the previous note of the same instrument,
        0 for an instrument's first note) and `duration` (`end - start`).
        The columns are present even when the file contains no notes.
      * `instrument_names`: one name per instrument, derived from its program
        number, including instruments that have no notes.

  NOTE(review): names are looked up from the program number only, so a
  percussion track would presumably be reported under its program's melodic
  name - confirm against `instrument.is_drum` if that matters.
  """
  pm = pretty_midi.PrettyMIDI(midi_file)
  notes = collections.defaultdict(list)
  instrument_names = []
  for instrument in pm.instruments:
    instrument_names.append(
        pretty_midi.program_to_instrument_name(instrument.program))

    # Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    if not sorted_notes:
      # An instrument without notes contributes no rows; indexing
      # sorted_notes[0] below would otherwise raise IndexError.
      continue

    # The first note of each instrument gets step 0.
    prev_start = sorted_notes[0].start
    for note in sorted_notes:
      start = note.start
      end = note.end
      notes['pitch'].append(note.pitch)
      notes['start'].append(start)
      notes['end'].append(end)
      notes['step'].append(start - prev_start)
      notes['duration'].append(end - start)
      prev_start = start

  # Name the columns explicitly so an empty file still yields the full schema
  # (the defaultdict supplies an empty list for any column never appended to).
  columns = ('pitch', 'start', 'end', 'step', 'duration')
  frame = pd.DataFrame({name: np.array(notes[name]) for name in columns})
  return frame, instrument_names

In [8]:
# midi_to_notes returns two values: the note table and the list of
# instrument names found in the file.
raw_notes, instrument_names_out = midi_to_notes(sample_file)
print(instrument_names_out)
# Cell output: the first rows of the note table.
raw_notes.head()


['Acoustic Grand Piano', 'Electric Guitar (clean)', 'Electric Bass (finger)', 'Ocarina', 'Electric Piano 2', 'String Ensemble 1', 'Lead 8 (bass + lead)', 'Choir Aahs', 'French Horn', 'Tuba', 'Pad 1 (new age)', 'Brass Section', 'Baritone Sax']


Unnamed: 0,pitch,start,end,step,duration
0,42,2.105264,2.165571,0.0,0.060307
1,42,2.63158,2.691887,0.526316,0.060307
2,42,3.157896,3.218203,0.526316,0.060307
3,42,3.684212,3.744519,0.526316,0.060307
4,36,4.210528,4.270835,0.526316,0.060307


In [9]:
# Wrap pretty_midi's pitch-number -> note-name conversion with np.vectorize so
# it can be applied to the whole 'pitch' column in one call.
get_note_names = np.vectorize(pretty_midi.note_number_to_name)
sample_note_names = get_note_names(raw_notes['pitch'])
# Cell output: the first ten note names.
sample_note_names[:10]

array(['F#2', 'F#2', 'F#2', 'F#2', 'C2', 'E2', 'E2', 'E2', 'E2', 'C2'],
      dtype='<U3')

In [12]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str,
  instrument_names: List[str],
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
  """Build a MIDI object from a note table and write it to `out_file`.

  Start times are reconstructed as the running sum of the `step` column
  (beginning at 0), and each end time is `start + duration`. The `start` and
  `end` columns of `notes`, if present, are not read.

  Args:
    notes: Table with `pitch`, `step` and `duration` columns, one row per note.
    out_file: Path the MIDI file is written to.
    instrument_names: One instrument is created per name, using the program
      returned by `pretty_midi.instrument_name_to_program`.
    velocity: Velocity assigned to every note.

  Returns:
    The `pretty_midi.PrettyMIDI` object that was written.

  NOTE(review): every instrument receives a note for *every* row of `notes`;
  the table carries no information about which instrument a row belongs to,
  so all instruments play the same sequence. Confirm this is intended.
  """
  # The timeline is identical for every instrument, so compute it once instead
  # of re-walking the frame row by row for each instrument.
  starts = np.cumsum(notes['step'].to_numpy(dtype=np.float64))
  ends = starts + notes['duration'].to_numpy(dtype=np.float64)
  timeline = [
      (int(pitch), float(start), float(end))
      for pitch, start, end in zip(notes['pitch'].to_numpy(), starts, ends)
  ]

  pm = pretty_midi.PrettyMIDI()
  for instrument_name in instrument_names:
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(
            instrument_name))
    # Fresh Note objects per instrument so instruments never share instances.
    instrument.notes.extend(
        pretty_midi.Note(velocity=velocity, pitch=pitch, start=start, end=end)
        for pitch, start, end in timeline
    )
    pm.instruments.append(instrument)

  pm.write(out_file)
  return pm


In [13]:
# Rebuild a MIDI file from the extracted note table and keep the resulting
# PrettyMIDI object for playback.
# NOTE(review): raw_notes holds the notes of all instruments in one table, and
# notes_to_midi adds a note for every row to each instrument it creates, so
# every instrument in example.midi plays the same full sequence - confirm
# this is the intended result.
example_file = 'example.midi'
example_pm = notes_to_midi(
    raw_notes, out_file=example_file, instrument_names=instrument_names_out)

In [14]:

# Cell output: audio player for the MIDI rebuilt from the note table.
display_audio(example_pm)