In [48]:
import pandas as pd
import numpy as np
import pretty_midi
import music21

In [49]:
import os
os.chdir("/Users/desireewaugh/Desktop/MIT/Courses/6.883 - Modeling with ML/Projects/Final Project/MusicGenerator/data")

## Play abc file

In [50]:
# Run "pip install music21" in your chosen directory
from music21 import *

In [4]:
music_file = converter.parse('rockethornpipe-1.abc')

In [5]:
# Need to download pygame to play files: "pip install pygame"
sp = midi.realtime.StreamPlayer(music_file)
sp.play()

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


## With midi files

In [4]:
# Data from: https://magenta.tensorflow.org/datasets/groove#dataset
# Possible dataset: https://drive.google.com/file/d/14e0MCJD7RH_m7CpsFZWPIpO0WgQrwi64/view

In [78]:
midi = pretty_midi.PrettyMIDI("hotel_california.mid")
piano_midi = midi.instruments[2]
piano_roll = piano_midi.get_piano_roll()

piano_roll

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

### Featurization Option 1: Matrix with note start, end, pitch, and velocity
### Each row is a separate observation

In [190]:
# Reference: https://www.audiolabs-erlangen.de/resources/MIR/FMP/C1/C1S2_MIDI.html
def encode_midi(midi_file):
    midi_info = []
    for instrument in midi_file.instruments:
        for note in instrument.notes:
            start = note.start
            end = note.end
            pitch = note.pitch
            velocity = note.velocity
            midi_info.append([start, end, pitch, velocity, 
                              instrument.program, int(instrument.is_drum), instrument.name])

    df = pd.DataFrame(midi_info, columns=['Start', 'End', 'Pitch', 'Velocity', 
                                                   'Program', 'is_drum', 'Name'])
    midi_matrix = df[['Start', 'End', 'Pitch', 'Velocity', 'Program', 'is_drum']].values
    return midi_matrix
    
test_matrix = encode_midi(midi)
test_matrix[1,:]

array([ 1.78149313,  1.78961   , 42.        , 72.        ,  0.        ,
        1.        ])

In [205]:
# Create midi file from a dataframe
# Reference: http://craffel.github.io/pretty-midi/

def decode_midi(midi_matrix, instrument_name):
    created_midi = pretty_midi.PrettyMIDI()

    # Add instruments
    for i in range(midi_matrix.shape[0]):
        instrument = pretty_midi.Instrument(program=midi_matrix[i,4], 
                                            is_drum=bool(midi_matrix[i,5]),
                                            name=instrument_name)

        note = pretty_midi.Note(  
            start=midi_matrix[i, 0], 
            end=midi_matrix[i, 1],
            pitch=midi_matrix[i, 2],
            velocity=midi_matrix[i, 3])
        instrument.notes.append(note)
        created_midi.instruments.append(instrument)

    return created_midi

new_midi = decode_midi(test_matrix, "Acoustic Grand Piano")

# Save as midi file
new_midi.write("test_midi.midi")

TypeError: data byte must be int

In [195]:
# Play written file - this does not sounds great, but it might be good enough to train a NN
b = converter.parse("test_midi.midi")
sp = music21.midi.realtime.StreamPlayer(b)
sp.play()

In [14]:
# Play actual file - sounds much better
b = converter.parse("hotel_california.mid")
sp = music21.midi.realtime.StreamPlayer(b)
sp.play()

## Encoding

In [166]:
# Source: https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5

midi_file = music21.converter.parse("hotel_california.mid")

parts = music21.instrument.partitionByInstrument(midi_file)

pitches = []
volumes = []
offsets = []

# Get list of each note in the midi file
for p in parts:
    for element in p:
        if type(element) == music21.note.Note:
            pitches.append(str(element.pitch))
            volumes.append(element.volume.velocity)
            offsets.append(element.offset)
        if type(element) == music21.chord.Chord:
            for note in element:
                note.offset = element.offset
                pitches.append(str(note.pitch))
                volumes.append(note.volume.velocity)
                offsets.append(note.offset)
    
    
df = pd.DataFrame(list(zip(pitches, volumes, offsets)), columns=['Pitch', 'Volume', 'Offset'])

num_cols = (np.max(df['Offset']) - np.min(df['Offset']))*4

df.head()

Unnamed: 0,Pitch,Volume,Offset
0,C2,103,4
1,F#2,72,4
2,A3,84,4
3,G2,127,4
4,D4,120,4


In [173]:
music_df = pd.pivot_table(df, values='Volume', index='Pitch', columns='Offset')
music_df = music_df.fillna(0)
music_df

Offset,4.0,4.25,4.5,4.75,5.0,5.25,5.5,5.75,6.0,6.5,...,33.75,34.0,34.25,103/3,34.5,34.75,35.0,35.5,35.75,36.0
Pitch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A3,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0
A4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0
A5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,93.0,95.0,0.0,0.0,0.0,82.0,0.0,0.0,79.0,0.0
B-4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,99.0,0.0,0.0,0.0,90.0,109.0,0.0,92.0,103.0,0.0
B-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108.0
B2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97.0


In [177]:
pitch_list = sorted(np.unique(pitches))
pitch_list

['A2',
 'A3',
 'A4',
 'A5',
 'B-2',
 'B-3',
 'B-4',
 'B-5',
 'B1',
 'B2',
 'B3',
 'B4',
 'B5',
 'C#2',
 'C#3',
 'C#4',
 'C#5',
 'C#6',
 'C2',
 'D2',
 'D3',
 'D4',
 'D5',
 'D6',
 'E1',
 'E2',
 'E3',
 'E4',
 'E5',
 'F#1',
 'F#2',
 'F#3',
 'F#4',
 'F#5',
 'F2',
 'G#2',
 'G1',
 'G2',
 'G3',
 'G4',
 'G5']