In [1]:
# Import libraries
from mido import MidiFile, MidiTrack

# Import file stuff
import os

# Import basic
import pandas as pd
import numpy as np

In [2]:
# Finding the notes, velocities, and duration from the midi file
def process_midi_file(filepath):
    # Create the midi object
    midi = MidiFile(filepath)

    # Empty lists to hold the values
    notes = []
    velocities = []
    durations = []

    # Iterate through the midi file via track
    # Tack is a list of messages and meta messages that we can pull from
    for track in midi.tracks:
        time = 0

        # Find the time when there is data
        for msg in track:
            time += msg.time
            
            # We only want to time it when the note is pressed
            if msg.type == 'note_on':
                notes.append(msg.note)
                velocities.append(msg.velocity)
                durations.append(time)
                time = 0
                
            # While there isn't a note played, continue through the track
            elif msg.type == 'note_off':
                time += msg.time

    return notes, velocities, durations

In [3]:
filepath = './Composer_Dataset/NN_midi_files_extended/dev/bach/bach344.mid'
notes, velocities, durations = process_midi_file(filepath)

In [4]:
def get_children(a_dir):
    dirs = []
    files = []
    for name in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, name)):
            dirs.append(name)
        else:
            files.append(name)
    return [dirs,files]

def create_files_table(top_level, out_file):
    temp_comps = []
    temp_songs = []
    temp_paths = []
    
    composer_names, songs = get_children(top_level)

    for composer in composer_names:
        temp_path = top_level + '/' + composer
        temp, songs = get_children(temp_path)
        for song in songs:
            if song != '.DS_Store':
                temp_comps.append(composer)
                temp_paths.append(temp_path + '/' + song)
                temp_songs.append(song.split(".")[0])

    temp_dict = {'Composers': temp_comps, 'Songs': temp_songs, 'Paths': temp_paths}

    table = pd.DataFrame.from_dict(temp_dict)

    table.to_csv('./' + out_file + '.csv',index=False)
    
    return table

In [5]:
devpath = './Composer_Dataset/NN_midi_files_extended/dev/'
testpath = './Composer_Dataset/NN_midi_files_extended/test/'
trainpath = './Composer_Dataset/NN_midi_files_extended/train/'

dev_table = create_files_table(devpath, 'dev_table')
test_table = create_files_table(testpath, 'test_table')
train_table = create_files_table(trainpath, 'train_table')

In [6]:
dev_table.head()

Unnamed: 0,Composers,Songs,Paths
0,bach,bach344,./Composer_Dataset/NN_midi_files_extended/dev/...
1,bach,bach348,./Composer_Dataset/NN_midi_files_extended/dev/...
2,bach,bach354,./Composer_Dataset/NN_midi_files_extended/dev/...
3,bach,bach384,./Composer_Dataset/NN_midi_files_extended/dev/...
4,bartok,bartok410,./Composer_Dataset/NN_midi_files_extended/dev/...


In [37]:
def process_midi_file_enhanced(filepath, composer, song_name):
    # number of channels possible in midi file
    num_channels = 16
    midi = MidiFile(filepath)

    notes = []
    velocities = []
    start_note = []
    stop_note = []
    is_on = []
    channel = []

    for track in midi.tracks:
        time = np.zeros(num_channels)
        for msg in track:
            if msg.type in ['note_on', 'note_off']:
                notes.append(msg.note)
                velocities.append(msg.velocity)
                start_note.append(time[msg.channel])
                time[msg.channel] += msg.time
                stop_note.append(time[msg.channel])
                channel.append(msg.channel)
            if msg.type == 'note_on':
                is_on.append(1)
            elif msg.type == 'note_off':
                is_on.append(0)
    
    temp_dict = {'note': notes, 'velocity': velocities, 'start_note': start_note, 
             'stop_note': stop_note, 'is_on': is_on, 'channel': channel}
    temp_pd = pd.DataFrame.from_dict(temp_dict)
    temp_pd['composer'] = composer
    temp_pd['song_name'] = song_name

    return temp_pd

In [8]:
filepath = './Composer_Dataset/NN_midi_files_extended/dev/bach/bach344.mid'
midi = MidiFile(filepath)

In [54]:
msg= midi.tracks[1]

In [46]:
exp = []
for row in dev_table.iterrows():
    temp_pd = process_midi_file_enhanced(row[1]['Paths'], row[1]['Composers'], row[1]['Songs'])
    exp.append(temp_pd)


In [50]:
df_final = pd.concat(exp,ignore_index=True)    

In [51]:
df_final

Unnamed: 0,note,velocity,start_note,stop_note,is_on,channel,composer,song_name
0,43,127,0.0,88.0,1,7,bach,bach344
1,62,127,0.0,177.0,1,2,bach,bach344
2,62,127,177.0,350.0,0,2,bach,bach344
3,64,127,0.0,5.0,1,4,bach,bach344
4,64,127,5.0,178.0,0,4,bach,bach344
...,...,...,...,...,...,...,...,...
221523,38,64,165376.0,165376.0,1,0,schumann,schumann238
221524,26,0,165376.0,166400.0,0,0,schumann,schumann238
221525,38,0,166400.0,166400.0,0,0,schumann,schumann238
221526,66,0,166400.0,166912.0,0,0,schumann,schumann238


In [52]:
df_final.iloc[df_final['stop_note'].idxmax()] 

note                 66
velocity            119
start_note    1402800.0
stop_note     1402920.0
is_on                 0
channel               0
composer         hummel
song_name     hummel327
Name: 129480, dtype: object

In [53]:
df_final.to_csv('./notes_table_dev.csv')