### Data Files Path Preparation

In [5]:
import os
from os import listdir
from os.path import isfile, join

# Modify this path to your own MAESTRO dataset
maestro_dir = '../../midi_output'

data_dirs = []
for year in range(1956, 2021):
    data_dirs.append(maestro_dir + '/' + str(year))

data_files = []
for data_dir in data_dirs:
    data_files += [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f)) if 'mid' in f]

data_files.sort()

print('total midi files : ', len(data_files))

total midi files :  1593


### Event Extract from Midi File

In [6]:
import mido
import numpy as np


def get_eventlist(data_file):
    ON = 1
    OFF = 0
    CC = 2

    midi = mido.MidiFile(data_file)

    current_time = 0
    eventlist = []
    cc = False
    for msg in midi:
        #print(msg)
        current_time += msg.time

         # NOTE ON CASE
        if msg.type is 'note_on' and msg.velocity > 0:
            event = [current_time, ON, msg.note, msg.velocity]
            eventlist.append(event)

         # NOTE OFF CASE        
        elif msg.type is 'note_off' or (msg.type is 'note_on' and msg.velocity == 0):
            event = [current_time, OFF, msg.note, msg.velocity]
            eventlist.append(event)
            
        if msg.type is 'control_change':
            
            if msg.control != 64:
                continue
            
            if cc == False and msg.value > 0:
                cc = True
                event = [current_time, CC, 0, 1]
                eventlist.append(event)
                
            elif cc == True and msg.value == 0:
                cc = False
                event = [current_time, CC, 0, 0]
                eventlist.append(event)
                
    eventlist = np.array(eventlist)
    return eventlist

index = np.random.randint(0, len(data_files))
print(index)
eventlist = get_eventlist(data_files[index])
print(eventlist)

22
[[1.53672683e-01 1.00000000e+00 7.50000000e+01 1.00000000e+02]
 [3.62292285e-01 0.00000000e+00 7.50000000e+01 1.00000000e+02]
 [3.77034142e-01 1.00000000e+00 4.90000000e+01 1.00000000e+02]
 ...
 [2.82606298e+02 0.00000000e+00 5.90000000e+01 1.00000000e+02]
 [2.82606298e+02 1.00000000e+00 6.10000000e+01 1.00000000e+02]
 [2.82745229e+02 0.00000000e+00 6.10000000e+01 1.00000000e+02]]
  if msg.type is 'note_on' and msg.velocity > 0:
  elif msg.type is 'note_off' or (msg.type is 'note_on' and msg.velocity == 0):
  elif msg.type is 'note_off' or (msg.type is 'note_on' and msg.velocity == 0):
  if msg.type is 'control_change':


### Midifile to EventListfile

In [9]:
from tqdm import tqdm_notebook as tqdm
import os

dataset_dir = 'dataset'

if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)

for i in tqdm(range(len(data_files))):
    print(data_files[i])
    eventlist = get_eventlist(data_files[i])
    print(eventlist.shape)
    
    save_file = dataset_dir + '/' + str(i)
    data = {'eventlist': eventlist}
    np.savez(save_file, **data, allow_pickle=False)

ModuleNotFoundError: No module named 'tqdm'