<a href="https://colab.research.google.com/github/DLOHai/Burgundymusic/blob/master/Midi_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
# Mount Google Drive at ./gdrive; the dataset path configured below lives under this mount point.
drive.mount('./gdrive')

Drive already mounted at ./gdrive; to attempt to forcibly remount, call drive.mount("./gdrive", force_remount=True).


In [0]:
import mido
import numpy as np
import os
from os import listdir
from os.path import isfile, join

## Dataset Path

In [4]:
# Location of the MAESTRO dataset (customize the path)
maestro_dir = './gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0'

# Directory list: one sub-directory per recording year
data_dirs = []
for year in ['2004', '2006', '2008', '2009', '2011', '2013', '2014', '2015', '2017', '2018']:
    data_dirs.append(maestro_dir + '/' + str(year))

# File-name suffixes (compared case-insensitively) that identify a MIDI file
MIDI_EXTENSIONS = ('.mid', '.midi')


def list_midi_files(dirs, extensions=MIDI_EXTENSIONS):
    '''
    Collect the MIDI files that sit directly inside each directory of `dirs`.

    A file is kept only when its name ends with one of `extensions`
    (case-insensitive). A plain substring test such as `'mid' in name` would
    also pick up unrelated files like 'humid.txt'.

    Args:
        dirs: iterable of directory paths to scan (not recursive).
        extensions: tuple of accepted file-name suffixes, lower-case.

    Returns:
        Sorted list of file paths. Directories that do not exist are reported
        with a printed message and skipped.
    '''
    found = []
    for data_dir in dirs:
        if not os.path.isdir(data_dir):
            print('Skipping missing directory : ', data_dir)
            continue
        # Keep regular files only (sub-directories are ignored) with a MIDI suffix
        found += [join(data_dir, f) for f in listdir(data_dir)
                  if isfile(join(data_dir, f)) and f.lower().endswith(extensions)]
    return sorted(found)


# Add every MIDI file found in the year directories to the file list
data_files = list_midi_files(data_dirs)
print('The number of midi files : ', len(data_files))

The number of midi files :  1282


In [0]:
def get_eventlist(data_file):
    '''
    Parse a MIDI file into an array of note and sustain-pedal events.

    Each row has the form [Time, Type, Value1, Value2]:
      * Time   - running sum of `msg.time` over the messages iterated so far
                 (mido reports these deltas in seconds when a MidiFile is iterated)
      * Type   - 1 (ON), 0 (OFF) or 2 (CC)
      * Value1 - note number for ON/OFF rows, 0 for CC rows
      * Value2 - velocity for ON/OFF rows; 1 (pedal pressed) or 0 (pedal released) for CC rows

    A `note_on` with velocity 0 is treated as a note-off. Of the control
    changes only controller 64 (sustain pedal) is considered, and only its
    transitions are recorded: released -> pressed (value > 0) and
    pressed -> released (value == 0).

    Args:
        data_file: path of the MIDI file, passed to `mido.MidiFile`.

    Returns:
        np.ndarray of dtype float64 and shape (N, 4); shape (0, 4) when the
        file contains no matching events.
    '''

    ON = 1
    OFF = 0
    CC = 2
    SUSTAIN_PEDAL = 64

    midi = mido.MidiFile(data_file)

    current_time = 0
    eventlist = []
    pedal_down = False  # current sustain-pedal state, used to record transitions only
    for msg in midi:
        current_time += msg.time

        # Message types are compared with `==`: `is` tests object identity and only
        # works for strings by accident of interning.

        # NOTE ON CASE
        if msg.type == 'note_on' and msg.velocity > 0:
            eventlist.append([current_time, ON, msg.note, msg.velocity])

        # NOTE OFF CASE (explicit note_off, or note_on with zero velocity)
        elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
            eventlist.append([current_time, OFF, msg.note, msg.velocity])

        # SUSTAIN PEDAL CASE (every other controller is ignored)
        elif msg.type == 'control_change' and msg.control == SUSTAIN_PEDAL:
            if not pedal_down and msg.value > 0:
                pedal_down = True
                eventlist.append([current_time, CC, 0, 1])
            elif pedal_down and msg.value == 0:
                pedal_down = False
                eventlist.append([current_time, CC, 0, 0])

    # reshape keeps the (N, 4) layout even when no event was collected
    return np.asarray(eventlist, dtype=np.float64).reshape(-1, 4)

In [6]:
# Sample print: parse one randomly chosen file and show its event list
index = np.random.randint(len(data_files))
eventlist = get_eventlist(data_files[index])
print(eventlist)

[[  0.303125     2.           0.           1.        ]
 [  0.99166667   1.          52.          33.        ]
 [  0.99791667   1.          64.          48.        ]
 ...
 [150.81354167   0.          76.           0.        ]
 [150.834375     0.          68.           0.        ]
 [151.10833333   2.           0.           0.        ]]


### Single MIDI Parsing Test

In [0]:
# Single file used for the parsing experiments below.
# The path is relative to the './gdrive' mount point, like the dataset path, so it
# does not depend on the notebook's working directory being '/content'.
midi_path = './gdrive/My Drive/Colab Notebooks/midi/AbdelmoulaJS01.MID'
midi = mido.MidiFile(midi_path)

In [8]:
# Show the file-level attributes of the loaded MidiFile object
header_fields = (midi.charset, midi.debug, midi.length, midi.ticks_per_beat, midi.type)
print('charset: {}, debug: {}, length: {}, ticks_per_beat: {}, type: {}'.format(*header_fields))

charset: latin1, debug: False, length: 855.3370185939978, ticks_per_beat: 384, type: 0


In [0]:
#midi.print_tracks()

In [0]:
#midi.save()

In [9]:
# Show the list of tracks held by the loaded MIDI file
print(midi.tracks)

[<midi track 'AbdelmoulaJS_JanacekMists' 25946 messages>]


In [10]:
# Read the tempo (microseconds per quarter note) from the first set_tempo meta message.
# Searching by message type avoids depending on the message sitting at a fixed index
# in the track; a file with no set_tempo falls back to the MIDI default tempo.
DEFAULT_TEMPO = 500000  # microseconds per quarter note (120 BPM)
tempo = next((msg.tempo for msg in midi.tracks[0] if msg.type == 'set_tempo'), DEFAULT_TEMPO)
ticks_per_beat = midi.ticks_per_beat  # integer
us_per_tick = tempo / ticks_per_beat  # absolute duration of one tick (microseconds)

print('tempo: {}us\nticks per beat: {} ticks/beat\nus_per_tick: {} us'.format(tempo, ticks_per_beat, us_per_tick))

tempo: 512820us
ticks per beat: 384 ticks/beat
us_per_tick: 1335.46875 us


In [0]:
# List every note_on message whose velocity is 0 (such messages act as note-offs).
for idx, msg in enumerate(midi):
    # `==` compares the text of the type; `is` would compare object identity
    if msg.type == 'note_on' and msg.velocity == 0:
        print(idx,':', msg)
        ## Observation: this file does not seem to contain a note_on with velocity == 0

### timing

`midi.tracks`의 message를 가져오면 time 이 tick의 개수(정수 0~125) 로 표현됨

   e.g) 어떤 event의 `msg.time` 이 15일 때,이전 event와의 시간차이는 15 tick이다!

`midi` 자체(header chunk가 포함된)의 message 를 가져오면 절대적인 시간으로 표현됨

여기서의 time = (midi.tracks 의 time tick의 개수  : 정수) * `(microseconds per tick)` * 10^-6

즉, 초(second, s) 단위로 측정된 시간정보 (microsecond 값에 10^-6 을 곱하면 second 가 되며, picosecond 가 아님)

  e.g.) 어떤 event의 `msg.time` 이 0.0234523일 때, 이전 event와의 시간차이는 0.0234523 초(s)이다.

In [12]:
# For each message, compare the raw tick delta from the track, the delta that mido
# reports when the file itself is iterated, and the manual tick -> second conversion
# (ticks * microseconds-per-tick * 1e-6).
MAX_ROWS = 40  # the track holds tens of thousands of messages; a short prefix is enough
for idx, (track_msg, timed_msg) in enumerate(zip(midi.tracks[0], midi)):
    if idx >= MAX_ROWS:
        break
    # both are mido.messages.messages.Message objects
    manual_seconds = track_msg.time * us_per_tick * 1e-6
    print('{}: {} ticks, {}s, {}s'.format(idx, track_msg.time, timed_msg.time, manual_seconds))

0: 0 ticks, 0ps, 0.0ps
1: 0 ticks, 0ps, 0.0ps
2: 0 ticks, 0ps, 0.0ps
3: 0 ticks, 0ps, 0.0ps
4: 0 ticks, 0ps, 0.0ps
5: 0 ticks, 0ps, 0.0ps
6: 0 ticks, 0ps, 0.0ps
7: 0 ticks, 0ps, 0.0ps
8: 0 ticks, 0ps, 0.0ps
9: 0 ticks, 0ps, 0.0ps
10: 0 ticks, 0ps, 0.0ps
11: 0 ticks, 0ps, 0.0ps
12: 0 ticks, 0ps, 0.0ps
13: 0 ticks, 0ps, 0.0ps
14: 0 ticks, 0ps, 0.0ps
15: 0 ticks, 0ps, 0.0ps
16: 0 ticks, 0ps, 0.0ps
17: 0 ticks, 0ps, 0.0ps
18: 1 ticks, 0.0013354687499999999ps, 0.0013354687500000005ps
19: 1 ticks, 0.0013354687499999999ps, 0.0013354687500000005ps
20: 1 ticks, 0.0013354687499999999ps, 0.0013354687500000005ps
21: 0 ticks, 0ps, 0.0ps
22: 0 ticks, 0ps, 0.0ps
23: 0 ticks, 0ps, 0.0ps
24: 0 ticks, 0ps, 0.0ps
25: 0 ticks, 0ps, 0.0ps
26: 0 ticks, 0ps, 0.0ps
27: 0 ticks, 0ps, 0.0ps
28: 0 ticks, 0ps, 0.0ps
29: 0 ticks, 0ps, 0.0ps
30: 0 ticks, 0ps, 0.0ps
31: 0 ticks, 0ps, 0.0ps
32: 0 ticks, 0ps, 0.0ps
33: 0 ticks, 0ps, 0.0ps
34: 0 ticks, 0ps, 0.0ps
35: 0 ticks, 0ps, 0.0ps
36: 1 ticks, 0.00133546874999999

In [16]:
# tqdm.auto selects the notebook widget or the console bar as appropriate;
# the older `tqdm_notebook` alias is deprecated.
from tqdm.auto import tqdm
import os

dataset_dir = 'dataset_cc'

# Event-list files are saved in dataset_dir (created if it does not exist yet)
os.makedirs(dataset_dir, exist_ok=True)

# Converting loop: parse every MIDI file and store its event list as <index>.npz
for i, data_file in enumerate(tqdm(data_files)):
    print(data_file)
    eventlist = get_eventlist(data_file)
    print(eventlist.shape)

    save_file = dataset_dir + '/' + str(i)
    # Pass only the array: on NumPy versions where savez has no `allow_pickle`
    # parameter, every keyword is stored as an array, so `allow_pickle=False`
    # would add a stray 'allow_pickle' entry to the archive.
    np.savez(save_file, eventlist=eventlist)

HBox(children=(IntProgress(value=0, max=1282), HTML(value='')))

./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.mid
(17273, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.mid
(2477, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.mid
(5864, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_10_Track10_wav.mid
(13519, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_02_Track02_wav.mid
(38238, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.mid
(12024, 4)
./gdrive/My Drive/Colab Notebooks/midi/maestro-v2.0.0/2004/MIDI-Unprocessed_SMF_05_R

KeyboardInterrupt: ignored