In [None]:
import numpy as np
from music21 import converter, instrument, note, chord, meter, duration, stream
import music21
import os
from tqdm import tqdm
from music21 import common

In [None]:
def decode_midi(data, header=None):
    strm = stream.Stream()
    
    for element in header[1:]: # TODO: right now I'm skipping the instrument because it adds like 10 empty measures for some reason
        strm.append(element)

    current_note = None
    current_length = 0.25

    for t in range(data.shape[1]):
        nothing = True

        for p, val in enumerate(data[:,t]):
            if val:
                if p < 128:
                    current_note = note.Note()
                    current_note.pitch.midi = p
                    current_length = 0.25

                    nothing = False
                elif p == 128:
                    pass #TODO: something with rests
                else:
                    if nothing:
                        current_length += 0.25
                    current_note.duration = duration.Duration(current_length)
                    #print('{}\t{}'.format(current_note.pitch, current_note.duration))
                    strm.append(current_note)
                    current_note = None
                    nothing = False
        
        if nothing and current_note: # if there was no data in this time step and there is a current note increase it's length by 1 16th
            current_length += 0.25
    
    return strm

In [77]:
from collections import defaultdict
import math
def insert_note(arr, idx, pitch, note_off): 
    arr[idx, pitch] = True

    if note_off > idx:
        arr[idx + 1 : note_off + 1, -2] = True

def beat_to_total_beat(offset_16th, beat, single_note_length):
    return int(offset_16th + (beat - 1) * single_note_length)

def get_windows(notes, beats, window_size, stride_length):
    windows = [[] for _ in range(int((total_beats - window_size) / stride_length))]
    
    last_start = 0
    for idx in range(len(windows)):
        start = idx * stride_length
        end = start + window_size
        for i, beat in enumerate(beats[last_start:]):
            if beat >= start and beat < end:
                if not windows[idx]:
                    last_start = last_start + i
                pitch, note_off = notes[beat]
                windows[idx].append((beat - start, pitch, note_off - start))
            elif beat >= end:
                break
    
    return [window for window in windows if len(window) > 1]

def fill_data(total_beats, notes, beats):
    data = np.zeros((total_beats, 130), dtype=np.bool)

    if beats[0] != 0: # if the first note isn't on the first beat fill the missing space with rests
        data[:beats[0], -1] = True

    t = data.shape[0]

    for i, beat in enumerate(beats):
        pitch, note_off = notes[beat]

        if i <= len(beats) - 2:
            next_beat = beats[i + 1]
            if note_off >= next_beat: # if this note continues past the next one clip it
                note_off = next_beat - 1
            elif next_beat - note_off > 1: # if there is at least one beat between the end of this note and the beginning of the next
                data[note_off + 1 : next_beat, -1] = True # fill the intervening beats with rests
        else:   # fill everything after the last note off with rests
            if note_off < t - 1:
                data[note_off + 1 : ,-1] = True
            elif note_off >= t: # clip the last note to the end of the last measure
                note_off = t - 1
        if beat < t:
            insert_note(data, beat, pitch, note_off)
    return data

def extract_windows(data, window_size=32, stride_length=16):
    t = data.shape[0]
    windows = []

    start = 0

    while start + window_size < t:
        window = np.array(data[start:start + window_size])
        notes = np.argmax(window, axis=1)

        for i, note in enumerate(notes):
            if note == 128:
                window[i,128] = False
                window[i,129] = True
            else:
                break

        windows.append(window)

        start += stride_length
    
    return windows

def window_filter(window):
    notes = np.argmax(window, axis=1)
    count = 0
    for note in notes:
        if note < 128:
            count += 1
        
        if count == 2:
            return True
    
    return False

def encode_midi(measureMap, t=32, measure_length=16, single_note_length = 4, header=None):
    measure_count = 0
    fill_header = True

    last_beat = -1
    sustain_until = -1
    notes = defaultdict(tuple)
    for key in measureMap.keys():
        i = key * single_note_length
        
        for element in measureMap[key][0]:
            if isinstance(element, note.Note):
                fill_header = False
                idx = beat_to_total_beat(i, element.beat, single_note_length)
                if not notes[idx]:
                    notes[idx] = (int(element.pitch.midi), idx + int(element.duration.quarterLength * 4) - 1)
            elif fill_header and header != None:
                print(type(element))
                header.append(element)
        

    beats = sorted([key for key, value in notes.items() if value != ()])
    
    if not beats:
        return []
    
    total_beats = beats[-1]

    if total_beats % 16 != 0:
        total_beats += total_beats % 16
    
    data = fill_data(total_beats, notes, beats)
    windows = extract_windows(data, window_size=t, stride_length=t//2)
    windows = list(filter(window_filter, windows))

    return windows

def load_single(path, measures=2, desired_instrument='Guitar'):
    midi = converter.parse(path)
    ts = midi.getTimeSignatures()[0]
    num = ts.numerator
    denom = ts.denominator

    if num != 4 or denom != 4:
        return []

    measure_length = None # length of measure in 16ths
    single_note_length = None # length of a single note in 16ths

    measure_length = 16
    single_note_length = 4


    notes_to_parse = None
    score = instrument.partitionByInstrument(midi)
    if score: # file has instrument parts
        correctPart = None

        for part in score.parts:
            instr = str(part.getInstrument())
            
            if instr == desired_instrument:
                correctPart = part
                break

        if not correctPart:
            correctPart = score.parts[0]

        correctPart.makeMeasures(inPlace=True)
        measureMap = correctPart.measureOffsetMap()

    else: # file has notes in a flat structure
        #notes_to_parse = midi.flat.notes
        measureMap = midi.flat.measureOffsetMap()

    """ print(measureMap)
    return """
    return encode_midi(measureMap, t=measures*16, measure_length=measure_length, single_note_length = single_note_length)

def parse(filename):
    return converter.parse(filename)
    """ try:
        return converter.parse(filename)
    except:
        return None """

def load_data(folder, measures=2, instrument='Guitar'):
    filenames = [[folder + f, measures, instrument] for f in os.listdir(folder)]
    results = common.runParallel(filenames, load_single, updateFunction=True, unpackIterable=True)
    #results = [parse(f) for f in filenames]
    #result = Parallel(n_jobs=4, backend="threading", verbose=1)(delayed(load_single)(folder + f, measures=16) for f in filenames)
    """ data = []
    for midi in tqdm(results):
        if midi:
            data.append(load_single(midi)) """
    
    data = []
    for result in results:
        data.extend(result)
    
    return np.array(data)



In [78]:
parent = 'data/train/'
for filename in tqdm(os.listdir(parent)[28:100]):
    path = parent + filename
    data = load_single(path)

100%|██████████| 72/72 [00:17<00:00,  4.06it/s]


In [82]:
from timeit import default_timer as timer

path = 'data/train/'
start = timer()
train_x = load_data(path)
end = timer()

total_time = end - start
avg_time = total_time / len(os.listdir(path))
print('total time: {}, avg time per song: {}'.format(total_time, avg_time))

Done 0 tasks of 1534
Done 33 tasks of 1534
Done 66 tasks of 1534
Done 99 tasks of 1534
Done 132 tasks of 1534
Done 165 tasks of 1534
Done 198 tasks of 1534
Done 231 tasks of 1534
Done 264 tasks of 1534
Done 297 tasks of 1534
Done 330 tasks of 1534
Done 363 tasks of 1534
Done 396 tasks of 1534
Done 429 tasks of 1534
Done 462 tasks of 1534
Done 495 tasks of 1534
Done 528 tasks of 1534
Done 561 tasks of 1534
Done 594 tasks of 1534
Done 627 tasks of 1534
Done 660 tasks of 1534
Done 693 tasks of 1534
Done 726 tasks of 1534
Done 759 tasks of 1534
Done 792 tasks of 1534
Done 825 tasks of 1534
Done 858 tasks of 1534
Done 891 tasks of 1534
Done 924 tasks of 1534
Done 957 tasks of 1534
Done 990 tasks of 1534
Done 1023 tasks of 1534
Done 1056 tasks of 1534
Done 1089 tasks of 1534
Done 1122 tasks of 1534
Done 1155 tasks of 1534
Done 1188 tasks of 1534
Done 1221 tasks of 1534
Done 1254 tasks of 1534
Done 1287 tasks of 1534
Done 1320 tasks of 1534
Done 1353 tasks of 1534
Done 1386 tasks of 1534
Done

In [83]:
np.save('data/training_data_2bar.npy', train_x)

In [84]:
train_x.shape

(23760, 32, 130)

In [None]:
header = []
measures = 16
data = encode_midi(measureMap, t=measures*16, header=header)
strm = decode_midi(data, header)
strm.ticksPerQuarterNote = 1024
strm.makeRests(fillGaps=True, inPlace=True)
#strm.show('text')
strm.write('midi', fp='data/Sin_City_decoded.mid')