In [80]:
import os
import sys
import glob
import numpy as np
import pickle
from PIL import Image
from music21 import converter, corpus, instrument, midi, note, chord, pitch

sys.path.insert(0, os.path.abspath('..'))  # noqa
from lucky_trainer.misc.custom_dataset_classes import NumpyDataset

# Music folder this notebook works on; its inputs are read from, and its
# outputs written to, sub-folders of this directory.
mf = "edm"

# Make sure every output sub-folder exists (no-op when already present).
for subfolder in ('/input_data/', '/encodings/', '/compositions/', '/saved_models/'):
    os.makedirs(mf + subfolder, exist_ok=True)

In [74]:
def build_dictionary(file_name):
    """Build 1-based integer vocabularies from the MIDI files of a music folder.

    Every file matching ``<file_name>/original_data/*.mid`` is parsed and,
    across all files, three sets of distinct values are collected from the
    note and chord elements: the element names (``nameWithOctave`` for notes,
    ``commonName`` for chords), the durations (``duration.quarterLength``)
    and the offset deltas between consecutive elements.

    Args:
        file_name: Path of the music folder, i.e. the directory that contains
            ``original_data`` (not the path of a single file).

    Returns:
        A tuple ``(mapping_notes, mapping_durations, mapping_offsets)``.
        Each dict maps a distinct value to its 1-based position in sorted
        order. All three dicts are empty when no ``.mid`` file is found.
    """
    element_names = set()
    durations = set()
    offsets = set()
    for file in glob.glob(file_name + "/original_data/*.mid"):
        # Named `score` so the imported `midi` module is not shadowed.
        score = converter.parse(file)
        print("Read in possible notes/chords of %s" % file)

        try:  # file has instrument parts
            partitioned = instrument.partitionByInstrument(score)
            notes_to_parse = partitioned.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # `Exception` instead of a bare `except` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not swallowed here.
            notes_to_parse = score.notes

        last_offset = 0
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                element_names.add(element.nameWithOctave)
            elif isinstance(element, chord.Chord):
                element_names.add(element.commonName)
            else:
                continue  # anything that is neither a note nor a chord is ignored
            durations.add(element.duration.quarterLength)
            element_offset = element.offset
            # NOTE(review): this re-anchors whenever the previous offset is 0,
            # not only on the first element, so the step away from offset 0 is
            # recorded as a delta of 0. Kept as is because encode_folder
            # derives its lookup keys with the very same rule.
            if last_offset == 0:
                last_offset = element_offset
            offsets.add(element_offset - last_offset)
            last_offset = element_offset

    mapping_notes = {
        name: index for index, name in enumerate(sorted(element_names), start=1)
    }
    mapping_durations = {
        value: index for index, value in enumerate(sorted(durations), start=1)
    }
    mapping_offsets = {
        value: index for index, value in enumerate(sorted(offsets), start=1)
    }

    return mapping_notes, mapping_durations, mapping_offsets

In [125]:
def encode_folder(music_folder):
    """One-hot encode every MIDI file of a music folder and pickle the result.

    The vocabularies returned by ``build_dictionary(music_folder)`` are
    pickled to ``element_key_dict.pkl``, ``durations_key_dict.pkl`` and
    ``offsets_key_dict.pkl`` inside ``music_folder``. Then each file matching
    ``<music_folder>/original_data/*.mid`` is turned into a float32 array of
    shape ``(n_elements, 3, max_key)`` and pickled to
    ``<music_folder>/encodings/<file stem>_encoding.pkl``. For every note or
    chord, row 0 one-hot encodes the element name, row 1 the offset delta to
    the previous element and row 2 the duration; ``max_key`` is the size of
    the largest of the three vocabularies, so shorter ones are zero-padded.

    Args:
        music_folder: Path of the folder that contains ``original_data``.

    Returns:
        None. The function only writes files and prints progress.
    """
    mapping_notes, mapping_durations, mapping_offsets = build_dictionary(music_folder)

    # save dictionary
    vocabularies = (
        ("/element_key_dict.pkl", mapping_notes),
        ("/durations_key_dict.pkl", mapping_durations),
        ("/offsets_key_dict.pkl", mapping_offsets),
    )
    for pkl_name, mapping in vocabularies:
        # `with` closes the handle even if pickling raises.
        with open(music_folder + pkl_name, "wb") as f:
            pickle.dump(mapping, f)

    print("Read in completed. \n")

    # The target directory is created here so the function also works for
    # folders that were not prepared beforehand.
    encodings_dir = music_folder + "/encodings/"
    os.makedirs(encodings_dir, exist_ok=True)

    # Width shared by all three one-hot rows; it does not depend on the
    # element, so it is computed once instead of inside the inner loop.
    max_key = max(len(mapping_notes), len(mapping_offsets), len(mapping_durations))

    for file in glob.glob(music_folder + "/original_data/*.mid"):
        # Named `score` so the imported `midi` module is not shadowed.
        score = converter.parse(file)
        print("Parsing %s" % file)

        try:  # file has instrument parts
            partitioned = instrument.partitionByInstrument(score)
            notes_to_parse = partitioned.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # `Exception` instead of a bare `except` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not swallowed here.
            notes_to_parse = score.notes

        midi_encoding = []
        last_offset = 0
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                element_key = element.nameWithOctave
            elif isinstance(element, chord.Chord):
                element_key = element.commonName
            else:
                continue  # anything that is neither a note nor a chord is ignored
            element_offset = element.offset
            # Must stay identical to the delta rule in build_dictionary,
            # otherwise the offset lookup below raises KeyError.
            if last_offset == 0:
                last_offset = element_offset

            # Mapping values are 1-based, hence the "- 1" for the column index.
            np_arr = np.zeros((3, max_key), dtype=np.float32)
            np_arr[0, mapping_notes[element_key] - 1] = 1
            np_arr[1, mapping_offsets[element_offset - last_offset] - 1] = 1
            np_arr[2, mapping_durations[element.duration.quarterLength] - 1] = 1
            midi_encoding.append(np_arr)

            last_offset = element_offset

        file_encoding = np.array(midi_encoding)
        file_stem = os.path.splitext(os.path.basename(file))[0]
        with open(encodings_dir + file_stem + "_encoding.pkl", "wb") as f:
            pickle.dump(file_encoding, f)
        

In [126]:
# Build the vocabularies for `mf`, pickle them, and write one encoding pickle per MIDI file.
encode_folder(mf)

Read in possible notes/chords of edm/original_data\deadmau5 - Deus Ex Machina  (midi by Carlo Prato) (www.cprato.com).mid
Read in completed. 

Parsing edm/original_data\deadmau5 - Deus Ex Machina  (midi by Carlo Prato) (www.cprato.com).mid


In [135]:
def get_dl_data(music_folder, seq_len=25):
    """Cut the pickled encodings of a music folder into input/target windows.

    For every ``<music_folder>/encodings/*_encoding.pkl`` file, each start
    index ``i`` in ``range(len(data) - 2 * seq_len)`` produces one sample:
    the input is ``data[i:i + seq_len]`` and the target is the following
    ``seq_len`` entries, cast to ``int64``.

    Args:
        music_folder: Path of the folder that contains ``encodings``.
        seq_len: Number of consecutive entries in an input window and in its
            target window.

    Returns:
        A tuple ``(x_data, y_data)`` of numpy arrays holding the input and
        target windows of all files. Both are empty arrays when no file
        contributes a window.
    """
    x_data = []
    y_data = []
    # Sorted so the sample order does not depend on directory listing order.
    for file in sorted(glob.glob(music_folder + "/encodings/*_encoding.pkl")):
        print("Loading %s" % file)
        # NOTE: pickle.load can execute arbitrary code; only load encodings
        # that were produced locally by this notebook.
        with open(file, 'rb') as f:
            data = pickle.load(f)
        all_sequences_length = len(data) - 2 * seq_len
        if all_sequences_length < 0:
            print("File %s is too small. Skipping." % file)
            # Skip only this file; `break` would silently drop every
            # remaining file as well.
            continue
        for i in range(all_sequences_length):
            x_data.append(data[i:i + seq_len])
            y_data.append(data[i + seq_len:i + 2 * seq_len].astype(np.int64))

    print("Extraction completed.\n")
    return np.array(x_data), np.array(y_data)

def get_val_split(inputs, targets, split=10000, seed=None):
    """Randomly split paired arrays into a training and a validation part.

    The sample indices are shuffled once and the first part of the shuffled
    order becomes the validation set, so inputs and targets stay aligned.

    Args:
        inputs: Numpy array of samples, indexed along axis 0.
        targets: Numpy array of targets, aligned with ``inputs``.
        split: Size of the validation set. Values ``<= 1`` are a fraction of
            the dataset (note that ``1`` therefore means "everything");
            larger values are an absolute number of samples and must be an
            integer.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` (default) the global ``np.random``
            state is used, exactly as before this parameter existed.

    Returns:
        A tuple ``(train_inputs, train_targets, val_inputs, val_targets)``.
    """
    dataset_length = len(inputs)
    indices = list(range(dataset_length))
    if seed is None:
        np.random.shuffle(indices)
    else:
        # A private generator leaves the global random state untouched.
        np.random.RandomState(seed).shuffle(indices)

    # Number of validation samples: fraction of the dataset or absolute count.
    if split <= 1:
        val_count = int(split * dataset_length)
    else:
        val_count = split

    mapping_val = indices[:val_count]
    mapping_train = indices[val_count:]

    return (inputs[mapping_train], targets[mapping_train],
            inputs[mapping_val], targets[mapping_val])

In [136]:
# seq_len=10: every sample is a window of 10 encoded elements, its target the 10 that follow.
x, y = get_dl_data(mf, 10)
# split=10 is greater than 1, so it is an absolute count: 10 shuffled samples go to validation.
train_in, train_out, val_in, val_out = get_val_split(x, y, split=10)

Loading edm/encodings\deadmau5 - Deus Ex Machina  (midi by Carlo Prato) (www.cprato.com)_encoding.pkl
Extraction completed.



In [137]:
# Create PyTorch Dataset
train = NumpyDataset(train_in, train_out)
val = NumpyDataset(val_in, val_out)

# Save the Dataset
# `with` guarantees the file handles are closed even if pickling fails.
with open(mf + "/input_data/train", 'wb') as outfile:
    pickle.dump(train, outfile)
with open(mf + "/input_data/val", 'wb') as outfile:
    pickle.dump(val, outfile)
print(mf + " data saved.")

edm data saved.
