In [1]:
!sudo pip install -r requirements.txt

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip

--2020-01-30 18:28:58--  https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.20.128, 2607:f8b0:400e:c08::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.20.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 59243107 (56M) [application/zip]
Saving to: ‘maestro-v2.0.0-midi.zip.1’


2020-01-30 18:29:00 (66.5 MB/s) - ‘maestro-v2.0.0-midi.zip.1’ saved [59243107/59243107]



In [3]:
!7z x maestro-v2.0.0-midi.zip


7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Sca        1 file, 59243107 bytes (57 MiB)

Extracting archive: maestro-v2.0.0-midi.zip
--
Path = maestro-v2.0.0-midi.zip
Type = zip
Physical Size = 59243107

    
Would you like to replace the existing file:
  Path:     ./maestro-v2.0.0/maestro-v2.0.0.csv
  Size:     300212 bytes (294 KiB)
  Modified: 2019-05-11 06:07:32
with the file from archive:
  Path:     maestro-v2.0.0/maestro-v2.0.0.csv
  Size:     300212 bytes (294 KiB)
  Modified: 2019-05-11 06:07:32
? (Y)es / (N)o / (A)lways / (S)kip all / A(u)to rename all / (Q)uit? ^C
(Y)es / (N)o / (A)lways / (S)kip all / A(u)to rename all / (Q)uit? 

In [4]:
import os
import numpy as np
import csv
import pretty_midi
import pprint
from PIL import Image
import matplotlib.pyplot as plt
# from matplotlib import cm
%matplotlib inline

In [5]:
def transpose_range(samples):
    merged_sample = np.zeros_like(samples[0])
    for sample in samples:
        merged_sample = np.maximum(merged_sample, sample)
    merged_sample = np.amax(merged_sample, axis=0)
    min_note = np.argmax(merged_sample)
    max_note = merged_sample.shape[0] - np.argmax(merged_sample[::-1])
    return min_note, max_note

def generate_add_centered_transpose(samples):
    num_notes = samples[0].shape[1]
    min_note, max_note = transpose_range(samples)
#     print(min_note, max_note)
    s = num_notes//2 - (max_note + min_note)//2
#     print(s)
    out_samples = samples
    out_lens = [len(samples), len(samples)]
    for i in range(len(samples)):
        out_sample = np.zeros_like(samples[i])
        out_sample[:,min_note+s:max_note+s] = samples[i][:,min_note:max_note]
        out_samples.append(out_sample)
    return out_samples, out_lens

In [6]:
from mido import MidiFile, MidiTrack, Message
import numpy as np

num_notes = 96
samples_per_measure = 96

def midi_to_samples(fname):
    has_time_sig = False
    flag_warning = False
    mid = MidiFile(fname)
    ticks_per_beat = mid.ticks_per_beat
    ticks_per_measure = 4 * ticks_per_beat

    for i, track in enumerate(mid.tracks):
        for msg in track:
            if msg.type == 'time_signature':
                new_tpm = msg.numerator * ticks_per_beat * 4 / msg.denominator
                if has_time_sig and new_tpm != ticks_per_measure:
                    flag_warning = True
                ticks_per_measure = new_tpm
                has_time_sig = True
    if flag_warning:
        print("  ^^^^^^ WARNING ^^^^^^")
        print("    " + fname)
        print("    Detected multiple distinct time signatures.")
        print("  ^^^^^^ WARNING ^^^^^^")
        return []

    all_notes = {}
    for i, track in enumerate(mid.tracks):
        abs_time = 0
        for msg in track:
            abs_time += msg.time
            if msg.type == 'note_on':
                if msg.velocity == 0:
                    continue
                note = msg.note - (128 - num_notes)/2
                assert(note >= 0 and note < num_notes)
                if note not in all_notes:
                    all_notes[note] = []
                else:
                    single_note = all_notes[note][-1]
                    if len(single_note) == 1:
                        single_note.append(single_note[0] + 1)
                all_notes[note].append([abs_time * samples_per_measure / ticks_per_measure])
            elif msg.type == 'note_off':
                if len(all_notes[note][-1]) != 1:
                    continue
                all_notes[note][-1].append(abs_time * samples_per_measure / ticks_per_measure)
    for note in all_notes:
        for start_end in all_notes[note]:
            if len(start_end) == 1:
                start_end.append(start_end[0] + 1)
    samples = []
    for note in all_notes:
        for start, end in all_notes[note]:
            sample_ix = int(start / samples_per_measure)
            while len(samples) <= sample_ix:
                samples.append(np.zeros((samples_per_measure, num_notes), dtype=np.uint8))
            sample = samples[sample_ix]
            start_ix = int(start - sample_ix * samples_per_measure)
# print(start_ix, note)
            if False:
                end_ix = int(min(end - sample_ix * samples_per_measure, samples_per_measure))
                while start_ix < end_ix:
                    sample[start_ix, int(note)] = 1
                    start_ix += 1
            else:
                sample[start_ix, int(note)] = 1
    return samples

In [9]:
base = './maestro-v2.0.0/'
csvfile = open('maestro-v2.0.0.csv')
reader = csv.DictReader(csvfile)
db = list(reader)
csvfile.close()
composers = set(row ['canonical_composer'] for row in db)
composers = list(filter(lambda x: x.find('/') == -1, composers))
print(composers)
a = 0
l = len(composers)
for composer in composers:
    print(composer, "{}/{}".format(a+1,l))
    a += 1
    all_samples = []
    all_lens = []
    for row in db:
        if composer == row['canonical_composer']:
            path = base + row['midi_filename']
            samples = midi_to_samples(path)
            sample, lens = generate_add_centered_transpose(samples)
            all_samples += samples
            all_lens += lens
    assert(sum(all_lens) == len(all_samples))
    print("Saving " + str(len(all_samples)) + " samples")
    all_samples = np.array(all_samples, dtype=np.uint8)
    dir_path = './dataset/' + composer.replace(' ', '_')
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    count = 0
    for i in all_samples:
        np.save("{}/{}".format(dir_path, str(count)), i)
        count += 1
# for row in db:
#     composer = row['canonical_composer']
#     path = base + row['midi_filename']
#     print(path)
#     samples = midi_to_samples(path)
#     samples, lens = generate_add_centered_transpose(samples)
#     print(samples)
#     all_samples[composer] = all_samples.get(composer, []) + samples
#     all_lens[composer] = all_lens.get(composer, []) + lens
#     assert(sum(all_lens[composer]) == len(all_samples[composer]))
#     print("Saving " + str(len(all_samples[composer])) + " samples...")
#     all_samples[composer] = np.array(all_samples[composer], dtype=np.uint8)
#     all_lens[composer] = np.array(all_lens[composer], dtype=np.uint32)      

['Felix Mendelssohn', 'Pyotr Ilyich Tchaikovsky', 'Henry Purcell', 'Alban Berg', 'Ludwig van Beethoven', 'Franz Liszt', 'Antonio Soler', 'Claude Debussy', 'Leoš Janáček', 'Jean-Philippe Rameau', 'Mily Balakirev', 'Edvard Grieg', 'Frédéric Chopin', 'Anton Arensky', 'Franz Schubert', 'Joseph Haydn', 'George Enescu', 'Nikolai Medtner', 'Johannes Brahms', 'Percy Grainger', 'Modest Mussorgsky', 'Robert Schumann', 'Domenico Scarlatti', 'George Frideric Handel', 'Isaac Albéniz', 'Wolfgang Amadeus Mozart', 'César Franck', 'Johann Pachelbel', 'Johann Sebastian Bach', 'Carl Maria von Weber', 'Muzio Clementi', 'Sergei Rachmaninoff', 'Alexander Scriabin', 'Orlando Gibbons']
Felix Mendelssohn 1/34
Saving 19076 samples
Pyotr Ilyich Tchaikovsky 2/34
Saving 2546 samples
Henry Purcell 3/34
Saving 538 samples
Alban Berg 4/34
Saving 1898 samples
Ludwig van Beethoven 5/34
Saving 99378 samples
Franz Liszt 6/34
Saving 72826 samples
Antonio Soler 7/34
Saving 232 samples
Claude Debussy 8/34
Saving 21480 sampl