In [4]:
!sudo pip install -r requirements.txt

Collecting pillow
[?25l  Downloading https://files.pythonhosted.org/packages/c6/42/fdaf9b53942b103462db3d843c5bc3eb660f9b2e58419ebc99ed87d93dd2/Pillow-7.0.0-cp35-cp35m-manylinux1_x86_64.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 4.6MB/s eta 0:00:01
[?25hCollecting pip-chill
  Downloading https://files.pythonhosted.org/packages/f8/d7/957de46a8bb9c87cafc3742e95164e306a7d6c2627c48f0248ee13ace117/pip_chill-0.1.8-py2.py3-none-any.whl
Collecting py-midi
  Downloading https://files.pythonhosted.org/packages/72/9b/42dd830c90d4b1d3d0280148516a396f64d1d98b6d80701c5999ce6124b2/py_midi-2.0.1-py3-none-any.whl
Collecting pypianoroll
  Downloading https://files.pythonhosted.org/packages/17/93/cca689c3e7f217a4a1906f6b96e81c4d57d423ff6778dcc7af3bad11c638/pypianoroll-0.5.3.tar.gz
Collecting pyserial
[?25l  Downloading https://files.pythonhosted.org/packages/0d/e4/2a744dd9e3be04a0c0907414e2a01a7c88bb3915cbe3c8cc06e209f59c30/pyserial-3.4-py2.py3-none-any.whl (193kB)
[K     |█████████

In [7]:
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip

--2020-01-28 09:01:02--  https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.195.128, 2607:f8b0:400e:c09::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.195.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 59243107 (56M) [application/zip]
Saving to: ‘maestro-v2.0.0-midi.zip’


2020-01-28 09:01:03 (85.6 MB/s) - ‘maestro-v2.0.0-midi.zip’ saved [59243107/59243107]



In [13]:
!7z x maestro-v2.0.0-midi.zip


7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Sca        1 file, 59243107 bytes (57 MiB)

Extracting archive: maestro-v2.0.0-midi.zip
--
Path = maestro-v2.0.0-midi.zip
Type = zip
Physical Size = 59243107

     15% 214 - maestro-v2.0.0/2009/MIDI-Unproc . _R1_2009_20_R1_2009_06_WAV.mi                                                                           29% 445 - maestro-v2.0.0/2014/MIDI-UNPROCE . MID--AUDIO_13_R1_2014_wav--6.mid                                                                               45% 599 - maestro-v2.0.0/2006/MIDI-Unpro . 11_R1_2006_04_Track04_wav.mi                                                                         60% 841 - maestro-v2.0.0/2015/MIDI-Unpro . from_mp3_07_R1_2015_wav--2.mid                                                                           75% 

In [5]:
import os
import numpy as np
import csv
import pretty_midi
import pprint
from PIL import Image
import matplotlib.pyplot as plt
# from matplotlib import cm
%matplotlib inline

In [6]:
def transpose_range(samples):
    merged_sample = np.zeros_like(samples[0])
    for sample in samples:
        merged_sample = np.maximum(merged_sample, sample)
    merged_sample = np.amax(merged_sample, axis=0)
    min_note = np.argmax(merged_sample)
    max_note = merged_sample.shape[0] - np.argmax(merged_sample[::-1])
    return min_note, max_note

def generate_add_centered_transpose(samples):
    num_notes = samples[0].shape[1]
    min_note, max_note = transpose_range(samples)
#     print(min_note, max_note)
    s = num_notes//2 - (max_note + min_note)//2
#     print(s)
    out_samples = samples
    out_lens = [len(samples), len(samples)]
    for i in range(len(samples)):
        out_sample = np.zeros_like(samples[i])
        out_sample[:,min_note+s:max_note+s] = samples[i][:,min_note:max_note]
        out_samples.append(out_sample)
    return out_samples, out_lens

In [14]:
from mido import MidiFile, MidiTrack, Message
import numpy as np

num_notes = 96
samples_per_measure = 96

def midi_to_samples(fname):
    has_time_sig = False
    flag_warning = False
    mid = MidiFile(fname)
    ticks_per_beat = mid.ticks_per_beat
    ticks_per_measure = 4 * ticks_per_beat

    for i, track in enumerate(mid.tracks):
        for msg in track:
            if msg.type == 'time_signature':
                new_tpm = msg.numerator * ticks_per_beat * 4 / msg.denominator
                if has_time_sig and new_tpm != ticks_per_measure:
                    flag_warning = True
                ticks_per_measure = new_tpm
                has_time_sig = True
    if flag_warning:
        print("  ^^^^^^ WARNING ^^^^^^")
        print("    " + fname)
        print("    Detected multiple distinct time signatures.")
        print("  ^^^^^^ WARNING ^^^^^^")
        return []

    all_notes = {}
    for i, track in enumerate(mid.tracks):
        abs_time = 0
        for msg in track:
            abs_time += msg.time
            if msg.type == 'note_on':
                if msg.velocity == 0:
                    continue
                note = msg.note - (128 - num_notes)/2
                assert(note >= 0 and note < num_notes)
                if note not in all_notes:
                    all_notes[note] = []
                else:
                    single_note = all_notes[note][-1]
                    if len(single_note) == 1:
                        single_note.append(single_note[0] + 1)
                all_notes[note].append([abs_time * samples_per_measure / ticks_per_measure])
            elif msg.type == 'note_off':
                if len(all_notes[note][-1]) != 1:
                    continue
                all_notes[note][-1].append(abs_time * samples_per_measure / ticks_per_measure)
    for note in all_notes:
        for start_end in all_notes[note]:
            if len(start_end) == 1:
                start_end.append(start_end[0] + 1)
    samples = []
    for note in all_notes:
        for start, end in all_notes[note]:
            sample_ix = int(start / samples_per_measure)
            while len(samples) <= sample_ix:
                samples.append(np.zeros((samples_per_measure, num_notes), dtype=np.uint8))
            sample = samples[sample_ix]
            start_ix = int(start - sample_ix * samples_per_measure)
# print(start_ix, note)
            if False:
                end_ix = int(min(end - sample_ix * samples_per_measure, samples_per_measure))
                while start_ix < end_ix:
                    sample[start_ix, int(note)] = 1
                    start_ix += 1
            else:
                sample[start_ix, int(note)] = 1
    return samples

In [22]:
base = './maestro-v2.0.0/'
csvfile = open('maestro-v2.0.0.csv')
reader = csv.DictReader(csvfile)
db = list(reader)
csvfile.close()
composers = set(row ['canonical_composer'] for row in db)
composers = list(filter(lambda x: x.find('/') == -1, composers))
print(composers)
for composer in composers:
    print("2")
    all_samples = []
    all_lens = []
    for row in db:
        if composer == row['canonical_composer']:
            path = base + row['midi_filename']
            samples = midi_to_samples(path)
            sample, lens = generate_add_centered_transpose(samples)
            all_samples += samples
            all_lens += lens
    assert(sum(all_lens) == len(all_samples))
    print("Saving " + str(len(all_samples)) + " samples")
    all_samples = np.array(all_samples, dtype=np.uint8)
    dir_path = './dataset/' + composer.replace(' ', '_')
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    count = 0
    for i in all_samples:
        im = Image.fromarray(i)
        plt.imsave("{}/{}.png".format(dir_path, str(count)), im, cmap="gray")
        count += 1
# for row in db:
#     composer = row['canonical_composer']
#     path = base + row['midi_filename']
#     print(path)
#     samples = midi_to_samples(path)
#     samples, lens = generate_add_centered_transpose(samples)
#     print(samples)
#     all_samples[composer] = all_samples.get(composer, []) + samples
#     all_lens[composer] = all_lens.get(composer, []) + lens
#     assert(sum(all_lens[composer]) == len(all_samples[composer]))
#     print("Saving " + str(len(all_samples[composer])) + " samples...")
#     all_samples[composer] = np.array(all_samples[composer], dtype=np.uint8)
#     all_lens[composer] = np.array(all_lens[composer], dtype=np.uint32)      

['Alexander Scriabin', 'Carl Maria von Weber', 'Isaac Albéniz', 'Percy Grainger', 'Henry Purcell', 'Anton Arensky', 'Johann Pachelbel', 'Johann Sebastian Bach', 'Franz Liszt', 'Johannes Brahms', 'Modest Mussorgsky', 'Sergei Rachmaninoff', 'César Franck', 'Leoš Janáček', 'Orlando Gibbons', 'George Enescu', 'Felix Mendelssohn', 'Jean-Philippe Rameau', 'Wolfgang Amadeus Mozart', 'Frédéric Chopin', 'Nikolai Medtner', 'Pyotr Ilyich Tchaikovsky', 'Antonio Soler', 'Ludwig van Beethoven', 'Domenico Scarlatti', 'Edvard Grieg', 'Franz Schubert', 'Joseph Haydn', 'Mily Balakirev', 'Muzio Clementi', 'George Frideric Handel', 'Robert Schumann', 'Claude Debussy', 'Alban Berg']
2
Saving 19204 samples
2
Saving 940 samples
2
Saving 3122 samples
2
Saving 434 samples
2
Saving 538 samples
2
Saving 5060 samples
2
Saving 170 samples
2
Saving 49266 samples
2
Saving 72826 samples
2
Saving 28972 samples
2
Saving 11416 samples
2
Saving 23904 samples
2
Saving 4580 samples
2
Saving 2776 samples
2
Saving 242 sample