In [1]:
import os
import numpy as np
import csv
import pretty_midi
import pprint
from PIL import Image
import matplotlib.pyplot as plt
# from matplotlib import cm
%matplotlib inline

In [2]:
def transpose_range(samples):
    merged_sample = np.zeros_like(samples[0])
    for sample in samples:
        merged_sample = np.maximum(merged_sample, sample)
    merged_sample = np.amax(merged_sample, axis=0)
    min_note = np.argmax(merged_sample)
    max_note = merged_sample.shape[0] - np.argmax(merged_sample[::-1])
    return min_note, max_note

def generate_add_centered_transpose(samples):
    num_notes = samples[0].shape[1]
    min_note, max_note = transpose_range(samples)
#     print(min_note, max_note)
    s = num_notes//2 - (max_note + min_note)//2
#     print(s)
    out_samples = samples
    out_lens = [len(samples), len(samples)]
    for i in range(len(samples)):
        out_sample = np.zeros_like(samples[i])
        out_sample[:,min_note+s:max_note+s] = samples[i][:,min_note:max_note]
        out_samples.append(out_sample)
    return out_samples, out_lens

In [3]:
from mido import MidiFile, MidiTrack, Message
import numpy as np

num_notes = 96
samples_per_measure = 96

def midi_to_samples(fname):
    has_time_sig = False
    flag_warning = False
    mid = MidiFile(fname)
    ticks_per_beat = mid.ticks_per_beat
    ticks_per_measure = 4 * ticks_per_beat

    for i, track in enumerate(mid.tracks):
        for msg in track:
            if msg.type == 'time_signature':
                new_tpm = msg.numerator * ticks_per_beat * 4 / msg.denominator
                if has_time_sig and new_tpm != ticks_per_measure:
                    flag_warning = True
                ticks_per_measure = new_tpm
                has_time_sig = True
    if flag_warning:
        print("  ^^^^^^ WARNING ^^^^^^")
        print("    " + fname)
        print("    Detected multiple distinct time signatures.")
        print("  ^^^^^^ WARNING ^^^^^^")
        return []

    all_notes = {}
    for i, track in enumerate(mid.tracks):
        abs_time = 0
        for msg in track:
            abs_time += msg.time
            if msg.type == 'note_on':
                if msg.velocity == 0:
                    continue
                note = msg.note - (128 - num_notes)/2
                assert(note >= 0 and note < num_notes)
                if note not in all_notes:
                    all_notes[note] = []
                else:
                    single_note = all_notes[note][-1]
                    if len(single_note) == 1:
                        single_note.append(single_note[0] + 1)
                all_notes[note].append([abs_time * samples_per_measure / ticks_per_measure])
            elif msg.type == 'note_off':
                if len(all_notes[note][-1]) != 1:
                    continue
                all_notes[note][-1].append(abs_time * samples_per_measure / ticks_per_measure)
    for note in all_notes:
        for start_end in all_notes[note]:
            if len(start_end) == 1:
                start_end.append(start_end[0] + 1)
    samples = []
    for note in all_notes:
        for start, end in all_notes[note]:
            sample_ix = int(start / samples_per_measure)
            while len(samples) <= sample_ix:
                samples.append(np.zeros((samples_per_measure, num_notes), dtype=np.uint8))
            sample = samples[sample_ix]
            start_ix = int(start - sample_ix * samples_per_measure)
# print(start_ix, note)
            if False:
                end_ix = int(min(end - sample_ix * samples_per_measure, samples_per_measure))
                while start_ix < end_ix:
                    sample[start_ix, int(note)] = 1
                    start_ix += 1
            else:
                sample[start_ix, int(note)] = 1
    return samples

In [None]:
base = './dataset/'
csvfile = open('maestro-v2.0.0.csv')
reader = csv.DictReader(csvfile)
db = list(reader)
csvfile.close()
composers = set(row ['canonical_composer'] for row in db)
composers = list(filter(lambda x: x.find('/') == -1, composers))
print(composers)
for composer in composers:
    print("2")
    all_samples = []
    all_lens = []
    for row in db:
        if composer == row['canonical_composer']:
            path = base + row['midi_filename']
            samples = midi_to_samples(path)
            sample, lens = generate_add_centered_transpose(samples)
            all_samples += samples
            all_lens += lens
    assert(sum(all_lens) == len(all_samples))
    print(f"Saving {len(all_samples)} samples....")
    all_samples = np.array(all_samples, dtype=np.uint8)
    dir_path = base + composer.replace(' ', '_')
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)
    count = 0
    for i in all_samples:
        im = Image.fromarray(i)
        plt.imsave(f"{dir_path}/{count}.png", im, cmap="gray")
        count += 1
# for row in db:
#     composer = row['canonical_composer']
#     path = base + row['midi_filename']
#     print(path)
#     samples = midi_to_samples(path)
#     samples, lens = generate_add_centered_transpose(samples)
#     print(samples)
#     all_samples[composer] = all_samples.get(composer, []) + samples
#     all_lens[composer] = all_lens.get(composer, []) + lens
#     assert(sum(all_lens[composer]) == len(all_samples[composer]))
#     print("Saving " + str(len(all_samples[composer])) + " samples...")
#     all_samples[composer] = np.array(all_samples[composer], dtype=np.uint8)
#     all_lens[composer] = np.array(all_lens[composer], dtype=np.uint32)      

['Orlando Gibbons', 'Pyotr Ilyich Tchaikovsky', 'Johann Sebastian Bach', 'Carl Maria von Weber', 'Claude Debussy', 'Jean-Philippe Rameau', 'Johannes Brahms', 'Robert Schumann', 'Domenico Scarlatti', 'Percy Grainger', 'Isaac Albéniz', 'Edvard Grieg', 'Ludwig van Beethoven', 'Henry Purcell', 'Johann Pachelbel', 'César Franck', 'Anton Arensky', 'Mily Balakirev', 'Muzio Clementi', 'Sergei Rachmaninoff', 'Joseph Haydn', 'Felix Mendelssohn', 'Alexander Scriabin', 'Franz Liszt', 'Nikolai Medtner', 'Wolfgang Amadeus Mozart', 'Antonio Soler', 'George Enescu', 'Frédéric Chopin', 'Leoš Janáček', 'Alban Berg', 'Franz Schubert', 'George Frideric Handel', 'Modest Mussorgsky']
2
Saving 242 samples....
2
Saving 2546 samples....
2
Saving 49266 samples....
2
Saving 940 samples....
2
Saving 21480 samples....
2
Saving 332 samples....
2
Saving 28972 samples....
2
Saving 56506 samples....
2
Saving 6144 samples....
2
Saving 434 samples....
2
Saving 3122 samples....
2
Saving 366 samples....
2
Saving 99378 sam