## Converts to numpy arrays the chords and durations of the MAESTRO dataset

Data MAESTRO v3.0.0: https://magenta.tensorflow.org/datasets/maestro

Curtis Hawthorne, Andriy Stasyuk, Adam Roberts, Ian Simon, Cheng-Zhi Anna Huang,
  Sander Dieleman, Erich Elsen, Jesse Engel, and Douglas Eck. "Enabling
  Factorized Piano Music Modeling and Generation with the MAESTRO Dataset."
  In International Conference on Learning Representations, 2019.

In [1]:
import os
import pickle
import numpy as np
from music21 import note, chord, corpus, converter, stream, instrument
import glob
from tqdm.notebook import tqdm

## Get Dataset

In [3]:
data_dir = '## YOUR FOLDER PATH TO THE MAESTRO DATASET ##'

# list of files
midi_list = os.listdir(data_dir)

# Load and make list of stream objects
original_scores = []
for midi in tqdm(midi_list):
    score = converter.parse(os.path.join(data_dir,midi))
    original_scores.append(score)

  0%|          | 0/127 [00:00<?, ?it/s]

In [4]:
midi_list

['ORIG-MIDI_01_7_7_13_Group__MID--AUDIO_12_R1_2013_wav--1.midi',
 'ORIG-MIDI_03_7_6_13_Group__MID--AUDIO_09_R1_2013_wav--2.midi',
 'ORIG-MIDI_01_7_7_13_Group__MID--AUDIO_13_R1_2013_wav--1.midi',
 'ORIG-MIDI_03_7_6_13_Group__MID--AUDIO_10_R1_2013_wav--2.midi',
 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_01_R1_2013_wav--2.midi',
 'ORIG-MIDI_01_7_10_13_Group_MID--AUDIO_08_R3_2013_wav--2.midi',
 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_02_R1_2013_wav--2.midi',
 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_08_R1_2013_wav--3.midi',
 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_04_R1_2013_wav--3.midi',
 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_03_R1_2013_wav--2.midi',
 'ORIG-MIDI_01_7_7_13_Group__MID--AUDIO_11_R1_2013_wav--1.midi',
 'ORIG-MIDI_03_7_10_13_Group_MID--AUDIO_18_R3_2013_wav--2.midi',
 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_06_R1_2013_wav--4.midi',
 'ORIG-MIDI_02_7_8_13_Group__MID--AUDIO_14_R2_2013_wav--4.midi',
 'ORIG-MIDI_03_7_8_13_Group__MID--AUDIO_19_R2_2013_wav--3.midi',
 'ORIG-MIDI_03_7_10_13_Gr

In [5]:
len(midi_list)

127

## Merge Notes to Chords

In [6]:
# Merge notes into chords
original_scores = [midi.chordify() for midi in tqdm(original_scores)]

  0%|          | 0/127 [00:00<?, ?it/s]

In [7]:
original_scores[0][100]

<music21.chord.Chord G#3 F4 C#5>

## Get Notes and Duration Information

In [8]:
# Define empty lists of lists
original_chords = [[] for _ in original_scores]
original_durations = [[] for _ in original_scores]
original_keys = []

# Extract notes, chords, durations, and keys
for i, midi in tqdm(enumerate(original_scores)):
    original_keys.append(str(midi.analyze('key')))
    for element in midi:
        if isinstance(element, note.Note):
            original_chords[i].append(element.pitch)
            original_durations[i].append(element.duration.quarterLength)
        elif isinstance(element, chord.Chord):
            original_chords[i].append('.'.join(str(n) for n in element.pitches))
            original_durations[i].append(element.duration.quarterLength)

0it [00:00, ?it/s]

In [9]:
original_chords[0][0]

'C#3.F5'

In [10]:
original_chords[0][1]

'C#3.C#5'

In [11]:
len(original_chords)

127

In [89]:
original_chords_saved = np.array(original_chords, dtype=object)
original_durations_saved= np.array(original_durations, dtype=object)

np.save('original_chords_MAESTRO', original_chords_saved)
np.save('original_durations_MAESTRO', original_durations_saved)