In [None]:
import pandas as pd
%load_ext autoreload
%autoreload 2
from matplotlib import pyplot as plt
from src.common import MaestroDataset

In [None]:
# Build the dataset wrapper and keep a handle on its metadata table
# (used as a pandas DataFrame by the cells below).
maestro_dataset = MaestroDataset()
dataset = maestro_dataset.csv
# Preview the first five rows
dataset.head(5)

In [None]:
# Show every column name of the metadata table as a plain list
[column_name for column_name in dataset.columns]

In [None]:
# Count distinct values per column; dropna=False keeps a missing value
# counted as its own entry, exactly like len(series.unique()).
n_titles = dataset['canonical_title'].nunique(dropna=False)
n_authors = dataset['canonical_composer'].nunique(dropna=False)
n_years = dataset['year'].nunique(dropna=False)

print(f"Total Titles : {n_titles}")
print(f"Total Authors: {n_authors}")
print(f"Total Years  : {n_years}")

In [None]:
# Number of dataset rows for each year, drawn as a bar chart
by_year = dataset.groupby(['year']).size()
by_year.plot.bar(figsize=(15, 5))
plt.show()

In [None]:
# Number of dataset rows per composer, most frequent composer first
by_author = (
    dataset
    .groupby(['canonical_composer'])
    .size()
    .sort_values(ascending=False)
)
by_author.plot.bar(figsize=(15, 5))
plt.show()

In [None]:
# Number of dataset rows in each value of the 'split' column
by_split = dataset.groupby(['split']).size()
by_split.plot.bar(figsize=(15, 5))
plt.show()

In [None]:
# Mean of the 'duration' column within each value of the 'split' column
split_groups = dataset.groupby(['split'])
duration_by_split = split_groups['duration'].mean()
duration_by_split.plot.bar(figsize=(15, 5))
plt.show()

In [None]:
# Inspect the first dataset entry: load its audio and report the duration
item = maestro_dataset.load_index(0)
audio = item.load_audio()
print(audio.duration)

# Mel spectrogram of the 0-10 chunk (presumably seconds - TODO confirm units)
audio_excerpt = audio.chunk(0, 10)
audio_excerpt.plot_mel_spectrogram()

In [None]:
# Load the MIDI data of the dataset entry held in `item` and report its duration
midi = item.load_midi()
print(midi.duration)
# Draw the piano roll for the 0-10 range (presumably seconds, matching the audio chunk - TODO confirm)
midi.plot_piano_roll_chunk(0, 10)
# NOTE(review): this looks like it writes the piano roll somewhere under
# MaestroDataset.SPLIT_ROOT - confirm the on-disk side effect is wanted in an
# exploration notebook, since it runs again on every "Run All".
midi.save_piano_roll(MaestroDataset.SPLIT_ROOT)

In [None]:
import numpy as np

# Number of samples produced when stepping from 0 up to (but excluding) 10 in
# increments of 10/313. With a non-integer step, float rounding inside
# np.arange decides whether the count is 313 or 314.
# NOTE(review): presumably a sanity check on the frame count of a 10-unit chunk - confirm.
np.arange(0, 10, 10. / 313).shape[0]

In [None]:
from IPython.lib.display import Audio

# Embed an audio player for the loaded recording directly in the notebook
Audio(data=audio.audio, rate=audio.rate)

In [None]:
from multiprocessing import Pool
from tqdm.auto import tqdm
# Collect the pitch of every note in the whole dataset; the flat list built
# here is the input of the note-usage histogram.
N_WORKERS = 32  # number of worker processes used to parse the MIDI files
n_items = maestro_dataset.length()
maestro_items = [maestro_dataset.load_index(i) for i in tqdm(range(n_items), total=n_items)]


def load_midi(item):
    """Return the pitch of every note played by the first instrument of ``item``.

    Args:
        item: a dataset entry as returned by ``maestro_dataset.load_index``.

    Returns:
        A list with one ``note.pitch`` value per note of
        ``item.load_midi().midi.instruments[0]``.
    """
    return [note.pitch for note in item.load_midi().midi.instruments[0].notes]


with Pool(N_WORKERS) as p:
    # imap_unordered yields one pitch list per item in completion order; the
    # order is irrelevant here because the lists are flattened into one pool
    # of pitches.
    maestro_midis = [
        pitch
        for item_pitches in tqdm(p.imap_unordered(load_midi, maestro_items), total=n_items)
        for pitch in item_pitches
    ]

In [None]:
# Count how often each pitch value occurs: 129 integer bin edges (0..128)
# produce 128 unit-wide bins, one bar per bin.
pitch_bin_edges = range(129)
hist_x = np.histogram(maestro_midis, bins=pitch_bin_edges)[0]
plt.bar(range(128), hist_x)
plt.show()


In [None]:
# Summary of the pitches collected across the dataset.
# Compute the extremes once instead of re-scanning the full note list per print.
lowest_note = np.min(maestro_midis)
highest_note = np.max(maestro_midis)

print(f"{hist_x.sum()} notes in total")
# Fixed label: the stray "f" was inside the string literal ("fLowest note: ")
print("Lowest note: ", lowest_note)
print("Highest note: ", highest_note)
# +1 because both the lowest and the highest pitch belong to the spread
print("Note spread: ", highest_note - lowest_note + 1)