In [None]:
%load_ext autoreload
%autoreload 2

import os

import torch
import torchaudio
import numpy as np
from mido import MidiFile, MidiTrack, Message
import matplotlib.pyplot as plt
import librosa
import librosa.display

from Dataset import MidiDataset
import MidiUtils as mu
from Note import Note
import PlotUtils
from Song import Song

# Locations of the MAESTRO dataset and of this project's workspace.
# The defaults are the original author's local paths; set the environment
# variables below to run the notebook on another machine without editing it.
dataset_path = os.environ.get(
    "MAESTRO_DATASET_PATH",
    "/Users/andreas/Development/Midi-Conversion/maestro-v3.0.0",
)
workspace = os.environ.get(
    "PIANO_TRANSCRIPTION_WORKSPACE",
    "/Users/andreas/Development/Midi-Conversion/PianoTranscription",
)

In [None]:
# Setup step: ask MidiDataset to create its dataset files.
# The same directory is passed for both arguments.
# NOTE(review): presumably the arguments are (source directory, output directory)
# and the generated files are what MidiDataset(dataset_path, split) reads later —
# confirm against MidiDataset.create_dataset_files.
MidiDataset.create_dataset_files(dataset_path, dataset_path)

In [None]:
# Build one dataset object per split (train / validation / test).
train_set, val_set, test_set = (
    MidiDataset(dataset_path, "train"),
    MidiDataset(dataset_path, "validation"),
    MidiDataset(dataset_path, "test"),
)

# Paths of the audio file and the MIDI file for training item 0;
# both are reused by the cells below.
audio_path, midi_path = train_set.get_audio_path(0), train_set.get_midi_path(0)

# Show the audio file's metadata as reported by torchaudio.
metadata = torchaudio.info(audio_path)
print("Train file 0: ", metadata)


In [None]:
# Metadata of a stand-alone mp3, resolved relative to the notebook's working
# directory. Note that this rebinds `metadata` from the previous cell.
test_audio_file = './Planet_Earth_II.mp3'
metadata = torchaudio.info(test_audio_file)
print("Test file: ", metadata)

In [None]:
# Load the audio of training item 0. torchaudio.load returns the waveform
# tensor together with its sample rate.
waveform, sample_rate = torchaudio.load(audio_path)

PlotUtils.print_stats(waveform, sample_rate=sample_rate)

# Plot only the excerpt between 2.5 s and 7.5 s: seconds are converted to
# sample indices and the slice is taken once along the second axis, then
# shared by both plots.
excerpt_start = int(2.5 * sample_rate)
excerpt_end = int(7.5 * sample_rate)
excerpt = waveform[:, excerpt_start:excerpt_end]
PlotUtils.plot_waveform(excerpt, sample_rate)
PlotUtils.plot_specgram(excerpt, sample_rate)

# Playback uses the full recording, not just the plotted excerpt.
PlotUtils.play_audio(waveform, sample_rate)

In [None]:
# Open the MIDI file that belongs to training item 0. MidiFile, MidiTrack and
# Message are already imported in the notebook's first cell, so they are not
# re-imported here.
# clip=True is passed through to mido.MidiFile (see the mido documentation for
# its handling of out-of-range data bytes).
midi = MidiFile(midi_path, clip=True)
print('Filename: {}, length: {}'.format(midi.filename, midi.length))
print("Number of tracks: {}".format(len(midi.tracks)))

# Print the project's own summary of the file, then convert the MIDI messages
# into the project's Note objects for the cells below.
mu.print_midi_info(midi_path)
notes = Note.midi_to_notes(midi)

In [None]:
# Fallback tempo in microseconds per beat, used when the first track carries
# no 'set_tempo' message (500000 is the MIDI default, i.e. 120 BPM).
DEFAULT_TEMPO = 500000

# Take the tempo of the first 'set_tempo' message in track 0; later tempo
# changes and other tracks are not considered.
tempo = next(
    (msg.tempo for msg in midi.tracks[0] if msg.type == 'set_tempo'),
    DEFAULT_TEMPO,
)

# Wrap the notes in a Song and convert it to a tensor for the cells below.
song = Song(notes, midi.length, ticks_per_beat=midi.ticks_per_beat, tempo=tempo)
print("song: ", song)
# NOTE(review): the unit of discretization_step is defined by
# Song.to_start_time_tensor and is not visible here — confirm.
midi_tensor = song.to_start_time_tensor(discretization_step=100)

In [None]:
# Print, for each note index 0..127, whether it occurs anywhere in the song.
# Reducing with max over dim 0 leaves one value per entry of dim 1; a value
# greater than zero means that note index is set at least once.
occuring_notes = torch.max(midi_tensor, dim=0).values
note_flags = (occuring_notes > 0).tolist()
for i in range(128):
    hasNoteOccured = note_flags[i]
    print(f'Has note {i} occured: {bool(hasNoteOccured)}')



In [None]:
  
# Reduce with max over dim 1 to get one value per time step; a truthy value
# means at least one note entry is set at that step.
anyNotePlayed = torch.max(midi_tensor, dim=1).values

for i, step_value in enumerate(anyNotePlayed.tolist()):
    print("Time: ", i, "Any note played: ", bool(step_value))

# Indices of the time steps at which any note is played.
active_steps = torch.nonzero(anyNotePlayed)
times = active_steps.flatten()
print("Times at which any note is played: ", times)

In [None]:
# Show the song tensor as an image. The tensor is transposed first, so its
# dim 0 (the axis the earlier cell prints as "Time") becomes the second axis
# of the array handed to the plot helper.
# NOTE(review): how plot_tensor_as_image maps array axes to screen axes is not
# visible here — confirm in PlotUtils.
PlotUtils.plot_tensor_as_image(midi_tensor.T)

In [None]:
# Constant-Q transform of training item 0, shown in dB relative to its peak.
# librosa.load is called with its default arguments (see the librosa docs for
# the default resampling and channel handling).
y, sr = librosa.load(audio_path)
C = np.abs(librosa.cqt(y, sr=sr))

# The figure size is given in inches. The very large width is kept from the
# original notebook.
# NOTE(review): presumably chosen so a whole recording stays readable when
# scrolled horizontally — confirm that 400 is intended.
fig, ax = plt.subplots(figsize=(400, 6))

# librosa.display is imported explicitly in the notebook's import cell; older
# librosa releases do not expose it through a bare `import librosa`.
img = librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                               sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)
ax.set_title('Constant-Q power spectrum')
# Trailing semicolon keeps the Colorbar repr out of the cell output.
fig.colorbar(img, ax=ax, format="%+2.0f dB");

In [None]:
# Fetch item 0 from the training dataset and report the shapes of its two parts.
# Note: this rebinds `midi_tensor`, which earlier cells built from the Song
# object; re-running those cells afterwards would see the dataset's tensor.
first_item = train_set[0]
audio_tensor, midi_tensor = first_item
shape_report = f"Audio tensor shape: {audio_tensor.shape}, midi tensor shape: {midi_tensor.shape}"
print(shape_report)