In [88]:
# import fluidsynth
import glob
import numpy as np
import pandas as pd
import pretty_midi
import collections
from pathlib import Path
from IPython import display
import json

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn

%matplotlib inline
from matplotlib import pyplot as plt

Recreate the model that was used for training, which is defined in the `baseline_2.train` module.

In [95]:
from baseline_2.train import PianoRollLSTM, PianoRoll, get_df_meta

Load the model state and put it in eval mode.

In [96]:
# hidden_size has to match the layer sizes stored in the checkpoint,
# otherwise load_state_dict raises on the shape mismatch.
model = PianoRollLSTM(hidden_size=60)
model_weights_file = 'baseline_2/first_run/model_weights_iter2180000.pth'
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on
# a CPU-only machine; without it torch.load tries to restore every tensor
# onto the device it was saved from.
model.load_state_dict(torch.load(model_weights_file, map_location='cpu'))
# Switch to inference mode. eval() returns the module itself, so the cell
# still displays the model repr as its output.
model.eval()

PianoRollLSTM(
  (lstm): LSTM(128, 60, batch_first=True)
  (norm): BatchNorm1d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pitch_layer): Sequential(
    (0): Linear(in_features=60, out_features=128, bias=True)
    (1): Sigmoid()
  )
)

In [97]:
# Locate the classical data directory and collect its metadata via the
# helper imported from baseline_2.train.
_datadir = Path('./data/classical')
df_meta = get_df_meta(_datadir)

In [98]:
seq_length = 30  # window length handed to both datasets

# Keep only the rows whose composer is 'chpn', then draw a reproducible
# 80/20 split over the row positions of that subset.
df_chpn = df_meta[df_meta['composer'] == 'chpn']
rng = np.random.default_rng(12345)  # fixed seed so the split is repeatable
idx = np.arange(len(df_chpn))
n_train = int(0.8 * len(idx))
train_idx = rng.choice(idx, size=n_train, replace=False)
# Everything that was not sampled for training becomes the test set.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0; for 1-D
# inputs the two return the same boolean mask.
test_idx = idx[~np.isin(idx, train_idx)]
df_train = df_chpn.iloc[train_idx]
df_test = df_chpn.iloc[test_idx]

# The training set relies on PianoRoll's default for max_windows ...
dset_train = PianoRoll(df_meta=df_train,
                       seq_length=seq_length)

# ... whereas the test set passes max_windows=None explicitly.
dset_test = PianoRoll(df_meta=df_test,
                      seq_length=seq_length,
                      max_windows=None)

In [99]:
from preprocess.process_output import pianoRoll_to_midi
_SAMPLING_RATE = 16000  # passed to fluidsynth as `fs` and to the player as `rate`

def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
    """Synthesise ``pm`` with fluidsynth and return an IPython audio player.

    Args:
        pm: the MIDI object to render.
        seconds: accepted for interface compatibility but currently unused;
            the complete waveform is returned without truncation.

    Returns:
        A ``display.Audio`` widget playing the waveform at ``_SAMPLING_RATE``.
    """
    synthesized = pm.fluidsynth(fs=_SAMPLING_RATE)
    return display.Audio(synthesized, rate=_SAMPLING_RATE)

In [100]:
# Inspect the metadata table held by the test dataset (one row per file in
# the rendered output below).
dset_test.df_meta

Unnamed: 0,file,composer,end_time,expected_tempo,sampling_note_duration,roll_length,n_windows,file_idx_ends
11,data/classical/chpn_op35_2.mid,chpn,392.945705,172.375485,0.174039,2257,2226,2225
241,data/classical/chpn_op23.mid,chpn,539.483098,185.738568,0.161517,3340,3309,5534
306,data/classical/chpn_op25_e11.mid,chpn,186.978151,189.91644,0.157964,1183,1152,6686
312,data/classical/chpn_op66.mid,chpn,274.430146,211.482622,0.141856,1934,1903,8589
318,data/classical/chpn_op25_e2.mid,chpn,98.857577,174.116723,0.172298,515,484,9073


In [86]:
def generate_frames(net, primer_roll, n_frames, threshold=0.5):
    """Autoregressively extend ``primer_roll`` by ``n_frames`` binarised frames.

    At every step the current window is fed to ``net`` with a leading batch
    dimension of 1, the prediction is thresholded to 0/1, and the window is
    slid forward by one frame (oldest frame dropped, new frame appended).

    Args:
        net: callable model; its output must be concatenable to the window
            along dim 0 (presumably shape (1, n_pitches) -- confirm against
            the model definition).
        primer_roll: tensor used as the initial window; it is not modified.
        n_frames: number of frames to generate.
        threshold: predictions strictly above this value become 1.0.

    Returns:
        List with one thresholded float tensor per generated frame.
    """
    window = primer_roll
    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            pred = net(window.unsqueeze(0))
            frame = (pred > threshold).float()
            frames.append(frame)
            # Slide the window: drop the oldest frame, append the new one.
            window = torch.cat([window[1:], frame], dim=0)
    return frames


start_idx = 0
seq_len = 30*2  # number of frames to generate
sequence, label = dset_test[start_idx]
# Row 0 of the metadata supplies the timestep. NOTE(review): this assumes
# start_idx falls inside the first file of the test set -- revisit if
# start_idx is changed.
timestep = dset_test.df_meta.iloc[0]['sampling_note_duration']

# Render the primer so it can be compared with the continuation by ear.
primer = pd.DataFrame(np.asarray(sequence).astype(int))
primer_midi = pianoRoll_to_midi(primer, timestep=timestep)
display.display(display_audio(primer_midi))

# `sequence` keeps the primer intact; the rolling window lives in the helper.
next_frames = generate_frames(model, sequence, seq_len)

out = torch.cat(next_frames, dim=0)
out = np.asarray(out).astype(int)
df_proll = pd.DataFrame(out)

midi = pianoRoll_to_midi(df_proll, timestep=timestep)
display_audio(midi)

In [87]:
# Save the generated continuation and the primer that seeded it as MIDI
# files under ./baseline_2.
midi.write('./baseline_2/pianoroll_chopin_example_output.mid')
primer_midi.write('./baseline_2/pianoroll_chopin_example_primer.mid')

In [89]:
from benchmark.train import PreprocessedPianoRoll, get_preprocessed_files
from benchmark.train import PianoRollLSTM as BenchmarkModel

In [90]:
# hidden_size has to match the layer sizes stored in the checkpoint,
# otherwise load_state_dict raises on the shape mismatch.
benchmark_model = BenchmarkModel(hidden_size=60)
benchmark_model_weights_file = 'benchmark/third_run/model_weights_iter950000.pth'
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on
# a CPU-only machine; without it torch.load tries to restore every tensor
# onto the device it was saved from.
benchmark_model.load_state_dict(
    torch.load(benchmark_model_weights_file, map_location='cpu')
)
# Switch to inference mode. eval() returns the module itself, so the cell
# still displays the model repr as its output.
benchmark_model.eval()

PianoRollLSTM(
  (lstm): LSTM(128, 60, batch_first=True)
  (norm): BatchNorm1d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pitch_layer): Sequential(
    (0): Linear(in_features=60, out_features=128, bias=True)
    (1): Sigmoid()
  )
)

In [91]:
# Read the settings stored alongside the benchmark run.
config_file_path = './benchmark/third_run/config.json'
with open(config_file_path) as f:
    config = json.load(f)

# Collect the metadata of the preprocessed benchmark files.
_datadir = Path('./data/benchmark_processed_handless')
df_meta = get_preprocessed_files(_datadir)
print('data fetched', flush=True)

# Pieces whose name contains 'G4' form the test split; the rest is training.
g4_mask = df_meta['piece_name'].str.contains('G4')
df_test = df_meta[g4_mask]
df_train = df_meta[~g4_mask]

# Expose the config entries under the names used elsewhere in the notebook.
seq_length = config['SEQUENCE_LENGTH']
learning_rate = config['LEARN_RATE']
batch_size = config['BATCH_SIZE']
num_workers = 0
num_epochs = config['NUM_EPOCHS']
out_interval = config['OUT_INTERVAL']
hidden_size = config['HIDDEN_SIZE']

# Both splits are built with identical windowing arguments.
dataset_kwargs = dict(seq_length=seq_length, max_windows=None)
dset_train = PreprocessedPianoRoll(df_meta=df_train, **dataset_kwargs)
dset_test = PreprocessedPianoRoll(df_meta=df_test, **dataset_kwargs)

data fetched


In [54]:
# Inspect the metadata table held by the benchmark training dataset.
# NOTE(review): the rendered table shows negative n_windows for short
# pieces (e.g. roll_length 25 -> -8) and file_idx_ends decreasing on those
# rows -- worth confirming how the dataset indexes such files.
dset_train.df_meta

Unnamed: 0,file,piece_name,roll_length,n_windows,file_idx_ends
0,data/benchmark_processed_handless/minArp_time=...,minArp_time=1_pitch=C5_octvs=4_loops=2,48,15,14
1,data/benchmark_processed_handless/chromatic_ti...,chromatic_time=1_pitch=C#4_octvs=2_loops=2,97,64,78
2,data/benchmark_processed_handless/minArp_time=...,minArp_time=1_pitch=E4_octvs=3_loops=2,37,4,82
3,data/benchmark_processed_handless/minArp_time=...,minArp_time=1_pitch=A#4_octvs=3_loops=2,37,4,86
6,data/benchmark_processed_handless/maj_time=1_p...,maj_time=1_pitch=B4_octvs=2_loops=2,56,23,109
...,...,...,...,...,...
235,data/benchmark_processed_handless/min_time=1_p...,min_time=1_pitch=A4_octvs=2_loops=2,56,23,6866
236,data/benchmark_processed_handless/minArp_time=...,minArp_time=1_pitch=F#4_octvs=2_loops=2,25,-8,6858
237,data/benchmark_processed_handless/maj_time=1_p...,maj_time=1_pitch=C5_octvs=1_loops=2,29,-4,6854
238,data/benchmark_processed_handless/chromatic_ti...,chromatic_time=1_pitch=D4_octvs=3_loops=2,145,112,6966


In [94]:
start_idx = 79
seq_len = 12*12  # number of frames to generate
sequence, label = dset_train[start_idx]
# The benchmark metadata table has no 'sampling_note_duration' column, so a
# fixed timestep is passed to pianoRoll_to_midi (presumably seconds per
# frame -- confirm against preprocess.process_output).
timestep = 0.1

# Render the primer so it can be compared with the continuation by ear.
primer = pd.DataFrame(np.asarray(sequence).astype(int))
display.display(
    display_audio(pianoRoll_to_midi(primer, timestep=timestep))
)

next_frames = []
# Roll a separate window forward so `sequence` keeps the primer intact and
# the cell gives the same result when it is re-run.
window = sequence
with torch.no_grad():
    for _step in range(seq_len):
        # Add a batch dimension of 1 before calling the model.
        pred = benchmark_model(window.unsqueeze(0))
        next_frame = (pred > 0.5).float()
        next_frames.append(next_frame)
        # Slide the window: drop the oldest frame, append the new one.
        window = torch.cat([window[1:], next_frame], dim=0)

out = torch.cat(next_frames, dim=0)
out = np.asarray(out).astype(int)
df_proll = pd.DataFrame(out)

midi = pianoRoll_to_midi(df_proll, timestep=timestep)
display_audio(midi)

In [61]:
# Save the generated benchmark continuation to the working directory.
# NOTE(review): the file name has no '.mid' extension and ends in
# 'loopsid' -- it looks truncated; confirm the intended name.
midi.write('benchmark_run2_longseq_minArp_time=1_pitch=C5_octvs=4_loopsid')