In [1]:
import torch
import torch.nn as nn
from codebase.model import Model
from codebase.data import Dataset
from codebase.train import train, step
from codebase.inference import generate, tokens_to_segs
from codebase.utils import load_pkl, plot_list, visualize_model
from codebase.preprocessing import create_dataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')

Using device: cuda


In [35]:
create_dataset(
    nocturnes=True,
    dataset_path='maestro-v3.0.0',
    output_file='nocturnes.pkl',
    seg_fit_tightness=0.12
)

Loading paths...
Found 19 tracks
Processing track 1/19: MIDI-Unprocessed_R1_D1-9-12_mid--AUDIO-from_mp3_12_R1_2015_wav--3.midi
Processing track 2/19: MIDI-Unprocessed_22_R2_2011_MID--AUDIO_R2-D5_10_Track10_wav.midi
Processing track 3/19: MIDI-Unprocessed_R1_D2-21-22_mid--AUDIO-from_mp3_22_R1_2015_wav--4.midi
Processing track 4/19: MIDI-Unprocessed_058_PIANO058_MID--AUDIO-split_07-07-17_Piano-e_2-02_wav--4.midi
Processing track 5/19: MIDI-Unprocessed_02_R2_2011_MID--AUDIO_R2-D1_03_Track03_wav.midi
Processing track 6/19: MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_01_R1_2015_wav--2.midi
Processing track 7/19: MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_04_R1_2015_wav--3.midi
Processing track 8/19: MIDI-Unprocessed_072_PIANO072_MID--AUDIO-split_07-08-17_Piano-e_1-06_wav--3.midi
Processing track 9/19: MIDI-Unprocessed_06_R1_2009_03-07_ORIG_MID--AUDIO_06_R1_2009_06_R1_2009_05_WAV.midi
Processing track 10/19: MIDI-Unprocessed_06_R3_2011_MID--AUDIO_R3-D3_05_Track05_wav.midi
Processing t

In [None]:
# dataset = Overfitting tiny Dataset
dataset = load_pkl('small.pkl')
dataset.tracks = [dataset.tracks[0]]
for track_idx in range(len(dataset.tracks)):
    dataset.tracks[track_idx] = [[n for n in dataset.tracks[track_idx][0] if n.start < 11], [s for s in dataset.tracks[track_idx][1] if s.time < 11]]

In [2]:
# dataset = nocturnes
dataset = load_pkl('nocturnes.pkl')

In [11]:
print(dataset.tracks)

[[[Note(start=np.float64(0.9791243466161011), duration=np.float64(1.296732864010167), pitch=62, velocity=7), Note(start=np.float64(2.1888688905756637), duration=np.float64(2.1089610151803417), pitch=67, velocity=8), Note(start=np.float64(2.2080872403542857), duration=np.float64(0.8830325977229918), pitch=47, velocity=4), Note(start=np.float64(2.2090987324478975), duration=np.float64(0.8688717084124288), pitch=55, velocity=4), Note(start=np.float64(2.2374205110690246), duration=np.float64(2.76036192346626), pitch=43, velocity=3), Note(start=np.float64(2.6895574769134427), duration=np.float64(0.09609174889310924), pitch=50, velocity=4), Note(start=np.float64(3.006154502213897), duration=np.float64(0.09305727261227396), pitch=50, velocity=4), Note(start=np.float64(3.2984757172676713), duration=np.float64(0.6261136059456268), pitch=55, velocity=4), Note(start=np.float64(3.3338779405440797), duration=np.float64(0.6068952561670052), pitch=47, velocity=4), Note(start=np.float64(3.600911853257

In [None]:
model = Model(
    d_model= 600,
    nhead=8,
    num_encoder_layers=6,
    num_decoder_layers=6,
    dim_feedforward=2400,
    dropout=0.1
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [3]:
model = Model.load("saves/model.pt").to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [37]:
train('nocturnes.pkl', 32, 1e-3, 10000, device, model, 100, dataset)

Using provided model
Step 100/10000, Avg Loss: -5.2737
Step 200/10000, Avg Loss: -5.6251
Step 300/10000, Avg Loss: -6.0349
Step 400/10000, Avg Loss: -6.3510
Step 500/10000, Avg Loss: -6.5198
Step 600/10000, Avg Loss: -6.6119
Step 700/10000, Avg Loss: -6.7382
Step 800/10000, Avg Loss: -6.7091
Step 900/10000, Avg Loss: -6.8591
Step 1000/10000, Avg Loss: -6.9007
Step 1100/10000, Avg Loss: -6.9503
Step 1200/10000, Avg Loss: -6.8440
Step 1300/10000, Avg Loss: -6.8919
Step 1400/10000, Avg Loss: -6.8922
Step 1500/10000, Avg Loss: -6.9990
Step 1600/10000, Avg Loss: -6.8425
Step 1700/10000, Avg Loss: -6.8959
Step 1800/10000, Avg Loss: -6.9180
Step 1900/10000, Avg Loss: -6.9049
Step 2000/10000, Avg Loss: -6.9116
Step 2100/10000, Avg Loss: -6.9880
Step 2200/10000, Avg Loss: -6.9276
Step 2300/10000, Avg Loss: -6.9726
Step 2400/10000, Avg Loss: -7.0226
Step 2500/10000, Avg Loss: -7.0119
Step 2600/10000, Avg Loss: -6.9741
Step 2700/10000, Avg Loss: -7.0239
Step 2800/10000, Avg Loss: -6.9820
Step 290

In [38]:
train('nocturnes.pkl', 32, 1e-4, 10000, device, model, 100, dataset)

Using provided model
Step 100/10000, Avg Loss: -7.2151
Step 200/10000, Avg Loss: -7.1944
Step 300/10000, Avg Loss: -7.1614
Step 400/10000, Avg Loss: -7.2454
Step 500/10000, Avg Loss: -7.2028
Step 600/10000, Avg Loss: -7.1898
Step 700/10000, Avg Loss: -7.2439
Step 800/10000, Avg Loss: -7.1740
Step 900/10000, Avg Loss: -7.2300
Step 1000/10000, Avg Loss: -7.1572
Step 1100/10000, Avg Loss: -7.2522
Step 1200/10000, Avg Loss: -7.2504
Step 1300/10000, Avg Loss: -7.1734
Step 1400/10000, Avg Loss: -7.1850
Step 1500/10000, Avg Loss: -7.2231
Step 1600/10000, Avg Loss: -7.2192
Step 1700/10000, Avg Loss: -7.3578
Step 1800/10000, Avg Loss: -7.3224
Step 1900/10000, Avg Loss: -7.2521
Step 2000/10000, Avg Loss: -7.2074
Step 2100/10000, Avg Loss: -7.2890
Step 2200/10000, Avg Loss: -7.2932
Step 2300/10000, Avg Loss: -7.1504
Step 2400/10000, Avg Loss: -7.2660
Step 2500/10000, Avg Loss: -7.2322
Step 2600/10000, Avg Loss: -7.2121
Step 2700/10000, Avg Loss: -7.3317
Step 2800/10000, Avg Loss: -7.3216
Step 290

In [4]:
notes, tokens = dataset[0]
generated_tokens = generate(model, notes, max_length=127, device=device)
generated_segs = tokens_to_segs(generated_tokens)
ground_truth_segs = tokens_to_segs(tokens)
fig = plot_list(ground_truth_segs, generated_segs, title='Test Generation', tokens=generated_tokens)
fig.show()
print(f"{len(generated_tokens)} tokens generated")
print(f"{len(tokens)} gt tokens")

63 tokens generated
23 gt tokens


In [4]:
fig = visualize_model(model, dataset, num_plots=24, device=device, exclude_context=False)
fig.show()