# Codebase - Custom Transformer Training

In [1]:
import torch
from codebase.model import Model
from codebase.data import Dataset
from codebase.train import train, train_exhaustively
from codebase.inference import generate, tokens_to_segs
from codebase.utils import load_dataset, visualize_model, visualize_teacher_forcing, plot_loss
from codebase.preprocessing import create_dataset
from codebase.evaluate import evaluate

# Select the compute device: 'cuda' when torch reports CUDA as available, else 'cpu'.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using device: {device}')

Using device: cuda


In [None]:
# Disabled dataset-creation call: the whole call is wrapped in a triple-quoted
# string, so running this cell does not invoke create_dataset.
# NOTE(review): the output recorded under this cell presumably comes from an
# earlier run in which the call was still active — confirm before relying on it.
'''
dataset = create_dataset(
    split="test",
    dataset_path="maestro-v3.0.0", 
    seg_fit_tightness=0.12,
    nocturnes=False,
    track_idx=None,
    num_workers=-1,
    max_chunk_size_mb=400
    )
'''

Loading paths...
Found 177 tracks
Using 8 parallel workers
[1/177] MIDI-Unprocessed_11_R1_2009_06-09_ORIG_MID--AUDIO_11_R1_2009_11_R1_2009_07_WAV.midi | 10 augmented versions | Avg: 9.3s/track | ETA: 27.2min
[2/177] MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV.midi | 10 augmented versions | Avg: 5.4s/track | ETA: 15.7min
[3/177] MIDI-Unprocessed_11_R1_2009_01-05_ORIG_MID--AUDIO_11_R1_2009_11_R1_2009_04_WAV.midi | 10 augmented versions | Avg: 4.3s/track | ETA: 12.4min
[4/177] MIDI-Unprocessed_SMF_17_R1_2004_03-06_ORIG_MID--AUDIO_20_R2_2004_12_Track12_wav--1.midi | 10 augmented versions | Avg: 3.3s/track | ETA: 9.6min
[5/177] MIDI-Unprocessed_24_R1_2006_01-05_ORIG_MID--AUDIO_24_R1_2006_03_Track03_wav.midi | 10 augmented versions | Avg: 3.3s/track | ETA: 9.4min
[6/177] MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_04_Track04_wav.midi | 10 augmented versions | Avg: 2.9s/track | ETA: 8.2min
[7/177] MIDI-Unprocessed_04_R3_2011_MID--AUDIO_R3-D2

In [2]:
# Load the dataset stored at the given path and report how many tracks it holds.
chunk_path = "400Mb/chunk_0.pkl"
dataset = load_dataset(chunk_path)
print(f"Dataset loaded: {len(dataset)} tracks")

Dataset loaded: 1048 tracks


In [4]:
# Load a model from the 'param_loss_test.pt' file, passing the selected device.
checkpoint_path = 'param_loss_test.pt'
model = Model.load(checkpoint_path, device=device)

In [3]:
# Evaluate the model loaded from "alpha_0.pt" on the dataset (num_samples=100,
# threshold=0.3, fixed seed 0) and print the returned metrics.
# The result is bound to `eval_metrics` rather than `dict`, so the built-in
# `dict` type is not shadowed for the rest of the kernel session.
eval_metrics = evaluate(
    model=Model.load(path="alpha_0.pt", device=device),
    dataset=dataset,
    num_samples=100,
    device=device,
    threshold=0.3,
    seed=0
)
print(eval_metrics)

{'correlation': np.float64(-0.02245402646990418), 'binary': {'tp': np.int64(68036), 'tn': np.int64(1548), 'fp': np.int64(28692), 'fn': np.int64(1724)}, 'num_tokens_ratio': np.float64(2.523698076223493)}


In [2]:
# Construct a new Model and move it to the selected device.
# NOTE(review): the positional arguments are passed unchanged; their meaning is
# defined by Model's constructor, which is not visible in this notebook.
model = Model(528, 6, 6, 8, 2112, 0.1)
model = model.to(device)

In [None]:
# Train the current `model` on the in-memory `dataset` for 500 steps.
train(
    # What to train, on which data and device.
    model=model,
    dataset=dataset,
    device=device,
    # Optimisation settings.
    batch_size=64,
    lr=1e-4,
    num_steps=500,
    alpha=0,
    # Logging interval and model file path.
    print_every=1,
    model_path="nocturnes_unnormalized.pt",
)

Using provided model
Creating dataloader...
Starting training loop...
Step 1/500
  Total: 0.6375 | Segment: 0.0000 | Param: 0.6375
  Height: 0.0653 | Amount: 0.0124 | Time: 0.9889
Step 2/500
  Total: 0.9985 | Segment: 0.0000 | Param: 0.9985
  Height: 0.0972 | Amount: 0.0155 | Time: 1.5771
Step 3/500
  Total: 0.7791 | Segment: 0.0000 | Param: 0.7791
  Height: 0.0873 | Amount: 0.0161 | Time: 1.1768
Step 4/500
  Total: 0.7765 | Segment: 0.0000 | Param: 0.7765
  Height: 0.0724 | Amount: 0.0150 | Time: 1.2335
Step 5/500
  Total: 0.7534 | Segment: 0.0000 | Param: 0.7534
  Height: 0.0764 | Amount: 0.0137 | Time: 1.1738
Step 6/500
  Total: 0.7340 | Segment: 0.0000 | Param: 0.7340
  Height: 0.0707 | Amount: 0.0134 | Time: 1.1584
Step 7/500
  Total: 0.7313 | Segment: 0.0000 | Param: 0.7313
  Height: 0.0770 | Amount: 0.0118 | Time: 1.1312
Step 8/500
  Total: 0.6545 | Segment: 0.0000 | Param: 0.6545
  Height: 0.0719 | Amount: 0.0144 | Time: 0.9928
Step 9/500
  Total: 0.6410 | Segment: 0.0000 | Par

KeyboardInterrupt: 

In [None]:
# Run train_exhaustively on a newly constructed model, reading data from the
# "complete_dataset" path.
# `device=device` replaces the hard-coded "cuda" so the device handed to the
# trainer always matches the one the model is moved to, and CUDA is no longer
# requested when the notebook selected the CPU.
train_exhaustively(
    batch_size=50,
    lr=3e-4,
    num_steps=4000,
    device=device,
    model=Model(528, 6, 6, 8, 2112, 0.1).to(device),
    print_every=2000,
    model_path="complete_392000.pt",
    dataset_path="complete_dataset",
    accumulation_steps=2,
    num_rotations=2,
    num_workers=4,
    alpha=0.7,
    add_checkpoints=10000,
    record_loss=100
    )

# Progress note: 14700 after 190 min (presumably training steps — confirm).

Found 49 chunk files in complete_dataset
Training: warmup=19600, effective_batch=50x2=100, workers=3, rotations=2
Total steps: 4000 steps/chunk × 49 chunks × 2 rotations = 392000 steps
Optimizations: mixed_precision=ON, cudnn_benchmark=ON
Loading chunk 1/49...
Creating dataloader for chunk 1...
Starting training on chunk 1...


KeyboardInterrupt: 

In [11]:
# Build the teacher-forcing visualisation for the current `model` and `dataset`,
# keep the returned figure in `fig`, and display it.
fig = visualize_teacher_forcing(model, dataset)
fig.show()

In [18]:
# Create the model visualisation (12 plots) and keep the returned figure in `fig`.
fig = visualize_model(
    # Inputs.
    model=model,
    dataset=dataset,
    device=device,
    # Plot options.
    num_plots=12,
    generate=True,
    exclude_context=False,
    show_notes=False,
    seed=None,
)

In [19]:
# Display the figure currently bound to `fig`.
fig.show()

In [4]:
# Plot the loss record stored in "400Mb_a0.pkl" under the given title.
plot_loss(
    "400Mb_a0.pkl",
    detailed=True,
    plot_lr=False,
    title="Training Loss (alpha = 0)",
)

In [7]:
# Evaluate the current `model` on the dataset (num_samples=100, threshold=0.5,
# no fixed seed); the return value is the cell's displayed result.
# NOTE(review): the output recorded for this cell ends in a ValueError about
# mismatched concatenation dimensions — investigate inside `evaluate`.
evaluate(
    model=model, dataset=dataset, device=device,
    num_samples=100, threshold=0.5, seed=None,
)

  c /= stddev[:, None]
  c /= stddev[None, :]
  recall = tp / (tp + fn)


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 0 and the array at index 1 has size 1000