# Codebase - Custom Transformer Training

In [1]:
import torch
from codebase.model import Model
from codebase.data import Dataset
from codebase.train import train, train_exhaustively
from codebase.inference import generate, tokens_to_segs
from codebase.utils import load_dataset, visualize_model, visualize_teacher_forcing, plot_loss
from codebase.preprocessing import create_dataset

# Pick the compute device once for the whole notebook: use the GPU when
# PyTorch reports that CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using device: {device}')

Using device: cuda


In [None]:
# NOTE(review): this cell contains only a commented-out call, so running it does
# nothing; the output recorded beneath it comes from an earlier execution.
# The trailing positional arguments (0.12, True, None, -1, 80) are not named
# here — confirm their meaning against codebase.preprocessing.create_dataset.
# dataset = create_dataset("train", "maestro-v3.0.0", "nocturnes.pkl", 0.12, True, None, -1, 80)

Loading paths...
Found 19 tracks
Using 8 parallel workers
[1/19] MIDI-Unprocessed_R1_D1-9-12_mid--AUDIO-from_mp3_12_R1_2015_wav--3.midi | 10 augmented versions | Avg: 10.1s/track | ETA: 3.0min
[2/19] MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_04_R1_2015_wav--3.midi | 10 augmented versions | Avg: 5.2s/track | ETA: 1.5min
[3/19] MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_01_R1_2015_wav--2.midi | 10 augmented versions | Avg: 3.9s/track | ETA: 1.0min
[4/19] MIDI-Unprocessed_R1_D2-21-22_mid--AUDIO-from_mp3_22_R1_2015_wav--4.midi | 10 augmented versions | Avg: 3.1s/track | ETA: 0.8min
[5/19] MIDI-Unprocessed_22_R2_2011_MID--AUDIO_R2-D5_10_Track10_wav.midi | 10 augmented versions | Avg: 2.5s/track | ETA: 0.6min
[6/19] MIDI-Unprocessed_072_PIANO072_MID--AUDIO-split_07-08-17_Piano-e_1-06_wav--3.midi | 10 augmented versions | Avg: 2.2s/track | ETA: 0.5min
[7/19] MIDI-Unprocessed_058_PIANO058_MID--AUDIO-split_07-07-17_Piano-e_2-02_wav--4.midi | 10 augmented versions | Avg: 1.9s/track | ET

In [7]:
# Load the dataset stored at nocturnes/chunk_0.pkl into the notebook-level name
# `dataset` (read by the training and visualisation cells) and print how many
# entries it holds; the message labels them as tracks.
dataset = load_dataset("nocturnes/chunk_0.pkl")
print(f"Dataset loaded: {len(dataset)} tracks")

Dataset loaded: 188 tracks


In [2]:
# Restore a saved model from 'complete_model.pt' onto the selected device.
# NOTE(review): the following cell rebinds `model` to a newly constructed Model,
# so running both cells in order discards the model loaded here — run only the
# one you need.
model = Model.load('complete_model.pt', device=device)

In [2]:
# Construct a new Model and move it to the selected device. This rebinds
# `model`, replacing whatever the preceding Model.load cell assigned.
# NOTE(review): the six hyperparameters are passed positionally and their
# meaning is not visible here (2112 is 4 x 528) — confirm against Model.__init__.
model = Model(528, 6, 6, 8, 2112, 0.1).to(device)

In [None]:
# Training run on the already-loaded `dataset`. The settings are gathered in
# one mapping so they can be inspected before the call, then unpacked as the
# same keyword arguments.
train_settings = dict(
    batch_size=64,
    lr=1e-4,
    num_steps=500,
    device=device,
    model=model,
    print_every=1,
    dataset=dataset,
    model_path="nocturnes_unnormalized.pt",
    alpha=0,
)
train(**train_settings)

Using provided model
Creating dataloader...
Starting training loop...
Step 1/500
  Total: 0.6375 | Segment: 0.0000 | Param: 0.6375
  Height: 0.0653 | Amount: 0.0124 | Time: 0.9889
Step 2/500
  Total: 0.9985 | Segment: 0.0000 | Param: 0.9985
  Height: 0.0972 | Amount: 0.0155 | Time: 1.5771
Step 3/500
  Total: 0.7791 | Segment: 0.0000 | Param: 0.7791
  Height: 0.0873 | Amount: 0.0161 | Time: 1.1768
Step 4/500
  Total: 0.7765 | Segment: 0.0000 | Param: 0.7765
  Height: 0.0724 | Amount: 0.0150 | Time: 1.2335
Step 5/500
  Total: 0.7534 | Segment: 0.0000 | Param: 0.7534
  Height: 0.0764 | Amount: 0.0137 | Time: 1.1738
Step 6/500
  Total: 0.7340 | Segment: 0.0000 | Param: 0.7340
  Height: 0.0707 | Amount: 0.0134 | Time: 1.1584
Step 7/500
  Total: 0.7313 | Segment: 0.0000 | Param: 0.7313
  Height: 0.0770 | Amount: 0.0118 | Time: 1.1312
Step 8/500
  Total: 0.6545 | Segment: 0.0000 | Param: 0.6545
  Height: 0.0719 | Amount: 0.0144 | Time: 0.9928
Step 9/500
  Total: 0.6410 | Segment: 0.0000 | Par

KeyboardInterrupt: 

In [None]:
# Long training run over the chunk files found under `dataset_path`.
# `device` follows the notebook-level selection made in the first cell rather
# than a hard-coded "cuda", matching the `train(...)` cell and keeping the
# training device consistent with where `model` was placed.
# The recorded output reports the effective batch as batch_size x
# accumulation_steps (50x2=100) and the total as num_steps x chunks x rotations.
train_exhaustively(
    batch_size=50,
    lr=3e-4,
    num_steps=4000,
    device=device,
    model=model,
    print_every=100,
    model_path="complete_392000.pt",
    dataset_path="complete_dataset",
    accumulation_steps=2,
    num_rotations=2,
    num_workers=4,
    alpha=0.7,
    add_checkpoints=10000,
    record_loss=100,
)

Found 49 chunk files in complete_dataset
Training: warmup=19600, effective_batch=50x2=100, workers=4, rotations=2
Total steps: 4000 steps/chunk × 49 chunks × 2 rotations = 392000 steps
Optimizations: mixed_precision=ON, cudnn_benchmark=ON
Loading chunk 1/49...
Creating dataloader for chunk 1...
Starting training on chunk 1...


In [32]:
# Call visualize_teacher_forcing on the current model and dataset and show the
# figure it returns. This binds the notebook-level name `fig`, which other
# cells also assign and read.
fig = visualize_teacher_forcing(model, dataset)
fig.show()

In [22]:
# Build the model visualisation and keep the returned figure in `fig` (shown by
# the next cell). The fourth positional argument was the literal "cuda" and is
# taken to be the device, so it now uses the notebook-level `device` and the
# cell no longer requests a GPU on machines without one.
# NOTE(review): 12 and the three trailing booleans are positional; their
# meaning is not visible here — confirm against codebase.utils.visualize_model.
fig = visualize_model(model, dataset, 12, device, False, False, True)

In [23]:
# Display whatever figure is currently bound to `fig`; this cell defines nothing
# itself, so a cell that assigns `fig` must have been run first.
fig.show()

In [4]:
# Plot the loss history stored in "test.pt_loss_history.pkl".
# NOTE(review): this file name does not match either model_path used by the
# training cells above ("nocturnes_unnormalized.pt", "complete_392000.pt"), and
# the meaning of the positional False is not visible here — confirm both
# against codebase.utils.plot_loss.
plot_loss("test.pt_loss_history.pkl", False)