# Prepare Data

In [None]:
from utils.data import ParseTaggedDataset

full_dataset, full_masks, full_keys = ParseTaggedDataset("")

In [1]:
from utils.data import MakeTripletDataset

train_dataloader, test_dataloader = MakeTripletDataset(sample_length=256, batch_size=24)

In [1]:
from utils.data import createDictionary

metadata = createDictionary()

In [8]:
metadata[('track_0000951')]

{'TRACK_ID': 'track_0000951',
 'ARTIST_ID': 'artist_000087',
 'ALBUM_ID': 'album_000149',
 'PATH': '51/951.mp3',
 'DURATION': '199.7',
 'TAGS': 'mood/theme---background'}

In [2]:
import torch

class Config:
    # === General ===
    model_name = "my_model"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float32
    save_path = "./logs"
    seed = 42

    # === Training ===
    num_epochs = 30
    batch_size = 24
    learning_rate = 5e-5
    weight_decay = 1e-5
    warmup_percent = 0.15
    max_grad_norm = 1.0
    log_every = 10  # steps between logs (optional)
    save_checkpoints = True

    # === Dataset ===
    dataset_path = "./data"
    use_masks = True
    num_workers = 4
    val_split = 0.2
    shuffle = True

    # === Model Behavior ===
    variational = False
    autoregressive = False

    # === Loss Coefficients ===
    beta_schedule = "log"   # e.g., "log", "linear", etc. (for getBetaLog)
    beta_max = 1.0
    cycle_length = 2
    contrastive_coeff = 0.1  # if using contrastive loss
    margin = 0.1

# Initialize Model

In [3]:
from models.AudioTransformer import AudioTransformer
from utils.misc import model_size

# ==== Model & Optimizer ====
num_heads = 16
num_layers = 16
encoder_layers = 16
decoder_layers = 5
d_model = 256
latent_space = 512
dim_feedforward = 1024
sample_length = 256
projection_dim = 128
dropout = 0.1

name_extension = ""

model = AudioTransformer(d_model=d_model, num_heads=num_heads, encoder_layers=encoder_layers, decoder_layers=decoder_layers, dim_feedforward=dim_feedforward, latent_space=latent_space, length=sample_length, dropout=dropout, name_extension=name_extension)
print(f"Parameters: {model_size(model)}")

Parameters: 85195840


In [4]:
from torch import optim
from training.training import trainTriplet

optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-6)
trainTriplet(model, train_dataloader, test_dataloader, optimizer, Config, device=Config.device)

Epoch 1/30: 100%|██████████| 6330/6330 [20:36<00:00,  5.12it/s]
Evaluating: 100%|██████████| 704/704 [00:46<00:00, 15.15it/s]


[Epoch 1] Train: 1.3852 | Triplet: 0.0000 Cos: 0.4368, MSE: 0.7315


Epoch 2/30: 100%|██████████| 6330/6330 [20:08<00:00,  5.24it/s]
Evaluating: 100%|██████████| 704/704 [00:41<00:00, 17.03it/s]


[Epoch 2] Train: 1.1299 | Triplet: 0.0000 Cos: 0.3744, MSE: 0.6907


Epoch 3/30: 100%|██████████| 6330/6330 [20:01<00:00,  5.27it/s]
Evaluating: 100%|██████████| 704/704 [00:41<00:00, 16.87it/s]


[Epoch 3] Train: 1.0532 | Triplet: 0.0000 Cos: 0.3446, MSE: 0.6705


Epoch 4/30: 100%|██████████| 6330/6330 [20:17<00:00,  5.20it/s]
Evaluating: 100%|██████████| 704/704 [00:42<00:00, 16.62it/s]


[Epoch 4] Train: 1.0140 | Triplet: 0.0000 Cos: 0.3267, MSE: 0.6591


Epoch 5/30: 100%|██████████| 6330/6330 [20:21<00:00,  5.18it/s]
Evaluating: 100%|██████████| 704/704 [00:41<00:00, 17.11it/s]


[Epoch 5] Train: 0.9895 | Triplet: 0.0000 Cos: 0.3169, MSE: 0.6524


Epoch 6/30: 100%|██████████| 6330/6330 [20:30<00:00,  5.14it/s]
Evaluating: 100%|██████████| 704/704 [00:41<00:00, 17.06it/s]


[Epoch 6] Train: 0.9724 | Triplet: 0.0000 Cos: 0.3102, MSE: 0.6453


Epoch 7/30: 100%|██████████| 6330/6330 [20:21<00:00,  5.18it/s]
Evaluating: 100%|██████████| 704/704 [00:41<00:00, 17.14it/s]


[Epoch 7] Train: 0.9612 | Triplet: 0.0000 Cos: 0.3103, MSE: 0.6444


Epoch 8/30:   6%|▌         | 389/6330 [01:15<19:12,  5.16it/s]


KeyboardInterrupt: 