In [1]:
from blocks.encoder import Encoder, VariationalEncoder
from blocks.pqmf import PQMF
from blocks.decoder import Generator
from blocks.latent_discriminator import LatentDiscriminator
import torch
import pytorch_lightning as pl
from aux import AudioDistanceV1
from dataset_lmdb import AudioDataset
from model import JeffVAE
from config import Config as config
import torch.multiprocessing as mp
import os
from torch.utils.data import DataLoader, Subset, random_split
from lightning.pytorch.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the complete dataset once; the train/val/test subsets are carved out of
# it later. Descriptor names and the number of bins come from the shared Config.
# The recorded output shows the constructor printing progress while it computes
# the min/max and the bins.
full_dataset = AudioDataset(
    db_path='footsteps_sf_db_speed_grass',
    descriptors=config.DESCRIPTORS,
    attribute_name='attributes',
    nb_bins=config.NUM_BINS,
)

computing min and max...
min: -0.12417352199554443, max: 1.1036418676376343 computed
computing bins...
bins computed


In [3]:
def custom_collate(batch):
    """Collate ``(audio, features, bin_values)`` samples into one batch.

    Args:
        batch: Sequence of 3-tuples as produced by the dataset. The first two
            entries of every sample must be tensors that ``torch.stack`` can
            combine.

    Returns:
        A tuple ``(audios, features, bin_values)`` where ``audios`` and
        ``features`` are stacked along a new leading batch dimension and
        ``bin_values`` is the first sample's entry, passed through unbatched.
    """
    # Transpose the list of samples into three per-field tuples.
    audio_items, feature_items, bin_items = zip(*batch)

    # bin_values is expected to be identical for every sample, so only the
    # first one is forwarded; this keeps it from gaining a batch dimension.
    shared_bins = bin_items[0]

    return torch.stack(audio_items), torch.stack(feature_items), shared_bins

In [4]:
# ---------------------------------------------------------------------------
# Training setup: split the dataset, build the loaders, assemble the model,
# configure checkpointing/logging and launch the Lightning training loop.
# Requires `full_dataset` and `custom_collate` from the cells above.
# ---------------------------------------------------------------------------
device = torch.device('cuda')

# Split parameters: train / val fractions; whatever remains is the test set.
TRAIN_FRACTION = 0.8
VAL_FRACTION = 0.1
SPLIT_SEED = 42  # fixed seed so the random split is reproducible

train_size = int(TRAIN_FRACTION * len(full_dataset))
val_size = int(VAL_FRACTION * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; all loaders use the custom collate so that
# bin_values is passed through unbatched.
train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=0, collate_fn=custom_collate)
val_loader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=0, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=0, collate_fn=custom_collate)

# Model components are taken from the shared Config.
# NOTE(review): `encoder` is not passed to JeffVAE below (the variational
# encoder is used instead); the name is kept in case other cells rely on it.
encoder = config.ENCODER
variational_encoder = config.VE
decoder = config.DECODER
pqmf = config.PQMF
latent_discriminator = config.LATENT_DISCRIMINATOR

# Assemble the model.
model = JeffVAE(
    latent_size=config.LATENT_SIZE,
    encoder=variational_encoder,
    decoder=decoder,
    latent_discriminator=latent_discriminator,
    pqmf=pqmf,
    multiband_audio_distance=AudioDistanceV1,
    learning_rate=config.LEARNING_RATE,
)
model.train_loader = train_loader
model = model.to(device)

# The optimizers are intentionally NOT created here: the Trainer invokes
# `model.configure_optimizers()` itself when `fit` starts, so a manual call
# would only build optimizers that are immediately thrown away.

# Use the logger class from the same package as the Trainer
# (`pytorch_lightning`), so type checks inside the Trainer recognise it.
logger = pl.loggers.TensorBoardLogger("tb_logs", name="sfRAVE")
print('num epochs: ', config.NUM_EPOCHS)

# Save a checkpoint every 12000 training steps; save_top_k=-1 keeps all of
# them instead of pruning by the monitored metric.
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints/v8",      # directory the checkpoints are written to
    filename="sfRAVE-{step}",      # filename template (global step is filled in)
    save_top_k=-1,
    every_n_train_steps=12000,
    monitor='recon_loss',
)

# Initialize the trainer.
trainer = pl.Trainer(
    callbacks=[checkpoint_callback],
    max_epochs=config.NUM_EPOCHS,
    accelerator='gpu',      # requires a GPU; 'auto' would let Lightning pick the hardware
    devices=1,
    logger=logger,          # TensorBoard logging
    log_every_n_steps=100,  # write logged metrics every 100 training steps
)

# Train the model (validation runs on val_loader).
trainer.fit(model, train_loader, val_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 5090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                     | Type                | Params | Mode 
-------------------------------------------------------------------------
0 | encoder                  | VariationalEncoder  | 10.4 M | train
1 | decoder                  | Generator           | 28.1 M | train
2 | latent_discriminator     | LatentDiscriminator | 438 K  | train
3 | pqmf                     | PQMF                | 0      | train
4 | multiband_audio_distance | AudioDistanceV1     | 0      | t

num epochs:  50000
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/jed/miniconda3/envs/SemanticFader/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=27` in the `DataLoader` to improve performance.


                                                                           

/home/jed/miniconda3/envs/SemanticFader/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/home/jed/miniconda3/envs/SemanticFader/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=27` in the `DataLoader` to improve performance.


Epoch 169:  63%|██████▎   | 76/120 [00:07<00:04, 10.25it/s, v_num=1, loss=4.260, recon_loss=4.290, kl_loss=3.050, adv_loss=-0.64, dis_loss=0.000, val_loss=5.590, val_recon_loss=5.590, val_kl_loss=2.710, val_pred_loss=1.890]   


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [1]:
# Re-launch training. This cell relies on `trainer`, `model`, `train_loader`
# and `val_loader` created by the training-setup cell; on a fresh kernel that
# cell must be run first, otherwise this fails with a NameError.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

NameError: name 'trainer' is not defined