# Audio Weld Defect Classification — Training Notebook

Instantiate the `AudioCNN` classifier, dataset, loss, optimizer and train
using the existing `run_training` loop.

In [1]:
import json
import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split

## 1. Load Config

In [2]:
with open("configs/audio_config.json") as f:
    cfg = json.load(f)

audio_cfg = cfg["audio"]
model_cfg = cfg["model"]
optim_cfg = cfg["optimizer"]
train_cfg = cfg["training"]
data_cfg = cfg["data"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
print(json.dumps(cfg, indent=2))

Device: cuda
{
  "audio": {
    "sampling_rate": 16000,
    "n_fft": 1024,
    "frame_length_in_s": 0.04,
    "frame_step_in_s": 0.02,
    "n_mels": 40,
    "f_min": 0,
    "f_max": 8000,
    "max_length_in_s": 38.0,
    "normalize": true
  },
  "model": {
    "num_classes": 7,
    "dropout": 0.3
  },
  "optimizer": {
    "type": "Adam",
    "lr": 0.001,
    "weight_decay": 0.0001
  },
  "training": {
    "num_epochs": 30,
    "batch_size": 16,
    "patience": 7,
    "val_split": 0.2,
    "seed": 42,
    "num_workers": 4,
    "checkpoint_dir": "checkpoints/audio"
  },
  "data": {
    "data_root": "sampleData",
    "test_root": "/data1/malto/therness/data/audio_test"
  }
}


## 2. Dataset

In [3]:
from audio_processing import AudioDataset

full_dataset = AudioDataset(data_cfg["data_root"], cfg=audio_cfg, labeled=False)
num_classes = len(full_dataset.label_to_idx)

print(f"Total samples: {len(full_dataset)}")
print(f"Classes ({num_classes}): {full_dataset.label_to_idx}")

# Store label map for later use
cfg["label_map"] = full_dataset.idx_to_label

Total samples: 10
Classes (0): {}


In [4]:
# Train / val split
val_size = int(len(full_dataset) * train_cfg["val_split"])
train_size = len(full_dataset) - val_size

generator = torch.Generator().manual_seed(train_cfg["seed"])
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size], generator=generator)
print(f"Train: {train_size} | Val: {val_size}")

# Collate: AudioDataset returns dicts -> (inputs, targets) tuples
def train_collate_fn(batch):
    audios = torch.stack([item["audio"] for item in batch])   # (B, 1, n_mels, T)
    labels = torch.tensor([item["label"] for item in batch])  # (B,)
    return audios, labels

train_loader = DataLoader(
    train_dataset, batch_size=train_cfg["batch_size"], shuffle=True,
    num_workers=train_cfg["num_workers"], collate_fn=train_collate_fn,
)
val_loader = DataLoader(
    val_dataset, batch_size=train_cfg["batch_size"], shuffle=False,
    num_workers=train_cfg["num_workers"], collate_fn=train_collate_fn,
)

Train: 8 | Val: 2


In [11]:
a = next(iter(train_loader))
a[0]

tensor([[[[-0.2003, -0.3541,  0.2331,  ..., -0.6021, -0.6926, -0.6211],
          [-0.1795,  0.0968, -0.1423,  ..., -0.4333, -0.6678, -0.5548],
          [-0.2966, -0.1327, -0.0234,  ..., -0.3532, -0.8020, -0.7333],
          ...,
          [-1.8556, -1.7839, -1.8196,  ..., -2.0243, -2.0485, -1.9750],
          [-2.1485, -1.8102, -1.8020,  ..., -2.1441, -2.0766, -2.1682],
          [-2.0537, -1.9974, -2.0711,  ..., -2.1354, -2.2657, -2.1634]]],


        [[[-0.6331, -0.1505, -0.5399,  ..., -0.4737, -0.4126, -0.0767],
          [-0.1887, -0.1762, -0.5018,  ..., -0.6731, -0.5273, -0.1955],
          [-0.5690, -0.4494, -0.5261,  ..., -0.9991, -0.5571, -0.5513],
          ...,
          [-1.7909, -1.6095, -1.5983,  ..., -1.6290, -1.6648, -1.4818],
          [-1.8378, -1.7935, -1.7176,  ..., -1.6224, -1.6318, -1.7545],
          [-1.9110, -1.6786, -1.8664,  ..., -1.8265, -1.8980, -1.8672]]],


        [[[-0.1372, -0.5137, -0.1263,  ..., -0.4436, -0.7893, -0.2517],
          [-0.6127, -0.541

## 3. Model

In [5]:
from audio_model import AudioCNN

model = AudioCNN(num_classes=num_classes, dropout=model_cfg["dropout"])
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters()):,}")

AudioCNN(
  (block1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.3, inplace=False)
  )
  (block2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.3, inplace=False)
  )
  (block3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.3, inplace=Fal

  init.kaiming_uniform_(self.weight, a=math.sqrt(5))


## 4. Loss & Optimizer

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=optim_cfg["lr"],
    weight_decay=optim_cfg["weight_decay"],
)

## 5. Train

In [7]:
from run_train import run_training

checkpoint_dir = train_cfg["checkpoint_dir"]
os.makedirs(checkpoint_dir, exist_ok=True)

# Save config alongside checkpoints for reproducibility
with open(os.path.join(checkpoint_dir, "config.json"), "w") as f:
    json.dump(cfg, f, indent=2)

patience = train_cfg["patience"] if train_cfg["patience"] > 0 else None

results = run_training(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=train_cfg["num_epochs"],
    checkpoint_dir=checkpoint_dir,
    patience=patience,
    seed=train_cfg["seed"],
)


Epoch 1/30
----------------------------------------


Training:   0%|          | 0/1 [00:00<?, ?it/s]/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [2,0,0] Assertion `t >= 0 && t < n_classes` failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [4,0,0] Assertion `t >= 0 && t < n_classes` failed.
/pytorch/aten/src/ATen/native/cuda/Loss.cu:245: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [5,0,0] Assertion `t 

AcceleratorError: CUDA error: device-side assert triggered
Search for `cudaErrorAssert' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


## 6. Training Curves

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 4))
plt.plot(results["train_losses"], label="Train")
plt.plot(results["val_losses"], label="Val")
plt.title("AudioCNN — CrossEntropy Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(checkpoint_dir, "audio_training_curve.png"), dpi=150)
plt.show()

print(f"Best epoch: {results['best_epoch']}")