In [None]:
import os 
import sys
import json
from types import SimpleNamespace

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor
from lightning.pytorch.tuner import Tuner

sys.path.append('../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device

%load_ext autoreload
%autoreload 2

In [None]:
# Parameter definition: every tunable value of the experiment lives on `cfg`.
cfg = SimpleNamespace()

# Root directory that contains `data/` and `class_weights/`.
# NOTE(review): `full_path` was never defined in this notebook, so a fresh kernel failed
# with a NameError. It is assumed to be the parent directory of the notebook (the same
# layout `sys.path.append('../src')` relies on) - adjust if the data lives elsewhere.
full_path = os.path.abspath('..')

cfg.wav_crop_len = 5                           # Length of cropped files in seconds
cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/' # Filepath
cfg.n_classes = 66                             # Number of classes
cfg.pretrained = True                          # Use pretrained model
cfg.backbone = 'tf_efficientnetv2_s.in21k'     # Image classification model (from list_models)
cfg.in_chans = 1                               # Number of channels
cfg.num_workers = 4                            # Number of parallelized CPUs
cfg.include_val = True                         # Validation-set included / excluded
cfg.max_amp = False                            # Experimental feature

# Training hyperparameters
cfg.n_epochs = 18                              # Number of epochs
cfg.lr = 0.0017                                # Learning rate
cfg.weight_decay = 1e-5                        # Weight decay
cfg.label_smoothing = 0.1                      # Label smoothing
cfg.batch_size = 32                            # Batch size
cfg.sample_rate = 44100                        # Sample rate

# Mel spectrogram hyperparameters (parameters as documented in the Torchaudio documentation)
cfg.n_mels = 128
cfg.n_fft = 2048
cfg.fmin = 400
cfg.fmax = cfg.sample_rate / 2                 # Half the sample rate
cfg.window_size = cfg.n_fft
cfg.hop_length = cfg.n_fft // 2                # Half the FFT size; integer division keeps it an int
cfg.power = 2
cfg.top_db = 80.0

# Normalization
cfg.mel_normalized = True                      # Mel normalization as documented in Torchaudio (normalized=True)
cfg.minmax_norm = False                        # Apply minmax normalization on spectrograms

# Augmentation parameters
cfg.impulse_prob = 0.15                        # Impulse probability
cfg.noise_prob = 0.15                          # Noise probability

cfg.max_noise = 0.04                           # Noise injection amplitude
cfg.min_snr = 5                                # Lower bound of the signal-noise ratio (Gaussian & Pink Noise)
cfg.max_snr = 20                               # Upper bound of the signal-noise ratio

cfg.mixup = False                              # Apply mixup augmentation
cfg.specaug = False                            # Apply OneOf(MaskFrequency, MaskTime)
cfg.specaug_prob = 0.25                        # Probability to apply spectrogram augmentation
cfg.mixup_prob = 1                             # Parameter of a symmetric Beta distribution, 1=uniform distribution

# NOTE(review): `get_min_max` is not imported anywhere in this notebook, so switching
# `cfg.minmax_norm` on raises a NameError until it is imported (presumably from the
# project's `custom` package - confirm).
if cfg.minmax_norm:
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)

In [None]:
# Loss function and class weights.
# The device is chosen at runtime so the notebook also runs on CPU-only machines instead
# of failing on a hard-coded 'cuda'. The weights are cast to float32 because
# CrossEntropyLoss needs `weight` in the same dtype as the logits (assumed float32 here),
# while an array loaded with np.load may be float64.
weight_device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_weights = np.load(f'{full_path}/class_weights/class_weights_2.npy')
loss_fn = nn.CrossEntropyLoss(weight=torch.as_tensor(class_weights, dtype=torch.float32, device=weight_device),
                              label_smoothing=cfg.label_smoothing)

# Data logic, loading, augmentation
dm = DataModule(cfg=cfg)

# Network
model = SimpleCNN(cfg)

# Training logic: wraps the network, the loss and the optimizer settings from `cfg`
tmod = TrainModule(model,
                   loss_fn=loss_fn,
                   optimizer_name='Adam',
                   optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                   cfg=cfg)

In [None]:
# Trainer used by the learning-rate finder.
# The epoch budget is read from the config (it was hard-coded to 20 while cfg.n_epochs
# says 18), and `reload_dataloaders_every_n_epochs` is an integer option, so "never
# reload" is written as 0 rather than False.
trainer = L.Trainer(
    max_epochs=cfg.n_epochs,
    accelerator="auto",
    devices="auto",
    enable_checkpointing=True,
    reload_dataloaders_every_n_epochs=0)

In [None]:
# Adjust min_lr and max_lr to define the lr search space;
# num_training defines the granularity (number of learning rates that are tested).
#
# `attr_name` must name the attribute that stores the learning rate. Passing "cfg"
# together with the default update_attr=True makes Lightning write the suggested float
# over the module's `cfg` attribute (or raise an AttributeError if it has none).
# The automatic update is therefore switched off; the suggestion is read from
# `lr_finder` afterwards and can be copied into cfg.lr by hand.
tuner = Tuner(trainer)
lr_finder = tuner.lr_find(model=tmod, datamodule=dm, min_lr=1e-7, max_lr=1e-2, num_training=100, update_attr=False)

In [None]:
# Raw results of the learning-rate sweep
print(lr_finder.results)

# Plot the sweep and mark the suggested learning rate
fig = lr_finder.plot(suggest=True)
fig.show()

# Pick a point based on the plot, or use the suggestion.
# suggestion() can return None when no suggestion could be derived, so report that
# case explicitly instead of silently keeping None.
new_lr = lr_finder.suggestion()
if new_lr is None:
    print('No learning rate suggestion available - pick a value from the plot.')
else:
    print(f'Suggested learning rate: {new_lr:.3e} (configured cfg.lr: {cfg.lr})')