In [None]:
import os 
import sys
import json
from types import SimpleNamespace
import random
from glob import glob
import warnings
warnings.simplefilter("ignore", UserWarning)

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

sys.path.append('../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device, get_min_max
from custom.eval import inference_k_random, error_analysis, inference_all


# Determinism (uncomment the lines below for a deterministic run)
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project code added to sys.path above) are picked up live.
%load_ext autoreload
%autoreload 2

# Base directory from which the data, class-weight and label paths used in
# this notebook are built.
full_path = '../'

In [None]:
# Experiment configuration
# Every setting of the run is stored as an attribute of one namespace object,
# which is handed to the data module, the network and the training module.
cfg = SimpleNamespace()

# --- Data and model ---------------------------------------------------------
cfg.wav_crop_len = 5                   # crop length of the audio files, in seconds
cfg.data_path = (                      # folder of the cropped files; a "." in the crop length becomes "-"
    f'{full_path}/data/production_data/'
    f'{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
)
cfg.n_classes = 66                     # number of target classes
cfg.pretrained = True                  # start from pretrained weights
cfg.backbone = 'tf_efficientnetv2_s.in21k'  # image-classification backbone (a name from list_models)
cfg.in_chans = 1                       # number of input channels
cfg.num_workers = 4                    # worker processes used for data loading
cfg.include_val = True                 # include (True) or exclude (False) the validation set
cfg.max_amp = False                    # experimental feature

# --- Training hyperparameters -----------------------------------------------
cfg.n_epochs = 18                      # maximum number of training epochs
cfg.lr = 0.0017                        # learning rate
cfg.weight_decay = 1e-5                # optimizer weight decay
cfg.label_smoothing = 0.1              # label smoothing used by the loss
cfg.batch_size = 32                    # samples per batch
cfg.sample_rate = 44100                # audio sample rate

# --- Mel spectrogram hyperparameters ----------------------------------------
# (see the torchaudio documentation for the meaning of each parameter)
cfg.n_mels = 128
cfg.n_fft = 2048
cfg.fmin = 400
cfg.fmax = cfg.sample_rate / 2         # half the sample rate
cfg.window_size = cfg.n_fft            # window as long as the FFT
cfg.hop_length = cfg.n_fft // 2        # hop of half an FFT length
cfg.power = 2
cfg.top_db = 80.0

# --- Normalization ------------------------------------------------------------
cfg.mel_normalized = True              # normalization as defined in get_min_max
cfg.minmax_norm = False                # min-max normalize the spectrograms

# --- Augmentation -------------------------------------------------------------
cfg.impulse_prob = 0.2                 # probability of the impulse augmentation
cfg.noise_prob = 0.2                   # probability of the noise augmentation

cfg.max_noise = 0.04                   # noise-injection amplitude
cfg.min_snr = 5                        # lower bound of the signal-to-noise ratio (Gaussian & pink noise)
cfg.max_snr = 20                       # upper bound of the signal-to-noise ratio

cfg.mixup = False                      # apply mixup augmentation
cfg.specaug = False                    # apply OneOf(MaskFrequency, MaskTime)
cfg.specaug_prob = 0.25                # probability of applying spectrogram augmentation
cfg.mixup_prob = 1                     # parameter of a symmetric Beta distribution, 1 = uniform distribution

# The min/max statistics are only computed when min-max normalization is on.
if cfg.minmax_norm:
    (cfg.min, cfg.max) = get_min_max(cfg, DataModule, SimpleCNN)

# Experiment name; also used as the logger version and in the output paths.
cfg.exp_name = 'mel_normalized_5'

In [None]:
# Logger and callbacks
# The logger writes to ./effnet_baseline/<cfg.exp_name>/.
tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
lr_monitor = LearningRateMonitor(logging_interval='epoch')
# Track the best checkpoint by validation F1 and always keep the last one as well.
checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
# NOTE(review): this callback is created but is not part of the Trainer's
# callbacks list, so early stopping is currently inactive - confirm intent.
early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

# Loss function and class weights
# The weights are placed on the GPU only when one is available, so the cell
# also runs on CPU-only machines (the Trainer selects its accelerator with "auto").
loss_device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_weights = np.load(f'{full_path}/class_weights/class_weights_2.npy')
loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(loss_device),
                              label_smoothing=cfg.label_smoothing)

# Data logic, loading, augmentation
dm = DataModule(cfg=cfg)

# Network
model = SimpleCNN(cfg)

# Training logic: wraps the network, the loss and the optimizer settings
tmod = TrainModule(model,
                   loss_fn=loss_fn,
                   optimizer_name='Adam',
                   optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                   cfg=cfg)

In [None]:
# Start training run
# Accelerator and devices are chosen automatically; metrics go to tb_logger.
trainer = L.Trainer(
    max_epochs=cfg.n_epochs,
    accelerator="auto",
    devices="auto",
    enable_checkpointing=True,
    # Integer parameter: 0 means the dataloaders are not reloaded between epochs.
    reload_dataloaders_every_n_epochs=0,
    logger=tb_logger,
    # NOTE(review): early_stop_callback is not in this list, so early stopping
    # is not active for this run - confirm intent.
    callbacks=[lr_monitor, checkpoint_callback],
)
# Pass the data module by keyword so it is not mistaken for a train dataloader.
trainer.fit(tmod, datamodule=dm)

In [None]:
# Define paths
# exp_path matches the logger directory (name/version) of the training run and
# keeps its trailing slash because it is handed to error_analysis() as-is.
exp_path = f'effnet_baseline/{cfg.exp_name}/'
label_path = f'{full_path}/data/labels.json'
# NOTE(review): if an earlier run with the same exp_name already left a
# last.ckpt, verify that this file belongs to the run you want to evaluate.
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"

# Parameter for inference k-random
k_predictions = 25

# Create validation dataframe
# .copy() makes the filtered frame independent of the full metadata table, so
# adding the 'path' column below does not write into a slice.
val_df = pd.read_csv(f"{full_path}/data/metadata.csv")
val_df = val_df[val_df['subset'] == 'validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

# Create test dataframe
# Built from full_path like every other path in this notebook; stripping the
# trailing slash keeps the result identical to the previous '../data/test/<file>'.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f"{full_path.rstrip('/')}/data/test/{x}")

# Predict both subsets, store the predictions, and run the error analysis for
# the validation set / write the submission files for the test set.
for df, dset in ((val_df, 'val'), (test_df, 'test')):
    print(f'Predict {dset}')
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)

    # k-random inference
    pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
    pred_df.to_csv(f'{exp_path}/{dset}_predictions_k-random.csv', index=False)
    np.save(f'{exp_path}/{dset}_predictions_k-random.npy', pred)

    if dset == 'test':
        # Submission file from the k-random predictions
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)

        # Second pass with inference_all, exported alongside the k-random results
        pred_df, pred = inference_all(SimpleCNN(cfg), state_dict_path, df, cfg, f'{cfg.data_path}/test')
        pred_df.to_csv(f'{exp_path}/{dset}_predictions_all.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions_all.npy', pred)
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_all.csv', index=False)
    elif dset == 'val':
        # Runs after the validation predictions have been written to exp_path
        error_analysis(exp_path)