In [10]:
# Install the dependencies listed in ../../requirements.txt into the kernel's
# environment (%pip, unlike !pip, targets the environment of the running kernel).
%pip install -r ../../requirements.txt

In [3]:
import os 
import sys
import json
from types import SimpleNamespace
import random
from glob import glob
import warnings
warnings.simplefilter("ignore", UserWarning)

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

sys.path.append('../../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device
from custom.eval import inference_k_random, error_analysis


# Determinism (currently disabled): every seeding call below is commented out,
# so this cell does not seed torch, numpy or Python's `random`.
# Uncomment the whole group to seed all of them with the same value.
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)

# Reload imported modules automatically before code is executed, so edits to
# the local `custom` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Relative prefix two directory levels above this notebook; the data files and
# requirements.txt are addressed relative to it.
# NOTE(review): the value already ends with '/', and other cells interpolate it
# as f'{full_path}/...', so the resulting paths contain a doubled slash.
full_path = '../../'

In [7]:
# Show the model names timm can create (candidates for cfg.backbone).
# NOTE(review): the unfiltered list is long — consider passing a filter pattern
# or slicing the result before sharing the notebook.
list_models()

In [3]:
# Experiment configuration for the single baseline run.
# Attributes are inserted in the same order as before, so vars(cfg) is unchanged.
cfg = SimpleNamespace(
    # Data and model
    data_path=f'{full_path}/data/production_data/5s_crop/',
    n_classes=66,
    pretrained=True,
    backbone='tf_efficientnetv2_s.in21k',
    in_chans=1,
    num_workers=4,
    include_val=True,
    # Training hyperparameters
    n_epochs=1,
    lr=0.000251,
    weight_decay=1e-4,
    label_smoothing=0.1,
    batch_size=32,
    wav_crop_len=5,
    sample_rate=44100,
    # Mel spectrogram hyperparameters that do not depend on other fields
    n_mels=128,
    n_fft=2048,
    fmin=300,
)

# Mel spectrogram hyperparameters derived from the values above
cfg.fmax = cfg.sample_rate / 2      # half the sample rate (kept as a float)
cfg.window_size = cfg.n_fft         # window spans the full FFT size
cfg.hop_length = cfg.n_fft // 2     # half of n_fft

vars(cfg).update(
    power=2,
    top_db=80.0,
    # Augmentation parameters
    impulse_prob=0.2,
    noise_prob=0.2,
    max_noise=0.04,
    min_snr=5,
    max_snr=20,
    # Run name; used as the logger version, i.e. the experiment sub-directory
    exp_name='example',
)

In [4]:
# Callbacks and logging.
# Logs are written below ./effnet_baseline/<cfg.exp_name>/. The checkpoint and
# early-stopping callbacks both track the "val_f1" metric (higher is better);
# save_last=True additionally keeps a last.ckpt checkpoint.
tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
lr_monitor = LearningRateMonitor(logging_interval='epoch')
checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

# Loss function and class weights.
# The weight tensor is placed on the GPU only when one is available: the
# Trainer is created with accelerator="auto", so a hard-coded 'cuda' here would
# make the notebook crash on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_weights = np.load(f'{full_path}/data/class_weights.npy')
loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                              label_smoothing=cfg.label_smoothing)

# Data logic, loading, augmentation (all driven by cfg)
dm = DataModule(cfg=cfg)

# Network
model = SimpleCNN(cfg)

# Training logic: wraps the network, the loss and the Adam optimizer settings
tmod = TrainModule(model,
                   loss_fn=loss_fn,
                   optimizer_name='Adam',
                   optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                   cfg=cfg)

In [5]:
# Collect the Trainer settings in one mapping so they can be inspected (or
# logged) before the Trainer is built.
trainer_kwargs = dict(
    max_epochs=cfg.n_epochs,
    accelerator="auto",
    devices="auto",
    enable_checkpointing=True,
    reload_dataloaders_every_n_epochs=False,
    logger=tb_logger,
    callbacks=[early_stop_callback, lr_monitor, checkpoint_callback],
)
trainer = L.Trainer(**trainer_kwargs)

# Train the wrapped model on the data module.
trainer.fit(tmod, dm)

In [None]:
# Artifacts of the run trained above and the label mapping used for inference.
exp_path = f'effnet_baseline/{cfg.exp_name}/'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
k_predictions = 25  # forwarded as k= to inference_k_random

# Validation rows of the main metadata file, with paths made relative to
# full_path. The explicit .copy() makes the filtered frame independent of the
# original, so the column assignment below neither triggers pandas'
# SettingWithCopyWarning nor risks writing to a temporary view.
val_df = pd.read_csv(f"{full_path}/data/metadata.csv")
val_df = val_df[val_df['subset'] == 'validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

# Test metadata; file names are resolved inside the test directory.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

# Predict both subsets with a freshly built network loaded from state_dict_path.
for df, dset in [(val_df, 'val'), (test_df, 'test')]:
    print(f'Predict {dset}')
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
    pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df,
                                 loss_fn, label_path, k=k_predictions)
    pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
    if dset == 'test':
        # Reduced two-column file for submission.
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
    elif dset == 'val':
        # Runs after val_predictions.csv has been written to exp_path.
        error_analysis(exp_path)

In [65]:
# Scratch loop: prints the integers 0 through 4, one per line.
for i in range(5):
    print(i)

# RUNS WITH NEW PRETRAINED MODELS

In [None]:
# Train and evaluate one model per backbone with otherwise identical settings.
models = ['tf_efficientnetv2_s.in21k', 'efficientnet_b0', 'inception_v3', 'resnet50', 'vgg16']

# One learning rate per backbone. The list used to hold a single value while
# being indexed with the loop counter, which raised IndexError on the second
# backbone; the same rate is now applied to every model. Replace individual
# entries to tune a backbone separately.
lrs = [0.000251] * len(models)
if len(lrs) != len(models):
    raise ValueError(f'expected one learning rate per model, got {len(lrs)} for {len(models)} models')

# ---- Inputs that do not change between runs (loaded once) -------------------
# Put the class weights on the GPU only when one exists; the Trainer below uses
# accelerator="auto", so a hard-coded 'cuda' would fail on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_weights = np.load(f'{full_path}/data/class_weights.npy')
class_weight_tensor = torch.from_numpy(class_weights).to(device)

label_path = f'{full_path}/data/labels.json'
k_predictions = 25  # forwarded as k= to inference_k_random

# Validation rows of the main metadata file. .copy() detaches the filtered
# frame so that assigning the 'path' column does not trigger pandas'
# SettingWithCopyWarning.
val_df = pd.read_csv(f"{full_path}/data/metadata.csv")
val_df = val_df[val_df['subset'] == 'validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

for backbone, lr in zip(models, lrs):
    # ---- Configuration ------------------------------------------------------
    cfg = SimpleNamespace()

    cfg.data_path = f'{full_path}/data/production_data/5s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = backbone
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training hyperparameters
    cfg.n_epochs = 100
    cfg.lr = lr
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.wav_crop_len = 5
    cfg.sample_rate = 44100

    # Mel spectrogram hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    cfg.fmax = cfg.sample_rate / 2
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    cfg.exp_name = f'lukas_{backbone}'

    # ---- Callbacks and logging ----------------------------------------------
    # Fresh callback instances per run; all runs log below ./effnet_baseline/.
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=10, verbose=False, mode="max")

    # ---- Loss, data, network, training logic --------------------------------
    loss_fn = nn.CrossEntropyLoss(weight=class_weight_tensor,
                                  label_smoothing=cfg.label_smoothing)

    dm = DataModule(cfg=cfg)
    model = SimpleCNN(cfg)
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[early_stop_callback, lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

    # ---- Inference with the checkpoint of this run --------------------------
    exp_path = f'effnet_baseline/{cfg.exp_name}/'
    state_dict_path = f"{exp_path}/checkpoints/last.ckpt"

    # Each run gets its own copy of the metadata frames, so nothing done to
    # them during inference can leak into the next backbone's run.
    for df, dset in [(val_df.copy(), 'val'), (test_df.copy(), 'test')]:
        print(f'Predict {dset}')
        pred_ds = AudioDataset(df, mode='test', cfg=cfg)
        pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
        pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df,
                                     loss_fn, label_path, k=k_predictions)
        pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
        if dset == 'test':
            # Reduced two-column file for submission.
            pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
        elif dset == 'val':
            # Runs after val_predictions.csv has been written to exp_path.
            error_analysis(exp_path)

# PREDICTION TESTS

x = [1,2,3,4]

normalize([x]) # array([[0.18257419, 0.36514837, 0.54772256, 0.73029674]])

normalize([x], norm="l1") # array([[0.1, 0.2, 0.3, 0.4]])

normalize([x], norm="max") # array([[0.25, 0.5 , 0.75, 1.]])

In [11]:
# Configuration for the prediction experiments below. It mirrors the baseline
# settings, but data_path and exp_name are intentionally left unset here.
# Attributes are inserted in the same order as before, so vars(cfg) is unchanged.
cfg = SimpleNamespace(
    # Model and loading
    n_classes=66,
    pretrained=True,
    backbone='tf_efficientnetv2_s.in21k',
    in_chans=1,
    num_workers=4,
    include_val=True,
    # Training hyperparameters
    n_epochs=1,
    lr=0.000251,
    weight_decay=1e-4,
    label_smoothing=0.1,
    batch_size=32,
    wav_crop_len=5,
    sample_rate=44100,
    # Mel spectrogram hyperparameters that do not depend on other fields
    n_mels=128,
    n_fft=2048,
    fmin=300,
)

# Mel spectrogram hyperparameters derived from the values above
cfg.fmax = cfg.sample_rate / 2      # half the sample rate (kept as a float)
cfg.window_size = cfg.n_fft         # window spans the full FFT size
cfg.hop_length = cfg.n_fft // 2     # half of n_fft

vars(cfg).update(
    power=2,
    top_db=80.0,
    # Augmentation parameters
    impulse_prob=0.2,
    noise_prob=0.2,
    max_noise=0.04,
    min_snr=5,
    max_snr=20,
)

# Class-weighted cross-entropy with label smoothing, handed to
# inference_k_random by the prediction cells below.
class_weights = np.load(f'{full_path}/data/class_weights.npy')

# Choose the device automatically instead of commenting a GPU/CPU variant in
# and out by hand: weights go to the GPU when one is available, else stay on CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                              label_smoothing=cfg.label_smoothing)

## as is

In [12]:
# Display the path prefix that the cells below use to locate data and checkpoints.
full_path

In [62]:
# Checkpoint of the trained run to evaluate, and the directory that receives
# the submission file.
exp_path = f'{full_path}/notebooks/Dominik/effnet_baseline/bench_7.5'
exp_path_Lukas = f'{full_path}/notebooks/Lukas/bench_7.5'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
k_predictions = 101  # forwarded as k= to inference_k_random

# Test metadata; file names are resolved inside the test directory.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

# Only the test set is predicted in this cell, so no loop over subsets (and no
# validation-only branch) is needed.
print('Predict test')
pred_ds = AudioDataset(test_df, mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, test_df,
                             loss_fn, label_path, k=k_predictions)

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(exp_path_Lukas, exist_ok=True)
pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path_Lukas}/submission_predictions_k101.csv', index=False)

In [24]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top so a fresh kernel exposes all dependencies up front.
from sklearn.preprocessing import normalize

# Toy row vector for comparing normalize() variants; the trailing comments
# record the output of each call (default norm, then "l1", then "max").
x = [1,2,3,4]
x1 = normalize([x]) # array([[0.18257419, 0.36514837, 0.54772256, 0.73029674]])
#normalize([x], norm="l1") # array([[0.1, 0.2, 0.3, 0.4]])
#normalize([x], norm="max") # array([[0.25, 0.5 , 0.75, 1.]])
x1

## normalize

In [58]:
# Checkpoint of the trained run to evaluate, and the directory that receives
# the submission file.
exp_path = f'{full_path}/notebooks/Dominik/effnet_baseline/bench_7.5'
exp_path_Lukas = f'{full_path}/notebooks/Lukas/bench_7.5'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
k_predictions = 25  # forwarded as k= to inference_k_random

# Test metadata; file names are resolved inside the test directory.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

# Only the test set is predicted in this cell, so no loop over subsets (and no
# validation-only branch) is needed.
print('Predict test')
pred_ds = AudioDataset(test_df, mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, test_df,
                             loss_fn, label_path, k=k_predictions)

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(exp_path_Lukas, exist_ok=True)
pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path_Lukas}/submission_predictions_normalize.csv', index=False)

In [50]:
# Check normalize() on a two-element vector that contains a negative entry.
l = np.array([-0.0634237,  0.320624 ])
# NOTE(review): repeats an import made in an earlier cell; a single import in
# the top import cell would be enough.
from sklearn.preprocessing import normalize
l1 = normalize([l])
l1

## normalize l1

In [59]:
# Checkpoint of the trained run to evaluate, and the directory that receives
# the submission file.
exp_path = f'{full_path}/notebooks/Dominik/effnet_baseline/bench_7.5'
exp_path_Lukas = f'{full_path}/notebooks/Lukas/bench_7.5'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
k_predictions = 25  # forwarded as k= to inference_k_random

# Test metadata; file names are resolved inside the test directory.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

# Only the test set is predicted in this cell, so no loop over subsets (and no
# validation-only branch) is needed.
print('Predict test')
pred_ds = AudioDataset(test_df, mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, test_df,
                             loss_fn, label_path, k=k_predictions)

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(exp_path_Lukas, exist_ok=True)
pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path_Lukas}/submission_predictions_normalize_l1.csv', index=False)

## normalize max

In [61]:
# Checkpoint of the trained run to evaluate, and the directory that receives
# the submission file.
exp_path = f'{full_path}/notebooks/Dominik/effnet_baseline/bench_7.5'
exp_path_Lukas = f'{full_path}/notebooks/Lukas/bench_7.5'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
k_predictions = 101  # forwarded as k= to inference_k_random

# Test metadata; file names are resolved inside the test directory.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'{full_path}/data/test/{x}')

# Only the test set is predicted in this cell, so no loop over subsets (and no
# validation-only branch) is needed.
print('Predict test')
pred_ds = AudioDataset(test_df, mode='test', cfg=cfg)
pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
pred_df = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, test_df,
                             loss_fn, label_path, k=k_predictions)

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(exp_path_Lukas, exist_ok=True)
pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path_Lukas}/submission_predictions_normalize_max.csv', index=False)