In [2]:
# Install the packages listed in the requirements file two directories above this
# notebook. `%pip` (rather than `!pip`) installs into the running kernel's environment.
%pip install -r ../../requirements.txt

In [3]:
import os 
import sys
import json
from types import SimpleNamespace
import random
from glob import glob
import warnings
warnings.simplefilter("ignore", UserWarning)

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

sys.path.append('../../src')

from Lukas_custom.data import AudioDataset, DataModule
from Lukas_custom.trainer import TrainModule
from Lukas_custom.net import SimpleCNN
from Lukas_custom.utils import batch_to_device, get_min_max
from Lukas_custom.eval import inference_k_random, error_analysis, inference_all


# determinism
# NOTE(review): seeding is disabled -- every line below is commented out, so this
# cell sets no RNG seed. Uncomment to seed torch / numpy / random.
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
# Reload imported modules automatically before each cell execution, so edits to the
# project code appended to sys.path above are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Relative prefix prepended to every data path in this notebook. It already ends in a
# slash, so f'{full_path}/data/...' expands to '../..//data/...'.
full_path = '../../'

In [8]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=19600.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_19600/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [9]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=17640.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_17640/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [10]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=16036.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_16036/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [11]:
# Mean validation F1 for the longer ("more_epochs") EfficientNetV2-S run with f_max=19600.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_19600_more_epochs/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [7]:
# Mean validation F1 for the longer ("more_epochs") EfficientNetV2-S run with f_max=16036.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_16036_more_epochs/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [16]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=19600
# (reads the same report file as the first evaluation cell of this notebook).
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_19600/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [17]:
# Print the mean validation F1 of each run in the augmentation-probability sweep.
noise = [0.1, 0.15, 0.3, 0.5]
for i in noise:
    print(f'noise: {i}')
    report_path = f'effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_noise_{i}/val_evaluation.csv'
    df = pd.read_csv(report_path)
    # Leave the last three report rows out of the average
    # (presumably aggregate rows -- TODO confirm).
    df = df.drop(index=df.index[-3:])
    m = df[['f1-score']].mean()
    print(m)

In [None]:
# Configuration for the short single run 'minmax_short'.
# Attributes are added in the same order as before; groups are set via vars(cfg).update.
cfg = SimpleNamespace(wav_crop_len=4.5)

# The data folder name encodes the crop length with '.' written as '-' (4.5 -> "4-5s_crop").
crop_tag = str(cfg.wav_crop_len).replace(".", "-")
cfg.data_path = f'{full_path}/data/production_data/{crop_tag}s_crop/'

# Model and data-loading settings
vars(cfg).update(
    n_classes=66,
    pretrained=True,
    backbone='tf_efficientnetv2_s.in21k',
    in_chans=1,
    num_workers=4,
    include_val=False,
)

# Training hyperparameters
vars(cfg).update(
    n_epochs=5,
    lr=0.000251,
    weight_decay=1e-4,
    label_smoothing=0.1,
    batch_size=32,
    sample_rate=44100,
)

# Mel spectrogram hyperparameters; fmax is half the sample rate.
vars(cfg).update(n_mels=128, n_fft=2048, fmin=300, fmax=cfg.sample_rate / 2)
# Window spans one full FFT frame, hop is half a frame.
vars(cfg).update(window_size=cfg.n_fft, hop_length=cfg.n_fft // 2, power=2, top_db=80.0)

# Augmentation parameters
vars(cfg).update(impulse_prob=0.2, noise_prob=0.2, max_noise=0.04, min_snr=5, max_snr=20)

# get_min_max is a project helper; its two return values are stored on the config
# (presumably normalisation bounds -- TODO confirm).
cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
cfg.exp_name = 'minmax_short'

In [10]:
# Build everything the Trainer cell needs from the current `cfg`:
# logger + callbacks, weighted loss, data module, network and training module.

# Callbacks
tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
lr_monitor = LearningRateMonitor(logging_interval='epoch')
# Tracks the best val_f1 checkpoint and additionally keeps the last one (save_last=True).
checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

# Loss Function and class weights
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'
loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                              label_smoothing=cfg.label_smoothing)

# Data Logic, Loading, Augmentation
dm = DataModule(cfg=cfg)

# Network
model = SimpleCNN(cfg)

# Training Logic
tmod = TrainModule(model,
                   loss_fn=loss_fn,
                   optimizer_name='Adam',
                   optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                   cfg=cfg)

In [11]:
# Train for at most cfg.n_epochs epochs on whatever accelerator Lightning selects,
# logging to TensorBoard, with early stopping, LR monitoring and checkpointing.
trainer_options = dict(
    max_epochs=cfg.n_epochs,
    accelerator="auto",
    devices="auto",
    enable_checkpointing=True,
    reload_dataloaders_every_n_epochs=False,
    logger=tb_logger,
    callbacks=[early_stop_callback, lr_monitor, checkpoint_callback],
)
trainer = L.Trainer(**trainer_options)
trainer.fit(tmod, dm)

In [32]:
# Run inference with the last checkpoint of the current experiment on the validation
# and test splits, write predictions to the experiment folder, and run the error
# analysis on the validation predictions.
# glob(f'{exp_path}/checkpoints/*')
exp_path = f'effnet_baseline/{cfg.exp_name}/'
label_path = f'{full_path}/data/labels.json'
# Evaluates the *last* checkpoint (save_last=True), not the best-val_f1 one.
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
# Passed as `k` to inference_k_random.
k_predictions = 2

metadata_df = pd.read_csv(f"{full_path}/data/metadata.csv")
# .copy() so the 'path' assignment below writes to an independent frame rather than
# a filtered slice (avoids pandas' SettingWithCopyWarning).
val_df = metadata_df[metadata_df['subset'] == 'validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'../../data/test/{x}')

for df, dset in [(val_df, 'val'), (test_df, 'test')]:
    print(f'Predict {dset}')
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
    pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
    pred_df.to_csv(f'{exp_path}/{dset}_predictions_k-random.csv', index=False)
    # The other evaluation cells in this notebook also write `{dset}_predictions.csv`
    # before calling error_analysis; mirrored here so the outputs are consistent
    # (presumably the file error_analysis reads -- TODO confirm).
    pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
    np.save(f'{exp_path}/{dset}_predictions_k-random.npy', pred)
    if dset == 'test':
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
        # Second test pass via inference_all, saved next to the k-random results.
        pred_df, pred = inference_all(SimpleCNN(cfg), state_dict_path, test_df, cfg, f'{cfg.data_path}/test')
        pred_df.to_csv(f'{exp_path}/{dset}_predictions_all.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions_all.npy', pred)
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_all.csv', index=False)
    elif dset == 'val':
        error_analysis(exp_path)

# RUNS OF DIFFERENT PRETRAINED MODELS

In [None]:
# Train one model per pretrained backbone, each with its own learning rate.
models = ['tf_efficientnetv2_s.in21k','efficientnet_b0','inception_v3','resnet50','vgg16']
lrs = [0.000251,0.0005956621435290105, 0.0004, 0.0031622776601683803, 3.1622776601683795e-05]
# Backbones and learning rates are paired by position; fail early if the lists drift apart.
assert len(models) == len(lrs), "models and lrs must have the same length"

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for backbone, backbone_lr in zip(models, lrs):

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 4.5

    # The data folder name encodes the crop length with '.' written as '-'.
    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = backbone
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 100
    cfg.lr = backbone_lr
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    cfg.fmax = cfg.sample_rate / 2
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_{backbone}_{str(cfg.wav_crop_len)}s_crop'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=10, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[early_stop_callback, lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

In [9]:
# Evaluate every backbone of the sweep: rebuild the config used for training, then run
# validation/test inference from the run's last checkpoint and the validation error analysis.
models = ['tf_efficientnetv2_s.in21k','efficientnet_b0','inception_v3','resnet50','vgg16']
lrs = [0.000251,0.0005956621435290105, 0.0004, 0.0031622776601683803, 3.1622776601683795e-05]
# Backbones and learning rates are paired by position; fail early if the lists drift apart.
assert len(models) == len(lrs), "models and lrs must have the same length"

for backbone, backbone_lr in zip(models, lrs):

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 4.5

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = backbone
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 100
    cfg.lr = backbone_lr
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    cfg.fmax = cfg.sample_rate / 2
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_{backbone}_{str(cfg.wav_crop_len)}s_crop'

    # glob(f'{exp_path}/checkpoints/*')
    exp_path = f'effnet_baseline/{cfg.exp_name}'
    label_path = f'{full_path}/data/labels.json'
    # Evaluates the *last* checkpoint of the run.
    state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
    # Passed as `k` to inference_k_random.
    k_predictions = 25

    # The frames are re-read for every backbone so each run starts from fresh metadata.
    metadata_df = pd.read_csv(f"{full_path}/data/metadata.csv")
    # .copy() so the 'path' assignment below writes to an independent frame rather than
    # a filtered slice (avoids pandas' SettingWithCopyWarning).
    val_df = metadata_df[metadata_df['subset'] == 'validation'].copy()
    val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

    test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
    test_df['path'] = test_df['file_name'].apply(lambda x: f'../../data/test/{x}')

    for df, dset in [(val_df, 'val'), (test_df, 'test')]:
        print(f'Predict {dset}')
        pred_ds = AudioDataset(df, mode='test', cfg=cfg)
        pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
        pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
        pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions.npy', pred)
        if dset == 'test':
            pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
            # Second test pass via inference_all, saved next to the k-random results.
            pred_df, pred = inference_all(SimpleCNN(cfg), state_dict_path, test_df, cfg, f'{cfg.data_path}/test')
            pred_df.to_csv(f'{exp_path}/{dset}_predictions_all.csv', index=False)
            np.save(f'{exp_path}/{dset}_predictions_all.npy', pred)
            pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_all.csv', index=False)
        elif dset == 'val':
            error_analysis(exp_path)

In [7]:
# Scratch calculation: the sample rate (44100) divided by 2.6, i.e. the f_max value
# produced by the divisor 2.6 in the sweep cells below.
44100/2.6

In [None]:
# Train vgg16 once per f_max divisor (f_max = sample_rate / divisor).
fmax = [2,2.6]

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 4.5

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'vgg16'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 30
    cfg.lr = 1e-4
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Left unrounded: the float value ends up verbatim in the experiment name below.
    cfg.fmax = cfg.sample_rate / i
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_vgg16_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}_new'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    # NOTE(review): built but not passed to the Trainer below, so early stopping is
    # inactive and each run trains for the full cfg.n_epochs -- confirm this is intended.
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

In [13]:
# Evaluate the vgg16 f_max sweep: rebuild each run's config, predict on validation and
# test from the last checkpoint, and run the error analysis on the validation split.
fmax = [2,2.6]
for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 4.5

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'vgg16'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 30
    cfg.lr = 1e-4
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Left unrounded: the float value ends up verbatim in the experiment name below.
    cfg.fmax = cfg.sample_rate / i
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_vgg16_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}_new'

    # glob(f'{exp_path}/checkpoints/*')
    exp_path = f'effnet_baseline/{cfg.exp_name}'
    label_path = f'{full_path}/data/labels.json'
    # Evaluates the *last* checkpoint of the run.
    state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
    # Passed as `k` to inference_k_random.
    k_predictions = 2

    # The frames are re-read for every run so each one starts from fresh metadata.
    metadata_df = pd.read_csv(f"{full_path}/data/metadata.csv")
    # .copy() so the 'path' assignment below writes to an independent frame rather than
    # a filtered slice (avoids pandas' SettingWithCopyWarning).
    val_df = metadata_df[metadata_df['subset'] == 'validation'].copy()
    val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

    test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
    test_df['path'] = test_df['file_name'].apply(lambda x: f'../../data/test/{x}')

    for df, dset in [(val_df, 'val'), (test_df, 'test')]:
        print(f'Predict {dset}')
        pred_ds = AudioDataset(df, mode='test', cfg=cfg)
        pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
        pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
        # The same predictions are written under both file names.
        pred_df.to_csv(f'{exp_path}/{dset}_predictions_k-random.csv', index=False)
        pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions_k-random.npy', pred)
        # Submission export and the inference_all pass are disabled for this sweep,
        # so nothing further happens for the test split.
        if dset == 'val':
            error_analysis(exp_path)

In [14]:
# Mean validation F1 for the vgg16 run with f_max=22050.0.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_vgg16_4.5s_crop_f_max_22050.0_new/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

In [10]:
# Mean validation F1 for the vgg16 run with f_max=16961.53846153846 (44100 / 2.6).
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_vgg16_4.5s_crop_f_max_16961.53846153846_new/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

# TEST FMAX

In [24]:
# Scratch calculation: the sample rate (44100) divided by 3, rounded to an integer
# in the same way the f_max sweep cells below round their value.
round(44100/3)

In [26]:
# Train EfficientNetV2-S on 6 s crops once per f_max divisor
# (f_max = round(sample_rate / divisor)).
# Only 2.75 is run here: a wider list [2.25, 2.5, 2.75] used to be assigned on the
# line above and was immediately overwritten, so that dead assignment is removed.
fmax = [2.75]

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 6

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'tf_efficientnetv2_s.in21k'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 20
    cfg.lr = 0.000251
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Rounded so the experiment name below carries an integer f_max.
    cfg.fmax = round(cfg.sample_rate / i)
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_tf_efficientnetv2_s.in21k_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    # NOTE(review): built but not passed to the Trainer below, so early stopping is
    # inactive and each run trains for the full cfg.n_epochs -- confirm this is intended.
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

In [29]:
# Evaluate the EfficientNetV2-S f_max runs: rebuild each run's config, predict on
# validation and test from the last checkpoint, export submissions, and run the error
# analysis on the validation split.
fmax = [2.25,2.75]
for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 6

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'tf_efficientnetv2_s.in21k'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 20
    cfg.lr = 0.000251
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Rounded so the experiment name below carries an integer f_max.
    cfg.fmax = round(cfg.sample_rate / i)
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_tf_efficientnetv2_s.in21k_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}'

    # glob(f'{exp_path}/checkpoints/*')
    exp_path = f'effnet_baseline/{cfg.exp_name}'
    label_path = f'{full_path}/data/labels.json'
    # Evaluates the *last* checkpoint of the run.
    state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
    # Passed as `k` to inference_k_random.
    k_predictions = 2

    # The frames are re-read for every run so each one starts from fresh metadata.
    metadata_df = pd.read_csv(f"{full_path}/data/metadata.csv")
    # .copy() so the 'path' assignment below writes to an independent frame rather than
    # a filtered slice (avoids pandas' SettingWithCopyWarning).
    val_df = metadata_df[metadata_df['subset'] == 'validation'].copy()
    val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

    test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
    test_df['path'] = test_df['file_name'].apply(lambda x: f'../../data/test/{x}')

    for df, dset in [(val_df, 'val'), (test_df, 'test')]:
        print(f'Predict {dset}')
        pred_ds = AudioDataset(df, mode='test', cfg=cfg)
        pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
        pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
        # The same predictions are written under both file names.
        pred_df.to_csv(f'{exp_path}/{dset}_predictions_k-random.csv', index=False)
        pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions_k-random.npy', pred)
        if dset == 'test':
            pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
            # Second test pass via inference_all, saved next to the k-random results.
            pred_df, pred = inference_all(SimpleCNN(cfg), state_dict_path, test_df, cfg, f'{cfg.data_path}/test')
            pred_df.to_csv(f'{exp_path}/{dset}_predictions_all.csv', index=False)
            np.save(f'{exp_path}/{dset}_predictions_all.npy', pred)
            pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_all.csv', index=False)
        elif dset == 'val':
            error_analysis(exp_path)

In [28]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=16036.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_16036/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()

# More epochs

In [None]:
# Repeat the EfficientNetV2-S f_max runs with a longer schedule (32 epochs); results go
# to experiment folders suffixed with "_more_epochs".
fmax = [2.25,2.75]

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 6

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'tf_efficientnetv2_s.in21k'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 32
    cfg.lr = 0.000251
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Rounded so the experiment name below carries an integer f_max.
    cfg.fmax = round(cfg.sample_rate / i)
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_tf_efficientnetv2_s.in21k_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}_more_epochs'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    # NOTE(review): built but not passed to the Trainer below, so early stopping is
    # inactive and each run trains for the full cfg.n_epochs -- confirm this is intended.
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

# TEST NOISE

In [None]:
# Train EfficientNetV2-S once per augmentation probability; the same value is used for
# both the impulse and the noise augmentation.
noise = [0.1,0.15,0.3,0.5]

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in noise:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 6

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'tf_efficientnetv2_s.in21k'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 20
    cfg.lr = 0.000251
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Fixed divisor 2.25 for the whole sweep (44100 / 2.25 = 19600.0, kept as a float).
    cfg.fmax = cfg.sample_rate / 2.25
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters -- the swept value
    cfg.impulse_prob = i
    cfg.noise_prob = i

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_tf_efficientnetv2_s.in21k_{str(cfg.wav_crop_len)}s_crop_noise_{i}'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    # NOTE(review): built but not passed to the Trainer below, so early stopping is
    # inactive and each run trains for the full cfg.n_epochs -- confirm this is intended.
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

In [4]:
# Train EfficientNetV2-S on 6 s crops with f_max at half the sample rate (divisor 2).
# `cfg` from this cell is reused by the evaluation cell that follows.
fmax = [2]

# Loop-invariant: class weights for the loss and the device they are placed on.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Use CUDA when present, otherwise stay on CPU. The previous hard-coded .to('cuda')
# raised on CPU-only machines even though the Trainer runs with accelerator="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in fmax:

    cfg = SimpleNamespace()

    cfg.wav_crop_len = 6

    cfg.data_path = f'{full_path}/data/production_data/{str(cfg.wav_crop_len).replace(".", "-")}s_crop/'
    cfg.n_classes = 66
    cfg.pretrained = True
    cfg.backbone = 'tf_efficientnetv2_s.in21k'
    cfg.in_chans = 1
    cfg.num_workers = 4
    cfg.include_val = False

    # Training Hyperparameters
    cfg.n_epochs = 20
    cfg.lr = 0.000251
    cfg.weight_decay = 1e-4
    cfg.label_smoothing = 0.1
    cfg.batch_size = 32
    cfg.sample_rate = 44100

    # Mel Spectrogram Hyperparameters
    cfg.n_mels = 128
    cfg.n_fft = 2048
    cfg.fmin = 300
    # Rounded so the experiment name below carries an integer f_max.
    cfg.fmax = round(cfg.sample_rate / i)
    cfg.window_size = cfg.n_fft
    cfg.hop_length = int(cfg.n_fft / 2)
    cfg.power = 2
    cfg.top_db = 80.0

    # Augmentation Parameters
    cfg.impulse_prob = 0.2
    cfg.noise_prob = 0.2

    cfg.max_noise = 0.04
    cfg.min_snr = 5
    cfg.max_snr = 20

    # Project helper; its two return values are stored on the config
    # (presumably normalisation bounds -- TODO confirm).
    cfg.min, cfg.max = get_min_max(cfg, DataModule, SimpleCNN)
    cfg.exp_name = f'lukas_tf_efficientnetv2_s.in21k_{str(cfg.wav_crop_len)}s_crop_f_max_{cfg.fmax}'

    # Callbacks
    tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
    lr_monitor = LearningRateMonitor(logging_interval='epoch')
    checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)
    # NOTE(review): built but not passed to the Trainer below, so early stopping is
    # inactive and each run trains for the full cfg.n_epochs -- confirm this is intended.
    early_stop_callback = EarlyStopping(monitor="val_f1", patience=5, verbose=False, mode="max")

    # Loss Function and class weights
    loss_fn = nn.CrossEntropyLoss(weight=torch.from_numpy(class_weights).to(device),
                                  label_smoothing=cfg.label_smoothing)

    # Data Logic, Loading, Augmentation
    dm = DataModule(cfg=cfg)

    # Network
    model = SimpleCNN(cfg)

    # Training Logic
    tmod = TrainModule(model,
                       loss_fn=loss_fn,
                       optimizer_name='Adam',
                       optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.weight_decay},
                       cfg=cfg)

    trainer = L.Trainer(
        max_epochs=cfg.n_epochs,
        accelerator="auto",
        devices="auto",
        enable_checkpointing=True,
        reload_dataloaders_every_n_epochs=False,
        logger=tb_logger,
        callbacks=[lr_monitor, checkpoint_callback]
    )
    trainer.fit(tmod, dm)

In [5]:
# Evaluate the experiment described by the current `cfg` (set by an earlier cell):
# predict on validation and test from the last checkpoint, export submissions, and run
# the error analysis on the validation split.
exp_path = f'effnet_baseline/{cfg.exp_name}'
label_path = f'{full_path}/data/labels.json'
# Evaluates the *last* checkpoint of the run.
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"
# Passed as `k` to inference_k_random.
k_predictions = 2

metadata_df = pd.read_csv(f"{full_path}/data/metadata.csv")
# .copy() so the 'path' assignment below writes to an independent frame rather than
# a filtered slice (avoids pandas' SettingWithCopyWarning).
val_df = metadata_df[metadata_df['subset'] == 'validation'].copy()
val_df['path'] = val_df['path'].apply(lambda x: f'{full_path}/{x}')

test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
test_df['path'] = test_df['file_name'].apply(lambda x: f'../../data/test/{x}')

for df, dset in [(val_df, 'val'), (test_df, 'test')]:
    print(f'Predict {dset}')
    pred_ds = AudioDataset(df, mode='test', cfg=cfg)
    pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)
    pred_df, pred = inference_k_random(SimpleCNN(cfg), state_dict_path, pred_dl, df, k=k_predictions)
    # The same predictions are written under both file names.
    pred_df.to_csv(f'{exp_path}/{dset}_predictions_k-random.csv', index=False)
    pred_df.to_csv(f'{exp_path}/{dset}_predictions.csv', index=False)
    np.save(f'{exp_path}/{dset}_predictions_k-random.npy', pred)
    if dset == 'test':
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
        # Second test pass via inference_all, saved next to the k-random results.
        pred_df, pred = inference_all(SimpleCNN(cfg), state_dict_path, test_df, cfg, f'{cfg.data_path}/test')
        pred_df.to_csv(f'{exp_path}/{dset}_predictions_all.csv', index=False)
        np.save(f'{exp_path}/{dset}_predictions_all.npy', pred)
        pred_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_all.csv', index=False)
    elif dset == 'val':
        error_analysis(exp_path)

In [6]:
# Mean validation F1 for the EfficientNetV2-S run with f_max=22050.
# The last three rows of the report are left out of the average
# (presumably aggregate rows such as accuracy / macro avg / weighted avg -- TODO confirm).
df = pd.read_csv('effnet_baseline/lukas_tf_efficientnetv2_s.in21k_6s_crop_f_max_22050/val_evaluation.csv')
df = df.drop(index=df.index[-3:])
df[['f1-score']].mean()