In [2]:
%pip install -r ../../requirements.txt

In [4]:
import os 
import sys
import json
from types import SimpleNamespace
import random
from glob import glob

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

sys.path.append('../../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device, inference


# determinism (currently disabled): uncomment the lines below to seed torch, numpy and random
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
%load_ext autoreload
%autoreload 2

In [5]:
# Experiment configuration shared by the model, data pipeline and trainer cells.
# Base values first; attributes derived from them are attached right after.
cfg = SimpleNamespace(
    # model / optimisation
    n_classes=66,
    pretrained=True,
    n_epochs=20,
    backbone='tf_efficientnetv2_s.in21k',
    in_chans=1,
    lr=0.000251,  # alternative value noted by the author: 0.000354
    weight_decay=1e-4,
    batch_size=32,
    # audio / spectrogram settings
    wav_crop_len=7.5,
    sample_rate=44100,
    n_mels=128,
    n_fft=2048,
    fmin=300,
)
vars(cfg).update(
    fmax=cfg.sample_rate / 2,    # half the sample rate (kept as a float)
    window_size=cfg.n_fft,       # window spans the full FFT size
    hop_length=cfg.n_fft // 2,   # half of n_fft
    power=2,
    top_db=80.0,
    impulse_prob=0.2,
)

# Run identifier; also used as the logger version and the results directory name.
cfg.exp_name = f'{cfg.wav_crop_len}_crop-{cfg.n_epochs}e_noiseAug_CE_weighted_LS_pp_impulse_ALLDATA'
# Per-class weights for the weighted cross-entropy loss built in the next cell.
# Cast to float32 so the weight tensor has a fixed dtype regardless of how the
# file was saved, and fail early (with a readable message) when the file does
# not contain exactly one weight per class.
class_weights = np.load('class_weights.npy').astype(np.float32)
if class_weights.shape != (cfg.n_classes,):
    raise ValueError(
        f"class_weights.npy has shape {class_weights.shape}, "
        f"expected ({cfg.n_classes},) to match cfg.n_classes"
    )

In [6]:
# Logging / checkpointing callbacks and the training loss.
# TensorBoard logs are written under ./effnet_baseline/<exp_name>/.
tb_logger = TensorBoardLogger(save_dir="./", version=cfg.exp_name, name="effnet_baseline")
lr_monitor = LearningRateMonitor(logging_interval='epoch')
# Monitors val_f1 (higher is better) and additionally keeps a last.ckpt.
checkpoint_callback = ModelCheckpoint(monitor="val_f1", mode="max", save_last=True)

# Place the class weights on the GPU when one is available and fall back to the
# CPU otherwise, instead of unconditionally requiring CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Class-weighted cross-entropy with label smoothing; float32 weights so the
# weight tensor has a fixed, explicit dtype.
# Alternative left by the author: nn.BCEWithLogitsLoss(reduction="none")
loss_fn = nn.CrossEntropyLoss(
    weight=torch.as_tensor(class_weights, dtype=torch.float32, device=DEVICE),
    label_smoothing=0.1,
)

In [7]:
# Data module over the pre-cropped production data.
dm = DataModule(
    data_dir='../../data/production_data/7-5s_crop/',
    batch_size=cfg.batch_size,
    max_time=cfg.wav_crop_len,
)

# Network built from the experiment config, wrapped in the training module
# together with the loss and the optimiser settings.
model = SimpleCNN(cfg)
optimizer_hparams = {"lr": cfg.lr, "weight_decay": cfg.weight_decay}
tmod = TrainModule(
    model,
    loss_fn=loss_fn,
    optimizer_name='Adam',
    optimizer_hparams=optimizer_hparams,
    cfg=cfg,
)

# NOTE(review): this callback is created here but is not in the `callbacks`
# list passed to L.Trainer in the next cell, so it has no effect as written.
early_stop_callback = EarlyStopping(monitor="val_f1", mode="max", patience=5, verbose=False)

In [None]:
# Train for cfg.n_epochs, letting Lightning choose accelerator and devices.
# NOTE(review): early_stop_callback (created in the previous cell) is not
# registered here — confirm whether early stopping should be active.
trainer = L.Trainer(
    max_epochs=cfg.n_epochs,
    accelerator="auto",
    devices="auto",
    enable_checkpointing=True,
    # Integer parameter: 0 means the dataloaders are never reloaded
    # (replaces the boolean False that was passed before).
    reload_dataloaders_every_n_epochs=0,
    logger=tb_logger,
    callbacks=[lr_monitor, checkpoint_callback],
)
trainer.fit(tmod, dm)

In [8]:
# Locations used by the inference cells below.
# Built with os.path.join so the derived paths no longer contain a doubled
# separator ("<exp_path>//checkpoints/...").
exp_path = os.path.join('effnet_baseline', cfg.exp_name)
label_path = '../../data/labels.json'
state_dict_path = os.path.join(exp_path, 'checkpoints', 'last.ckpt')
# To list every checkpoint saved for this run:
# glob(os.path.join(exp_path, 'checkpoints', '*'))

In [35]:
# Run the checkpoint on the rows of metadata.csv whose subset is 'validation'
# and store the result next to the run's logs.
# (The test_* variable names are reused here for the validation subset.)
test_metadata_df = (
    pd.read_csv("../../data/metadata.csv")
    .loc[lambda frame: frame['subset'] == 'validation']
)
test_ds = AudioDataset(test_metadata_df, mode='val', max_time=cfg.wav_crop_len)
test_dl = DataLoader(
    test_ds,
    batch_size=cfg.batch_size,
    shuffle=False,
    num_workers=4,
)

# `inference` comes from custom.utils; its return value replaces the frame.
test_metadata_df = inference(
    SimpleCNN(cfg),
    state_dict_path,
    test_dl,
    test_metadata_df,
    loss_fn,
    label_path,
    k=10,
)
test_metadata_df.to_csv(f'{exp_path}/val_predictions.csv', index=False)

In [12]:
# Run the checkpoint on the test set and write both the submission file
# (file name + predicted class id) and the full prediction table.
test_metadata_df = pd.read_csv("../../data/test/metadata.csv").assign(
    # audio files sit next to the test metadata file
    path=lambda frame: frame['file_name'].map(lambda name: f'../../data/test/{name}')
)

test_ds = AudioDataset(test_metadata_df, mode='test', max_time=cfg.wav_crop_len)
test_dl = DataLoader(
    test_ds,
    batch_size=cfg.batch_size,
    shuffle=False,
    num_workers=4,
)

# `inference` comes from custom.utils; its return value replaces the frame.
test_metadata_df = inference(
    SimpleCNN(cfg),
    state_dict_path,
    test_dl,
    test_metadata_df,
    loss_fn,
    label_path,
    k=10,
)
submission_columns = ['file_name', 'predicted_class_id']
test_metadata_df[submission_columns].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
test_metadata_df.to_csv(f'{exp_path}/test_predictions.csv', index=False)

In [11]:
# Test-set loader for the manual inference loop below.
test_metadata_df = pd.read_csv("../../data/test/metadata.csv")
# Audio files live next to the metadata file, so the path prefix must match
# the directory the CSV was read from (was 'data/test/', which points
# somewhere else relative to this notebook).
test_metadata_df['path'] = test_metadata_df['file_name'].apply(lambda x: f'../../data/test/{x}')
# Use the same crop length as the other inference cells (max_time was
# previously left at the dataset's default here).
test_ds = AudioDataset(test_metadata_df, mode='test', max_time=cfg.wav_crop_len)
test_dl = DataLoader(test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=4)

def get_state_dict(sd_fp, prefix="model."):
    """Load the ``state_dict`` entry of a checkpoint and strip a leading key prefix.

    Args:
        sd_fp: Path to a checkpoint file containing a ``'state_dict'`` entry.
        prefix: Prefix removed from the start of each key. Defaults to
            ``"model."``, the prefix this notebook strips before loading the
            weights into ``SimpleCNN``.

    Returns:
        dict mapping the (possibly shortened) parameter names to their values,
        loaded onto the CPU.

    Only a *leading* prefix is removed. The previous ``str.replace`` call
    deleted every occurrence of the substring, which corrupted keys such as
    ``model.head.model.bias`` or ``submodel.weight``. Keys that do not start
    with the prefix are returned unchanged.
    """
    sd = torch.load(sd_fp, map_location="cpu")['state_dict']
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in sd.items()
    }

# Manual inference loop over `test_dl` (an older, inline counterpart of the
# `inference` helper used in the cells above).

# Label mapping; its keys are used as the per-class loss column names.
with open('../../data/labels.json', 'r') as infile:
    data = json.load(infile)

DEVICE = "cuda" if torch.cuda.is_available() else 'cpu'
state_dict = state_dict_path
sd = get_state_dict(state_dict)
model = SimpleCNN(cfg).eval().to(DEVICE)
model.load_state_dict(sd)

preds = []
loss = []
with torch.no_grad():
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # Mixed precision only when running on CUDA; with enabled=False the
        # context is a no-op, so CPU runs do not trigger a warning.
        with torch.autocast(device_type="cuda", enabled=(DEVICE == "cuda")):
            out = model(batch['wave'])
            batch_loss = loss_fn(out, batch['labels'])
        preds.append(out.cpu().numpy())
        # atleast_1d: a reduced loss is a 0-d scalar, which cannot be concatenated.
        loss.append(np.atleast_1d(batch_loss.cpu().numpy()))

preds = np.vstack(preds)
loss = np.concatenate(loss, axis=0)
test_metadata_df['predicted_class_id'] = preds.argmax(axis=-1)

# The loss columns depend on what loss_fn returns:
#   * unreduced, one value per sample and class -> per-class columns + row mean
#   * unreduced, one value per sample           -> a single 'loss' column
#   * reduced to one scalar per batch           -> nothing row-aligned to store
# Previously the first layout was assumed unconditionally, which raised a
# length-mismatch error with a reduced loss such as the default CrossEntropyLoss.
n_rows = len(test_metadata_df)
if loss.ndim == 2 and loss.shape == (n_rows, len(data)):
    test_metadata_df['loss'] = loss.mean(axis=1)
    test_metadata_df[list(data.keys())] = loss
elif loss.ndim == 1 and loss.shape[0] == n_rows:
    test_metadata_df['loss'] = loss
else:
    print(f"loss_fn returned a reduced loss; mean over batches = {float(loss.mean()):.4f} "
          "(no per-sample loss columns written)")

test_metadata_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions.csv', index=False)
test_metadata_df.to_csv(f'{exp_path}/test_predictions.csv', index=False)