In [2]:
# Install the packages listed in ../../requirements.txt (path relative to this notebook)
# into the environment of the running kernel.
%pip install -r ../../requirements.txt

In [6]:
import os 
import sys
import json
from types import SimpleNamespace
import random
from glob import glob

import torch
import torchaudio
import torch.nn as nn
import numpy as np
import pandas as pd
import lightning as L
from tqdm import tqdm
from timm import create_model, list_models
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

sys.path.append('../../src')

from custom.data import AudioDataset, DataModule
from custom.trainer import TrainModule
from custom.net import SimpleCNN
from custom.utils import batch_to_device, get_state_dict, batch_to_device
from custom.eval import inference_k_random, error_analysis


# determinism
# seed = 1337
# torch.manual_seed(seed)
# np.random.seed(seed)
# random.seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)
# Enable IPython's autoreload extension so edits to the project modules under ../../src
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Repository root relative to this notebook; used as the prefix of the data paths built below.
# NOTE: the value already ends in '/', so f'{full_path}/data/...' yields a doubled slash.
full_path = '../../'

In [4]:
# Experiment configuration: a single namespace that is handed to the data module,
# the network and the training module further down.
cfg = SimpleNamespace(
    # Data location and backbone
    data_path=f'{full_path}/data/production_data/5s_crop/',
    n_classes=66,
    pretrained=True,
    backbone='tf_efficientnetv2_s.in21k',
    in_chans=1,
    num_workers=4,
    include_val=True,
    # Training hyperparameters
    n_epochs=20,
    lr=0.000251,
    weight_decay=1e-4,
    label_smoothing=0.1,
    batch_size=32,
    # Mel spectrogram hyperparameters (independent values)
    wav_crop_len=5,
    sample_rate=44100,
    n_mels=128,
    n_fft=2048,
    fmin=300,
)

# Second stage: values derived from the ones above, followed by the augmentation settings.
# Keyword order matches the attribute order of the namespace.
vars(cfg).update(
    fmax=cfg.sample_rate / 2,       # upper band edge = half the sample rate
    window_size=cfg.n_fft,          # window spans the whole FFT frame
    hop_length=int(cfg.n_fft / 2),  # hop of half a window
    power=2,
    top_db=80.0,
    # Augmentation parameters
    impulse_prob=0.2,
    noise_prob=0.2,
    max_noise=0.04,
    min_snr=5,
    max_snr=20,
)

# Experiment name; reused as the logger version and as the checkpoint folder name.
cfg.exp_name = f'{cfg.wav_crop_len}_crop-{cfg.n_epochs}e_noiseAug_CE_weighted_LS_pp_impulse_ALLDATA'

# Logger and callbacks
# TensorBoard logger rooted in the notebook directory, versioned by the experiment name.
tb_logger = TensorBoardLogger(
    name="effnet_baseline",
    version=cfg.exp_name,
    save_dir="./",
)
# Record the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval='epoch')
# Checkpointing and early stopping both track the validation F1 score (higher is better).
checkpoint_callback = ModelCheckpoint(
    save_last=True,
    mode="max",
    monitor="val_f1",
)
early_stop_callback = EarlyStopping(
    mode="max",
    monitor="val_f1",
    verbose=False,
    patience=5,
)

# Loss function and class weights
# Per-class weights are read from disk and passed to a label-smoothed, weighted cross-entropy loss.
class_weights = np.load(f'{full_path}/data/class_weights.npy')
# Choose the device the same way the inference cell does ("cuda" when available, else "cpu")
# instead of hard-coding 'cuda', so this cell also runs on a machine without a GPU.
# NOTE(review): the tensor keeps the dtype stored in the .npy file — confirm it matches the
# dtype of the model outputs.
loss_fn = nn.CrossEntropyLoss(
    weight=torch.from_numpy(class_weights).to('cuda' if torch.cuda.is_available() else 'cpu'),
    label_smoothing=cfg.label_smoothing,
)

# Data logic (loading, augmentation), driven entirely by cfg
dm = DataModule(cfg=cfg)

# Network, built from the same configuration
model = SimpleCNN(cfg)

# Training logic: wraps the network together with the loss and the optimizer settings
tmod = TrainModule(
    model,
    cfg=cfg,
    loss_fn=loss_fn,
    optimizer_name='Adam',
    optimizer_hparams=dict(lr=cfg.lr, weight_decay=cfg.weight_decay),
)

In [9]:
# Metadata table of the test set; the prediction column is added to it after inference.
test_df = pd.read_csv(f"{full_path}/data/test/metadata.csv")
# Directory that is globbed for the test WAV files ('<i>_*.wav') in the cells below.
test_data = f'{cfg.data_path}/test'

In [20]:
# Count how many WAV files exist for each test recording.
# The number of recordings is taken from the metadata table rather than a hard-coded 556,
# so the cell keeps working when the test set changes.
sizes = []
data = []  # stays empty only if the metadata table has no rows
for i in range(len(test_df)):
    # Files belonging to recording i are matched by the pattern '<i>_*.wav'.
    data = glob(f'{test_data}/{i}_*.wav')
    bs = len(data)
    sizes.append(bs)

# Only the file list of the last recording outlives the loop (the next cell builds a dataset
# from `df`), so the frame is created once here instead of on every iteration.
df = pd.DataFrame(data, columns=['path'])

In [11]:
# Test-mode dataset over the file list currently held in `df`, served in order (no shuffling).
pred_ds = AudioDataset(df, cfg=cfg, mode='test')
pred_dl = DataLoader(
    pred_ds,
    batch_size=cfg.batch_size,
    num_workers=cfg.num_workers,
    shuffle=False,
)
   

In [23]:
# Paths of the trained experiment: its folder, the label file and the last checkpoint.
exp_path = f'effnet_baseline/{cfg.exp_name}/'
label_path = f'{full_path}/data/labels.json'
state_dict_path = f"{exp_path}/checkpoints/last.ckpt"

# Content of the label file, kept under its own name so the per-recording file lists
# (`data`) in the loop below do not overwrite it.
with open(label_path, 'r') as infile:
    class_labels = json.load(infile)

# Restore the checkpoint weights into the network and switch it to evaluation mode.
torch.cuda.empty_cache()
device = "cuda" if torch.cuda.is_available() else 'cpu'
sd = get_state_dict(state_dict_path)
model = model.eval().to(device)
model.load_state_dict(sd)

# One averaged output vector per test recording, in metadata row order.
preds = []
with torch.no_grad():
    # The recording count comes from the metadata table instead of a hard-coded 556; the
    # column assignment after the loop requires exactly one prediction per row.
    for i in tqdm(range(len(test_df))):
        data = glob(f'{test_data}/{i}_*.wav')
        if not data:
            # Fail with a clear message instead of the opaque np.vstack error on an empty list.
            raise FileNotFoundError(f'No WAV files matching {test_data}/{i}_*.wav')
        df = pd.DataFrame(data, columns=['path'])
        pred_ds = AudioDataset(df, mode='test', cfg=cfg)
        pred_dl = DataLoader(pred_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=cfg.num_workers)

        _preds = []
        # leave=False removes each inner bar once its recording is done.
        for batch in tqdm(pred_dl, leave=False):
            batch = batch_to_device(batch, device)
            # Mixed precision only when actually running on CUDA.
            with torch.autocast(device_type='cuda', enabled=(device == 'cuda')):
                out = model(batch['wave'])
            _preds.append(out.cpu().numpy())
        # Average the model outputs over all files of the recording.
        _preds = np.vstack(_preds).mean(axis=0)

        preds.append(_preds)

# The predicted class is the index of the largest averaged output.
test_df['predicted_class_id'] = np.array(preds).argmax(axis=-1)
torch.cuda.empty_cache()

In [24]:
# Write the file name and predicted class id of every test row to a CSV inside the
# experiment folder, without the DataFrame index.
test_df[['file_name', 'predicted_class_id']].to_csv(f'{exp_path}/submission_predictions_full.csv', index=False)

In [25]:
# Display the test table, which now carries the predicted_class_id column.
test_df

In [9]:
# Keep only the rows whose 'subset' column equals 'train'.
# NOTE(review): df_metadata is not created in any visible cell, and this cell overwrites its
# own input, so it depends on kernel state and is not idempotent — confirm where the table
# is loaded.
df_metadata = df_metadata[df_metadata['subset']=='train']

In [11]:
# Build a dataset over the training metadata and run its setup step; the cells below read
# `ad.labels`.
# NOTE(review): the other AudioDataset calls in this notebook pass mode= and cfg= — confirm
# that the defaults are what is wanted here.
ad = AudioDataset(df_metadata)
ad.setup()

In [14]:
weights

In [13]:
weights = ad.labels.sum()/ad.labels.sum(axis=0)

In [16]:
# Unreduced binary cross-entropy-with-logits loss (one value per element).
# NOTE(review): this rebinds `loss_fn`, which the configuration cell set to the weighted
# CrossEntropyLoss — confirm the overwrite is intended.
loss_fn = nn.BCEWithLogitsLoss(reduction="none")

In [15]:
# Persist the class weights to 'class_weights.npy' in the current working directory.
# NOTE(review): the configuration cell loads f'{full_path}/data/class_weights.npy' — confirm
# that the file is moved there or that the paths are meant to differ.
np.save('class_weights.npy', weights)

In [22]:
# Sum of the element-wise difference between the dataset labels and `test`.
# NOTE(review): `test` is not assigned in any active cell of this notebook (only inside
# commented-out code) — this cell depends on leftover kernel state.
torch.sum(ad.labels - test)

In [None]:
# wave, sample_rate = torchaudio.load(f'../../data/train/Roeselianaroeselii_XC751814-dat028-019_edit1.wav')
# wave = wave[0] # only one channel
# wave = wave[0:cfg.wav_crop_len*sample_rate]
# from custom.utils import Compose, OneOf, NoiseInjection, GaussianNoise, PinkNoise
# wave_transforms = Compose(
#                 [OneOf(
#                     [NoiseInjection(p=1, max_noise_level=0.04),
#                      GaussianNoise(p=1, min_snr=5, max_snr=20),
#                      PinkNoise(p=1, min_snr=5, max_snr=20)],
#                     p=1),
#                 ]
#             )
# test = [wave_transforms(wave, sample_rate) for i in range(10)]

In [8]:
# Load one preprocessed training chunk together with its sample rate.
wave, sample_rate = torchaudio.load(f'../../data/preprocess/train/Tettigoniaviridissima_XC752199-dat117-023_chunk20.wav')
wave = wave[0] # only one channel

In [9]:
# Clip duration in seconds: number of samples divided by samples per second.
wave.shape[0]/sample_rate

In [8]:
from IPython.display import Audio # for listening to our insects

In [9]:
# Render an audio player for one preprocessed training chunk.
Audio(f'../../data/preprocess/train/Roeselianaroeselii_XC751814-dat028-019_edit1_chunk1.wav')

In [14]:
from glob import glob

# NOTE(review): the saved statement was truncated to `irs = ")`, which is a syntax error.
# The `glob` import just above and the later `ir_paths=irs` argument indicate a glob over
# impulse-response audio files.
# TODO: the directory below is a placeholder — point it at the real impulse-response folder.
irs = sorted(glob('../../data/impulse_responses/*.wav'))

In [15]:
# Display the collected impulse-response paths.
irs

In [16]:
# Impulse-response augmentation, applied with probability 0.7 and drawing from the files in `irs`.
# NOTE(review): ApplyImpulseResponse is not imported in any visible cell — presumably it comes
# from torch_audiomentations; confirm and add the import to the top import cell.
impulse = ApplyImpulseResponse(
    ir_paths=irs,
    sample_rate=sample_rate,
    p=0.7,
    mode="per_example",
    p_mode="per_example",
    compensate_for_propagation_delay=True,
)

In [44]:
# Shape probe on the augmented tensor.
# NOTE(review): `aug` is only assigned in the following cell, so this fails on a top-to-bottom
# run; a view with a 0-sized first dimension also looks unintended (perhaps view(1, -1)) — confirm.
aug.view(0, -1).shape

In [37]:
# Run the impulse-response augmentation on the clip, reshaped to three dimensions via view(1, 1, -1).
aug = impulse(wave.view(1, 1, -1))
import soundfile as sf
# Write the first entry along the two leading dimensions to 'test.wav' and render a player for it.
sf.write('test.wav', aug[0][0].numpy(), sample_rate) 
Audio(f'test.wav')

In [26]:
from eda_utils import plot_waveform

In [34]:
# Plot the waveform of the augmented clip written to 'test.wav'.
plot_waveform('test.wav')

In [35]:
# Plot the waveform of a preprocessed training chunk for comparison, passing time=15.
plot_waveform(f'../../data/preprocess/train/Roeselianaroeselii_XC751814-dat028-019_edit1_chunk1.wav', time=15)