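**About**: This notebook prepares the data, checks the spectrogram extraction and the model on a sample, and trains the model with k-fold cross-validation.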

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

### Imports

In [None]:
import os
import sys
import glob
import json
import torch
import librosa
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from copy import deepcopy
from joblib import delayed
from sklearn.metrics import *

warnings.simplefilter(action="ignore", category=UserWarning)

In [None]:
from params import *
from util.plots import *

from data.preparation import *
from data.dataset import WaveDataset
from data.transforms import get_transfos

from util.logger import create_logger, save_config, prepare_log_folder
from util.torch import init_distributed

from model_zoo.models import define_model
from model_zoo.melspec import FeatureExtractor, CustomTimeMasking, CustomFreqMasking

from training.losses import BirdLoss
from training.main import k_fold

### Data

In [None]:
# NOTE(review): `df` is assigned twice — the result of prepare_data() is replaced
# by the result of prepare_nocall_data(), so the cells below run on the latter.
# `df_xc` is not used again in this notebook. Confirm this is intended.
df = prepare_data()
df_xc = prepare_xenocanto_data()
df = prepare_nocall_data()

# Preview five random rows.
df.sample(5)

In [None]:
# Transforms requested with augment=True and strength=1; see
# data.transforms.get_transfos for what these settings enable.
transforms = get_transfos(augment=True, strength=1)

In [None]:
# Dataset over `df` in train mode with self-mixup enabled.
# max_len = 32000 * 10 — presumably 10 s at a 32 kHz sample rate; TODO confirm the unit.
dataset = WaveDataset(
    df,
    transforms,
    max_len=32000 * 10,
    train=True,
    self_mixup=True,
)

In [None]:
# Index every item once (with a progress bar); any exception raised while
# building an item surfaces here. The commented-out try/except printed the
# failing index and stopped instead.
for i in tqdm(range(len(dataset))):
    # try:
    x = dataset[i]
    # except:
    #     print(i)
    #     break

In [None]:
# Draw one item at a random index; an item unpacks into three values.
# NOTE(review): no seed is set in this notebook, so the pick may differ between runs.
wave, y, w = dataset[np.random.choice(len(dataset))]
# w = wave

In [None]:
# Title the clip with the name of every class whose target entry is non-zero.
# np.flatnonzero yields all active indices and an empty array when there is none,
# so multi-label items show every label and an all-zero target gives an empty title
# instead of raising IndexError.
# NOTE(review): assumes `y` is still the 1-D target returned by the dataset — run
# this cell before `y` is stacked into a batch further down.
active_classes = [CLASSES[int(c)] for c in np.flatnonzero(y.numpy())]
display_audio(wave.numpy(), title=", ".join(active_classes))

In [None]:
# display_audio(w.numpy(), title=", ".join(([CLASSES[int(c)] for c in np.argwhere(y.numpy())[0]])))

### Spectrograms

In [None]:
# Spectrogram settings passed to FeatureExtractor, define_model and plot_spectrogram below.
params = dict(
    sample_rate=32000,
    n_mels=128,
    f_min=50,
    f_max=15000,
    n_fft=2048,
    hop_length=512,
    normalized=False,
)

# Augmentation settings for the feature extractor. Every probability is 0 here
# (presumably to inspect un-augmented spectrograms — confirm).
aug_config = dict(
    specaug_freq=dict(mask_max_length=10, mask_max_masks=3, p=0.0),
    specaug_time=dict(mask_max_length=20, mask_max_masks=3, p=0.0),
    mixup=dict(p_audio=0, p_spec=0, additive=True, alpha=4, num_classes=182),
)

In [None]:
# Build the feature extractor from the two configs above and keep the object
# returned by its .train() call.
ft_extractor = FeatureExtractor(params, aug_config=aug_config).train()

In [None]:
# Duplicate the sampled waveform to form a batch of two.
x = torch.stack((wave, wave))

# Stack the target the same way, unless it already has a leading dimension —
# the guard keeps this cell safe to re-run.
if y.dim() < 2:
    y = torch.stack((y, y))

In [None]:
# Run the feature extractor on the batch; it returns three values and only the
# first one is kept.
melspec, _, _ = ft_extractor(x)

In [None]:
# Plot the first element of the extracted batch, using the same `params` as the extractor.
plot_spectrogram(melspec[0].numpy(), params)

In [None]:
# plot_spectrogram(melspec[1].numpy(), params)

### Model

In [None]:
# Build an "eca_nfnet_l0" model with head="gem", a single input channel and
# NUM_CLASSES classes; `params` is the spectrogram configuration defined above.
model = define_model(
    "eca_nfnet_l0", 
    params,
    head="gem",
    n_channels=1,
    num_classes=NUM_CLASSES,
)

In [None]:
# Forward the waveform batch and its targets through the model; three values
# are returned and only the first one is kept.
pred, _, _ = model(x, y)

In [None]:
# Show the target and prediction shapes side by side.
y.size(), pred.size()

In [None]:
# Instantiate BirdLoss with a minimal "bce" config and display its value for the
# predictions and targets above. Note that `loss` holds the loss object, not the value.
loss = BirdLoss({"name": "bce", "top_k": 0})
loss(pred, y)

### Train

In [None]:
class Config:
    """
    Parameters used for training.

    The class is used as a plain namespace of class attributes: the class object
    itself (not an instance) is passed to `save_config`, `init_distributed` and
    `k_fold` in the cells below.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    num_classes = 182  # defined once, reused by the mixup and data configs below
    train_duration = 5  # presumably seconds — TODO confirm
    duration = 5  # presumably seconds — TODO confirm
    aug_strength = 0
    self_mixup = True
    normalize = True

    melspec_config = {
        "sample_rate": 32000,
        "n_mels": 128,  # 128, 224
        "f_min": 50,
        "f_max": 15000,
        "n_fft": 2048,
        "hop_length": 512,  # 716, 512, 417
        "normalized": False,
    }

    aug_config = {
        "specaug_freq": {
            "mask_max_length": 10,
            "mask_max_masks": 3,
            "p": 0.5,
        },
        "specaug_time": {
            "mask_max_length": 20,
            "mask_max_masks": 3,
            "p": 0.5,
        },
        "mixup":
        {
            "p_audio": 0.75,
            "p_spec": 0.25,
            "additive": True,
            "alpha": 4,
            "num_classes": num_classes,
        }
    }

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # file name follows the number of folds
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "tf_efficientnetv2_s"  # tf_efficientnetv2_s maxvit_tiny_tf_384 eca_nfnet_l0
    pretrained_weights = None

    drop_rate = 0.2
    drop_path_rate = 0.2
    n_channels = 1
    head = "gem"
    reduce_stride = "256" in name  # True only when the model name contains "256"

    # Training
    loss_config = {
        "name": "bce",
        "weighted": True,
        "smoothing": 0.,
        "top_k": 0,
        "activation": "sigmoid",  # "softmax"
    }
    secondary_labels_weight = 1. if loss_config["name"] == "bce" else 0.5  # 0.5 for ce / 1. for bce

    data_config = {
        "batch_size": 64,
        "val_bs": 256,
        "num_classes": num_classes,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "Ranger",
        "lr": 5e-3,
        "warmup_prop": 0.0,
        "betas": (0.9, 0.999),
        "max_grad_norm": 0.1,
        "weight_decay": 0.,
    }

    epochs = 20

    use_fp16 = True
    verbose_eval = 20

    fullfit = True
    n_fullfit = 1

In [None]:
# When True, the next cell skips creating the log folder, the logger and the saved config.
DEBUG = True

In [None]:
# In debug mode nothing is written to disk: `log_folder` stays None and is
# passed as such to k_fold below.
log_folder = None
config = Config

if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f'\n -> Logging results to {log_folder}\n')

    create_logger(directory=log_folder, name="logs.txt")
    # os.path.join gives the same path when log_folder ends with a separator and
    # still produces a valid one when it does not.
    save_config(config, os.path.join(log_folder, "config.json"))

init_distributed(config)

In [None]:
# Load the training frame from DATA_PATH; this replaces the `df` used in the
# exploration cells above. The commented-out lines appended the xeno-canto data
# and subsampled 100 rows in debug mode.
df = prepare_data(DATA_PATH)
# df_xc = prepare_xenocanto_data(DATA_PATH)
# df = pd.concat([df, df_xc], ignore_index=True)

# if DEBUG:
#     df = df.sample(100).reset_index(drop=True)

In [None]:
# Launch training on `df` with the Config class; log_folder is None when DEBUG is True.
k_fold(config, df, log_folder=log_folder)

Done!