**About** : This notebook is used to prepare the data.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/home/tviel/work/kaggle_birdclef_2024/src


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### Imports

In [3]:
import os
import sys
import glob
import json
import torch
import librosa
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from copy import deepcopy
from joblib import delayed
from sklearn.metrics import *

warnings.simplefilter(action="ignore", category=UserWarning)
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 200)


In [4]:
from params import *
from util.plots import *

from data.preparation import *
from data.dataset import WaveDataset, PLDataset
from data.transforms import get_transfos

from util.logger import create_logger, save_config, prepare_log_folder
from util.torch import init_distributed

from model_zoo.models import define_model
from model_zoo.melspec import FeatureExtractor, CustomTimeMasking, CustomFreqMasking

from training.losses import BirdLoss
from training.main import k_fold

### Data

In [5]:
df = prepare_data_2()
df_xc = prepare_xenocanto_data()
# df = prepare_nocall_data()

df.sample(5)

Unnamed: 0,id,filename,primary_label,secondary_labels,rating,path,path_ft,fold
10815,XC786450,graher1/XC786450.ogg,graher1,[],5.0,../input/train_audio/graher1/XC786450.ogg,../input/train_features/graher1/XC786450.hdf5,2
16304,XC209621,lesyel1/XC209621.ogg,lesyel1,[],4.0,../input/train_audio/lesyel1/XC209621.ogg,../input/train_features/lesyel1/XC209621.hdf5,2
23239,XC770531,whtkin2/XC770531.ogg,whtkin2,[],5.0,../input/train_audio/whtkin2/XC770531.ogg,../input/train_features/whtkin2/XC770531.hdf5,1
7731,XC814947,comsan/XC814947.ogg,comsan,[],5.0,../input/train_audio/comsan/XC814947.ogg,../input/train_features/comsan/XC814947.hdf5,2
188,XC686159,ashdro1/XC686159.ogg,ashdro1,[],4.0,../input/train_audio/ashdro1/XC686159.ogg,../input/train_features/ashdro1/XC686159.hdf5,3


In [6]:
# df_ = pd.concat([
#     df[~df["primary_label"].apply(lambda x: isinstance(x, list))],
#     df[df["primary_label"].apply(lambda x: isinstance(x, list))].explode('primary_label')
# ], ignore_index=True)

# cts = df_["primary_label"].value_counts()
# cts = np.array([cts.to_dict()[c] for c in CLASSES])

# ws = (1 / (cts / 500)) ** 0.4

In [10]:
folders = [
    "../logs/2024-05-14/17/",
    "../logs/2024-05-14/16/",
    "../logs/2024-05-14/15/",
    "../logs/2024-05-14/14/",
    "../logs/2024-05-14/12/",
    "../logs/2024-05-14/8/",
]

pls = [pd.read_csv("../output/cpmp_preds_72/pl_sub.csv")]
for f in tqdm(folders):
    for fold in [0]: # , 1, 2, 3]:
        pls.append(pd.read_csv(f + f"pl_sub_{fold}.csv"))

FileNotFoundError: [Errno 2] No such file or directory: '../logs/2024-05-14/12/pl_sub_0.csv'

In [None]:
dataset = PLDataset(pls, normalize="std")

In [8]:


dataset = PLDataset(pls, normalize="std")
x, y, y_aux, w = dataset[0]
pl_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=256,
    shuffle=True,
    drop_last=True,
    num_workers=8,
    pin_memory=False,
)
pl_iterator = iter(pl_loader)

NameError: name 'folders' is not defined

In [None]:
for i in tqdm(range(len(pl_loader))):
    x, y, y_aux, w = next(pl_iterator)

In [None]:
transforms = get_transfos(augment=True, strength=1)

In [None]:
dataset = WaveDataset(
    df,
    transforms,
    max_len=32000 * 5,
    train=True,
    self_mixup=True,
    secondary_labels_weight=1,
)

In [None]:
# for i in tqdm(range(len(dataset))):
#     # try:
#     x = dataset[i]
#     # except:
#     #     print(i)
#     #     break

In [None]:
wave, y, y_aux, w = dataset[np.random.choice(len(dataset))]
# w = wave
y.sum(), y_aux.sum()

In [None]:
# display_audio(wave.numpy(), title=", ".join(([CLASSES[int(c)] for c in np.argwhere(y.numpy())[0]])))

In [None]:
# display_audio(w.numpy(), title=", ".join(([CLASSES[int(c)] for c in np.argwhere(y.numpy())[0]])))

### Spectrograms

In [None]:
# params = {
#     "sample_rate": 32000,
#     "n_mels": 128,
#     "f_min": 50,
#     "f_max": 15000,
#     "n_fft": 2048,
#     "hop_length": 512,
#     "normalized": False,
# }

params = {
    "sample_rate": 32000,
    "n_mels": 224,  # 128, 224
    "f_min": 90,  # 50
    "f_max": 14000,  # 15000
    "n_fft": 1536,  # 1536
    "hop_length": 717,  # 717
    "win_length": 1024,
    "mel_scale": "htk",
    "power": 2.0,
}

aug_config = {
    "specaug_freq": {
        "mask_max_length": 10,
        "mask_max_masks": 3,
        "p": 0.,
    },
    "specaug_time": {
        "mask_max_length": 20,
        "mask_max_masks": 3,
        "p": 0.,
    },
    "mixup":
    {
        "p_audio": 0.5,
        "p_spec": 0,
        "additive": True,
        "alpha": 4,
        "num_classes": 182,
    }
}

In [None]:
ft_extractor = FeatureExtractor(params, aug_config=aug_config, norm="simple")
ft_extractor = ft_extractor.train()

In [None]:
x = torch.stack([wave, wave])
if len(y.size()) < 2:
    y = torch.stack([y, y])

In [None]:
with torch.cuda.amp.autocast(enabled=True):
    melspec, _, _, _ = ft_extractor(x)

In [None]:
melspec.size()

In [None]:
plot_spectrogram(melspec[0].numpy(), params)

In [None]:
# plot_spectrogram(melspec[1].numpy(), params)

### Model

In [None]:
# "mixnet_s" "mobilenetv2_100" "mnasnet_100" "tf_efficientnet_b0" "tinynet_b"

In [None]:
model = define_model(
    "efficientvit_b0", 
    params,
    head="gem",
    n_channels=3,
    num_classes=NUM_CLASSES,
)

In [None]:
from util.torch import count_parameters
count_parameters(model)

In [None]:
pred, _, _, w = model(x, y, y_aux, w)

In [None]:
y.size(), pred.size()

In [None]:
loss = BirdLoss({"name": "bce", "top_k": 0, "ousm_k": 0})
loss(pred, y)

### Train

In [None]:
class Config:
    """
    Parameters used for training
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    use_nocall = False
    upsample_low_freq = False

    train_duration = 5  # 15, 5
    duration = 5
    random_crop = False

    aug_strength = 1
    self_mixup = True
    normalize = True  # False ??
    exportable = False

    melspec_config = {
        "sample_rate": 32000,
        "n_mels": 128,  # 128, 224
        "f_min": 50,
        "f_max": 15000,
        "n_fft": 2048,
        "hop_length": 512,  # 716, 512, 417
        "normalized": True,
    }

    aug_config = {
        "specaug_freq": {
            "mask_max_length": 10,
            "mask_max_masks": 3,
            "p": 0.5,
        },
        "specaug_time": {
            "mask_max_length": 20,
            "mask_max_masks": 3,
            "p": 0.5,
        },
        "mixup":
        {
            "p_audio": 0.5,
            "p_spec": 0.2,
            "additive": True,
            "alpha": 4,
            "num_classes": 182,
        }
    }

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "tf_efficientnetv2_s"  # tf_efficientnetv2_s maxvit_tiny_tf_384 eca_nfnet_l0
    pretrained_weights = None

    num_classes = 182
    drop_rate = 0.2
    drop_path_rate = 0.2
    n_channels = 1
    head = "gem"
    reduce_stride = "256" in name

    # Training
    loss_config = {
        "name": "focal_bce",
        "weighted": False,
        "smoothing": 0.,
        "top_k": 0,
        "ousm_k": 0,
        "activation": "sigmoid",  # "softmax"
    }
    secondary_labels_weight = 0.5 if loss_config["name"] == "ce" else 1.

    data_config = {
        "batch_size": 8,
        "val_bs": 256,
        "num_classes": num_classes,
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "Ranger",
        "lr": 5e-3,
        "warmup_prop": 0.0,
        "betas": (0.9, 0.999),
        "max_grad_norm": 0.1,
        "weight_decay": 0.,
    }

    epochs = 1

    use_fp16 = True
    verbose = 1
    verbose_eval = 20

    fullfit = True
    n_fullfit = 1

In [None]:
DEBUG = True

In [None]:
log_folder = None
config = Config

if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f'\n -> Logging results to {log_folder}\n')

    create_logger(directory=log_folder, name="logs.txt")
    save_config(config, log_folder + "config.json")

init_distributed(config)

In [None]:
df = prepare_data(DATA_PATH)
# df_xc = prepare_xenocanto_data(DATA_PATH)
# df = pd.concat([df, df_xc], ignore_index=True)

# if DEBUG:
#     df = df.sample(100).reset_index(drop=True)

In [None]:
k_fold(config, df, log_folder=log_folder)

Done !