**About**: This notebook runs inference with trained models.

In [1]:
# Enable the IPython autoreload extension; mode 2 re-imports all modules before
# executing each cell, so edits to the project's .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/home/tviel/work/kaggle_birdclef_2024/src


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### Imports

In [3]:
import os
import gc
import sys
import cv2
import glob
import json
import torch
import shutil
import librosa
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn.functional as F
import matplotlib.pyplot as plt

from tqdm import tqdm
from joblib import Parallel, delayed

In [98]:
from util.logger import Config
from util.metrics import macro_auc
from util.torch import load_model_weights

from data.dataset import WaveInfDataset
from data.preparation import prepare_data, prepare_folds
from data.processing import create_target_path, ProgressParallel, get_load_librosa_save_h5py

from model_zoo.models import define_model
from inference.predict import predict

from params import CLASSES

### Params

In [100]:
# Mode switch read by the cells below:
#   True  -> read ../input/train_audio/, load the fold-0 weights and print the AUC
#   False -> read ../input/test_soundscapes/ and load the "fullfit_0" weights
# NOTE(review): the saved outputs in this notebook look like an EVAL = True run — confirm.
EVAL = False

In [101]:
# Audio source depends on the mode: training clips for evaluation,
# test soundscapes otherwise.
DATA_PATH = "../input/train_audio/" if EVAL else "../input/test_soundscapes/"

SAVE_FOLDER = "../output/tmp/"

# Start every run from an empty scratch folder.
scratch_already_there = os.path.exists(SAVE_FOLDER)
if scratch_already_there:
    shutil.rmtree(SAVE_FOLDER)
os.makedirs(SAVE_FOLDER)

In [102]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Settings forwarded to `predict`.
NUM_WORKERS = 2
USE_FP16 = True
BATCH_SIZE = 32

In [103]:
# Which checkpoint suffix to load: fold 0 when evaluating, "fullfit_0" otherwise.
if EVAL:
    FOLD = 0
else:
    FOLD = "fullfit_0"

# (experiment folder, list of folds to load from it)
EXP_FOLDERS = [("../logs/2024-04-12/8/", [FOLD])]

In [104]:
# from util.logger import upload_to_kaggle

# upload_to_kaggle(
#     [f for f, _ in EXP_FOLDERS],
#     directory="../output/dataset_1/",
#     dataset_name="BirdCLEF 2024 Weights 1",
#     update_folders=True
# )

### Preprocessing

In [105]:
DURATION = 5  # multiplied with SR below to build the sample slices, so in seconds
SR = 32000    # samples per second used for those slices

In [87]:
# One row per audio file found one directory level below DATA_PATH.
df = pd.DataFrame({"path": glob.glob(DATA_PATH + "*/*")})
# File name without its 4-character extension (e.g. ".ogg").
df["id"] = df["path"].apply(lambda x: x.split("/")[-1][:-4])

if EVAL:
    # Keep only the files assigned to fold 0 in the fold split.
    folds = pd.read_csv('../input/folds_4.csv')
    folds['id'] = folds['filename'].apply(lambda x: x.split('/')[-1][:-4])
    df = df.merge(folds)
    df = df[df['fold'] == 0].reset_index(drop=True)

    # The parent directory name is the label of the recording.
    df["primary_label"] = df["path"].apply(lambda x:  x.split('/')[-2])

# Length of each recording, as reported by librosa.
df["duration"] = df["path"].apply(lambda x: librosa.get_duration(path=x))

# Split every recording into consecutive windows of DURATION seconds, expressed
# as (start_sample, end_sample); the last window may run past the end of the file.
segment_samples = DURATION * SR
df["slice"] = df["duration"].apply(
    lambda x: [
        (i * segment_samples, (i + 1) * segment_samples)
        for i in range(int(np.ceil(x / DURATION)))
    ]
)
# explode() repeats the index label for every window of a file; reset it so
# each row has a unique label.
df = df.explode("slice").reset_index(drop=True)

# End time of each window within its file: DURATION, 2 * DURATION, ...
df["t_end"] = DURATION
df["t_end"] = df.groupby("id")["t_end"].cumsum()

if EVAL:
    # Only the first window of each file is kept for evaluation.
    df = df[df["t_end"] == DURATION].reset_index(drop=True)

In [88]:
# Preview the first rows of the frame that will be fed to the dataset.
df.head()

Unnamed: 0,path,id,filename,fold,primary_label,duration,slice,t_end
0,../input/train_audio/rorpar/XC199339.ogg,XC199339,rorpar/XC199339.ogg,0,rorpar,13.165719,"(0, 160000)",5
1,../input/train_audio/rorpar/XC757943.ogg,XC757943,rorpar/XC757943.ogg,0,rorpar,1.752,"(0, 160000)",5
2,../input/train_audio/rorpar/XC246306.ogg,XC246306,rorpar/XC246306.ogg,0,rorpar,202.448969,"(0, 160000)",5
3,../input/train_audio/rorpar/XC446300.ogg,XC446300,rorpar/XC446300.ogg,0,rorpar,18.96,"(0, 160000)",5
4,../input/train_audio/rorpar/XC606439.ogg,XC606439,rorpar/XC606439.ogg,0,rorpar,8.688,"(0, 160000)",5


### Models

In [89]:
# models[i][j] is the network for the j-th fold of the i-th experiment folder.
models = []

for exp_folder, exp_folds in EXP_FOLDERS:
    # Read the training configuration; the context manager closes the file handle.
    with open(exp_folder + "config.json", "r") as config_file:
        config = Config(json.load(config_file))

    models_ = []
    for fold in exp_folds:
        # Build a fresh network for every fold: reusing a single instance would
        # make every entry of `models_` alias the same module, holding only the
        # weights of the last fold loaded.
        model = define_model(
            config.name,
            config.melspec_config,
            head=config.head,
            aug_config=config.aug_config,
            num_classes=config.num_classes,
            n_channels=config.n_channels,
            drop_rate=config.drop_rate,
            drop_path_rate=config.drop_path_rate,
            increase_stride=config.increase_stride,
            verbose=not models_,  # describe the architecture once per experiment
            pretrained=False,
        )
        model = model.to(DEVICE).eval()

        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)

    models.append(models_)


 -> Loading encoder weights from ../logs/2024-04-12/8/tf_efficientnetv2_s_0.pt



In [90]:
# Maximum waveform length passed to the dataset: the training configuration's
# sample rate multiplied by its duration.
max_len_samples = config.melspec_config["sample_rate"] * config.duration

dataset = WaveInfDataset(df, normalize=config.normalize, max_len=max_len_samples)

In [91]:
# Runtime options for the prediction call.
inference_kwargs = dict(
    batch_size=BATCH_SIZE,
    device=DEVICE,
    use_fp16=USE_FP16,
    num_workers=NUM_WORKERS,
)

# Uses `model`, i.e. the last network loaded by the model-loading cell,
# not the full `models` list.
preds = predict(model, dataset, config.loss_config, **inference_kwargs)

  0%|          | 0/191 [00:00<?, ?it/s]

In [92]:
if EVAL:
    # Score the predictions against each recording's primary label.
    # Alternative kept for reference (targets taken from the dataset instead):
    #     dataset.use_secondary_labels = True
    #     y = dataset.get_targets()
    #     auc = macro_auc(y, preds)
    primary_labels = df["primary_label"].values.tolist()
    auc = macro_auc(primary_labels, preds)
    print(f'Fold 0 AUC: {auc:.3f}')

Fold 0 AUC: 0.978


In [99]:
# Submission frame: row_id is "<id>_<t_end>", then one column per entry of CLASSES.
row_ids = df["id"] + "_" + df["t_end"].astype(str)
sub = pd.DataFrame({"row_id": row_ids.values})
sub.loc[:, CLASSES] = preds
# sub.to_csv('submission.csv', index=False)
sub.head()

Unnamed: 0,row_id,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1
0,XC199339_5,0.047516,0.00104,0.004467,0.000707,0.003136,0.000178,0.001467,7e-05,0.001095,...,0.000631,0.000911,0.005775,0.00036,0.004055,0.007011,0.000196,0.000174,0.000136,0.1604
1,XC757943_5,0.000255,0.000368,0.000657,8.5e-05,0.001211,0.000217,0.000439,5.1e-05,0.000371,...,0.000296,0.000646,0.001748,0.000102,0.000958,0.021454,6.1e-05,0.000364,5.9e-05,0.008781
2,XC246306_5,0.011597,0.015305,0.002035,0.000389,0.02092,0.000641,0.010948,0.001782,0.003607,...,0.012192,0.0093,0.01049,0.002125,0.010368,0.00927,0.004398,0.00518,0.00075,0.008987
3,XC446300_5,0.000752,0.006851,0.001525,0.000157,0.005344,0.000251,0.000448,0.001086,0.000181,...,0.000937,0.002165,0.004887,0.000357,0.147583,0.015602,0.000886,0.018646,4e-05,0.002125
4,XC606439_5,0.000873,0.008415,0.004314,0.000527,0.004055,0.000584,0.001427,0.000974,0.000211,...,0.000758,0.000573,0.00528,0.000261,0.005001,0.00573,0.000849,0.001957,9.2e-05,0.033844


Done!