**About** : This notebook is used to infer models.

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
cd ../src/

/home/tviel/work/kaggle_birdclef_2024/src


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### Imports

In [22]:
import os
import gc
import sys
import cv2
import glob
import json
import torch
import shutil
import joblib
import librosa
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn.functional as F
import matplotlib.pyplot as plt

from tqdm import tqdm
from scipy.special import expit
from joblib import Parallel, delayed

warnings.simplefilter(action="ignore", category=UserWarning)

In [5]:
from util.logger import Config
from util.metrics import macro_auc
from util.torch import load_model_weights

from data.dataset import WaveInfDataset
from data.preparation import prepare_data, prepare_folds
from data.processing import create_target_path, ProgressParallel, get_load_librosa_save_h5py

from model_zoo.models import define_model
from inference.predict import predict

from params import CLASSES

### Params

In [6]:
EVAL = False

In [7]:
if EVAL:
    DATA_PATH = "../input/train_audio/"
else:
    DATA_PATH = "../input/unlabeled_soundscapes/"

In [8]:
BATCH_SIZE = 48
USE_FP16 = True
NUM_WORKERS = 4

DEVICE = "cpu" 
RUNTIME = "openvino"

DURATION = 5
SR = 32000

In [9]:
FOLD = 0 if EVAL else "fullfit_0"

EXP_FOLDERS = [
    # ("../logs/2024-04-12/8/", [FOLD]),   # LB 0.64 baseline
    # ("../logs/2024-04-18/12/", [FOLD]),  #
    # ("../logs/2024-04-18/15/", [FOLD]),  #
    # ("../logs/2024-04-19/4/", [FOLD]),  # Change norm, sampling
    # ("../logs/2024-04-19/5/", [FOLD]),  # d=15s
    # ("../logs/2024-04-19/7/", [FOLD]),  # minmaxnorm, sampling, nocall, less mix
    # ("../logs/2024-04-19/8/", [FOLD]),  # minmaxnorm, no sampling, nocall, less mix no add
    # ("../logs/2024-04-19/10/", [FOLD]),  # minmaxnorm more mix more aug
    # ("../logs/2024-04-29/2/", [FOLD]),  # minmaxnorm fixed crop
    # ("../logs/2024-04-29/4/", [FOLD]),  # minmaxnorm fixed crop 20s selfmix
    # ("../logs/2024-04-29/6/", [FOLD]),  # minmaxnorm fixed crop no_xc selfmix
    # ("../logs/2024-04-29/7/", [FOLD]),  # minmaxnorm fixed crop no_xc selfmix focal_bce ousmk
    # ("../logs/2024-04-30/0/", [FOLD]),  # minmaxnorm selfmix focal_bce ousmk
    # ("../logs/2024-04-30/1/", [FOLD]),  # minmaxnorm selfmix focal_bce ousmk++
    # ("../logs/2024-04-30/3/", [FOLD]),  # minmaxnorm selfmix focal_bce no_xc more mix
    # ("../logs/2024-04-30/4/", [FOLD]),  # minmaxnorm selfmix bce no_xc ousmk^
    # ("../logs/2024-05-02/0/", [FOLD]),  # selfmix focal_bce ousmk + dedup, new melspec params, start-end sampling
    ("../logs/2024-05-02/15/", [FOLD]),  # no selfmix focal_bce ousmk + 2nd mask, new melspec params, start-end sampling
    ("../logs/2024-05-04/4/", [FOLD]),  # no selfmix bce + 2nd mask, new melspec params, start-end sampling + fewer epochs, wd AdamW
]

In [10]:
# from util.logger import upload_to_kaggle

# upload_to_kaggle(
#     [f for f, _ in EXP_FOLDERS],
#     directory="../output/dataset_1/",
#     dataset_name="BirdCLEF 2024 Weights 1",
#     update_folders=True
# )

### Preprocessing

In [11]:
if EVAL:
    df = pd.DataFrame({"path": glob.glob(DATA_PATH + "*/*")})
    df["id"] = df["path"].apply(lambda x: x.split("/")[-1][:-4])

    folds = pd.read_csv('../input/folds_4.csv')
    folds['id'] = folds['filename'].apply(lambda x: x.split('/')[-1][:-4])
    df = df.merge(folds)
    df = df[df['fold'] == 0].reset_index(drop=True)

    df["primary_label"] = df["path"].apply(lambda x:  x.split('/')[-2])
else:
    df = pd.DataFrame({"path": glob.glob(DATA_PATH + "*")})
    df["id"] = df["path"].apply(lambda x: x.split("/")[-1][:-4])
    
    df["duration"] = df["path"].apply(lambda x: librosa.get_duration(path=x))
    df = df[df["duration"] == 240].reset_index(drop=True)

In [12]:
df.head()

Unnamed: 0,path,id,duration
0,../input/unlabeled_soundscapes/184575141.ogg,184575141,240.0
1,../input/unlabeled_soundscapes/1542255759.ogg,1542255759,240.0
2,../input/unlabeled_soundscapes/1976786596.ogg,1976786596,240.0
3,../input/unlabeled_soundscapes/106748716.ogg,106748716,240.0
4,../input/unlabeled_soundscapes/523220948.ogg,523220948,240.0


### Models

In [13]:
models = []

for exp_folder, folds in EXP_FOLDERS:
    models_ = []
    config = Config(json.load(open(exp_folder + "config.json", "r")))

    model = define_model(
        config.name,
        config.melspec_config,
        head=config.head,
        aug_config=config.aug_config,
        num_classes=config.num_classes,
        n_channels=config.n_channels,
        drop_rate=config.drop_rate,
        drop_path_rate=config.drop_path_rate,
        # exportable=True,
        verbose=True,
        pretrained=False
    )
    model = model.to(DEVICE).eval()
    
    for fold in folds:
        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)
        
    models.append(models_)


 -> Loading encoder weights from ../logs/2024-05-02/15/tf_efficientnetv2_s_fullfit_0.pt



### Export

In [40]:
if RUNTIME != "torch":
    import onnx
    import onnxruntime as ort
    from onnxconverter_common import float16

    def infer_onnx(ort_session, x, output_names=["output"], input_name="x"):
        x = ort_session.run(output_names, {input_name: x.numpy()})[0]
        return x

    input_names = ['x']
    output_names = ['output']

    input_tensor = torch.randn(
        BATCH_SIZE,
        1,
        config.melspec_config['n_mels'],
        313 if config.melspec_config['hop_length'] == 512 else 224
    )

    onnx_ckpt_list = []
    for models_ in models:
        for i, model in enumerate(models_):
            torch.onnx.export(
                model.encoder,
                input_tensor,
                f"model_{i}.onnx",
                verbose=False,
                input_names=input_names,
                output_names=output_names,
                dynamic_axes={"x": [0]}
            )
            onnx_ckpt_list.append(f"model_{i}.onnx")


    ort_sessions = []
    for i in range(len(models)):
        onnx_model = onnx.load(f"model_{i}.onnx")
        # onnx_model = float16.convert_float_to_float16(onnx_model)
        # onnx.save(onnx_model, f"model_{i}.onnx")
        onnx.checker.check_model(onnx_model)
        ort_session = ort.InferenceSession(f"model_{i}.onnx")
        ort_sessions.append(ort_session)
        
    ort_session_2 = ort_sessions[0]
    out = ort_session_2.run(output_names, {input_names[0] : input_tensor.numpy()})  # .astype(np.float16)
    print(out[0].shape)

(48, 1280, 4, 10)


In [41]:
if RUNTIME == "openvino":
    !mo --input_model model_0.onnx # --compress_to_fp16=False

[ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False.
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html
[ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. Please use OpenVINO Model Converter (OVC). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /home/tviel/work/kaggle_birdclef_2024/src/model_0.xml
[ SUCCESS ] BIN file: /home/tviel/work/kaggle_birdclef_2024/src/model_0.bin


In [42]:
if RUNTIME == "openvino":
    import openvino.runtime as ov
    core = ov.Core()
    openvino_model = core.read_model(model='model_0.xml')
    compiled_model = core.compile_model(openvino_model, device_name="CPU")
    infer_request = compiled_model.create_infer_request()

### Main

In [33]:
RUNTIME = "openvino"  # torch openvino

In [34]:
def load_sample(path):
    wave, sr = librosa.load(path, sr=SR)

    if EVAL:
        if len(wave) > SR * DURATION:
            wave = wave[:SR * DURATION][None]
        else:
            wave = np.pad(wave, (0, SR * DURATION - len(wave)))[None]
    else:
        wave = wave.reshape(-1, SR * DURATION)

    if config.normalize:
        wave = np.array([librosa.util.normalize(w) for w in wave])

    wave = torch.from_numpy(wave)
    return wave

In [35]:
def infer_sample(wave):
    if isinstance(wave, str):
        wave = load_sample(wave)
    
    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled=False):
            melspec = model.ft_extractor(wave)[0].unsqueeze(1)

        # y_pred = torch.zeros((48, 182))

        if RUNTIME == "openvino":
            fts = infer_request.infer(inputs=[melspec.numpy()])["output"]
            y_pred = model.get_logits(torch.from_numpy(fts))
        elif RUNTIME == "onnx":
            fts = infer_onnx(ort_session, melspec)
            y_pred = model.get_logits(torch.from_numpy(fts))
        else:
            with torch.cuda.amp.autocast(enabled=USE_FP16):
                fts = model.encoder(melspec)
                y_pred = model.get_logits(fts)
        y_pred = y_pred.detach().numpy()

    return y_pred

In [36]:
df = df[df["path"].apply(lambda x: "1872382287" in x)].reset_index(drop=True)

In [37]:
df = df.head(500)

In [38]:
waves = joblib.Parallel(n_jobs=4)(  # , backend='loky'
    joblib.delayed(load_sample)(path) for path in tqdm(df["path"].values)
)

100%|██████████| 1/1 [00:00<00:00, 1955.39it/s]




In [39]:
all_preds = [infer_sample(wave) for wave in tqdm(waves)]  # Torch - 2:46

  0%|          | 0/1 [00:00<?, ?it/s]

NameError: name 'infer_request' is not defined

In [304]:
all_preds = [infer_sample(wave) for wave in tqdm(waves)]  # OV FP16 - 2:11

  0%|          | 0/500 [00:00<?, ?it/s]

In [318]:
all_preds = [infer_sample(wave) for wave in tqdm(waves)]  # OV FP32 - 2:11

  0%|          | 0/500 [00:00<?, ?it/s]

In [280]:
all_preds = [infer_sample(path) for path in tqdm(df["path"].values)]  # no model

  0%|          | 0/500 [00:00<?, ?it/s]

In [282]:
all_preds = [infer_sample(path) for path in tqdm(df["path"].values)]  # no melspec

  0%|          | 0/500 [00:00<?, ?it/s]

In [284]:
all_preds = [infer_sample(path) for path in tqdm(df["path"].values)]  # only load

  0%|          | 0/500 [00:00<?, ?it/s]

In [31]:
inference_rows = []
for idx in tqdm(range(len(df))):

    y_pred = all_preds[idx]
    preds = expit(y_pred)

    for t, pred in enumerate(preds):
        predictions = dict([(l, p) for l, p in zip(CLASSES, pred)])
        inference_rows.append(
            {'row_id': f'{df.id[idx]}_{(t + 1) * 5}' } | predictions
        )

sub = pd.DataFrame(inference_rows)

100%|██████████| 1/1 [00:00<00:00, 749.25it/s]


In [32]:
sub.head()

Unnamed: 0,row_id,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1
0,1872382287_5,0.002922,0.129089,0.00039,0.00056,0.003429,0.000327,0.001513,0.000135,0.000581,...,0.002118,0.000214,0.000852,0.000594,0.002263,0.008572,0.001041,0.000571,0.011071,0.001834
1,1872382287_10,0.003955,0.03263,0.00085,0.000193,0.003024,0.000204,0.00083,0.000279,0.000449,...,0.005962,0.000296,0.003496,0.002194,0.005088,0.024762,0.000241,0.000161,0.000457,0.001841
2,1872382287_15,0.003516,0.066223,0.000459,7.5e-05,0.004243,0.000278,0.001076,0.000177,0.000183,...,0.002762,0.000901,0.001434,0.001074,0.015218,0.030678,0.000313,0.000496,0.001982,0.001014
3,1872382287_20,0.002976,0.003322,0.007874,0.000731,0.013947,0.000532,0.001118,0.001784,0.002137,...,0.008639,0.000393,0.003597,0.001014,0.003911,0.031162,0.001118,0.002764,0.001228,0.002651
4,1872382287_25,0.003274,0.065591,0.000857,0.000244,0.007834,0.000572,0.002124,0.000531,0.000317,...,0.00233,0.001801,0.003665,0.003726,0.005863,0.03073,0.001214,0.001157,0.001222,0.005482


In [212]:
if EVAL:
    preds = sub[CLASSES].values
    auc = macro_auc(df["primary_label"].values.tolist(), preds)
    print(f'Fold 0 AUC: {auc:.5f}')

Fold 0 AUC: 0.98153
