**About**: This notebook is used to analyse and compare model predictions.

In [None]:
# Reload imported modules automatically before each cell runs, so that edits made
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

### Imports

In [None]:
import os
import gc
import sys
import cv2
import glob
import json
import torch
import shutil
import joblib
import pickle
import librosa
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn.functional as F
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from scipy.special import expit
from joblib import Parallel, delayed

warnings.simplefilter(action="ignore", category=UserWarning)

In [None]:
from util.logger import Config
from util.metrics import macro_auc
from util.torch import load_model_weights

from data.preparation import prepare_data, prepare_folds
from model_zoo.models import define_model

from params import CLASSES

### Params

In [None]:
# Folder of the unlabeled soundscapes, relative to the working directory set earlier in the notebook.
DATA_PATH = "../input/unlabeled_soundscapes/"

In [None]:
# Runtime settings (batch size, mixed precision, data-loading workers, device, backend).
# NOTE(review): presumably inherited from an inference notebook — confirm they are still needed here.
BATCH_SIZE = 48
USE_FP16 = True
NUM_WORKERS = 4

DEVICE = "cuda"
RUNTIME = "torch"

# Audio settings — presumably the window length in seconds and the sample rate in Hz (TODO confirm).
DURATION = 5
SR = 32000

In [None]:
# Accumulators filled by the cells below and kept in the same order:
# to_plot[i] maps a file id to its prediction array, names[i] is the label shown for it.
to_plot = []
names = []

### CPMP

In [None]:
# Disabled cell, kept for reference: it would load ../output/cpmp_preds/pl_all.pkl and add
# either each fold or the average over folds to the comparison.

# with open('../output/cpmp_preds/pl_all.pkl', "rb") as f:
#     preds = pickle.load(f)

# # for f in [0, 1]: #2 , 3, 4]:
# #     preds_cpmp = {}
# #     for k in preds.keys():
# #         preds_cpmp[k[:-4].split('/')[-1]] = preds[k][f]

# #     names.append(f'CPMP-vitb0-{f}')
# #     to_plot.append(preds_cpmp)

# preds_cpmp = {}
# for k in preds.keys():
#     preds_cpmp[k[:-4].split('/')[-1]] = np.mean(preds[k], 0)
# names.append(f'CPMP-vitb0-avg')
# to_plot.append(preds_cpmp)

# del preds, preds_cpmp
# gc.collect()

### Pickle to sub

In [None]:
# Reference submission: the rows built from the pickle below are later checked against it.
sub = pd.read_csv("../logs/2024-05-10/18/pl_sub_fullfit_0.csv")
# Row ids start with the file id followed by "_": keep the unique file ids in order of appearance.
ids = sub["row_id"].str.split("_").str[0].unique()

In [None]:
# NOTE(review): pickle.load can execute arbitrary code — only open files produced by a trusted source.
with open('../output/cpmp_preds_72/pl_all.pkl', "rb") as fh:
    preds = pickle.load(fh)

# Disabled alternative: add every fold separately instead of the average.
# for f in [0, 1, 2, 3, 4]:
#     preds_cpmp = {}
#     for k in preds.keys():
#         preds_cpmp[k[:-4].split('/')[-1]] = preds[k][f]

#     names.append(f'CPMP72-vitb0-{f}')
#     to_plot.append(preds_cpmp)

# Key each entry by its bare file name (directory and last 4 characters stripped) and
# average along the first axis (the fold axis, judging by the disabled variant above).
preds_cpmp = {
    path[:-4].split('/')[-1]: np.mean(fold_preds, 0)
    for path, fold_preds in preds.items()
}
names.append('CPMP72-vitb0-avg')
to_plot.append(preds_cpmp)

# Optional clean-up, disabled because preds_cpmp is still read by the next cell.
# del preds, preds_cpmp
# gc.collect()

In [None]:
# Flatten the per-file predictions into one row per (file, time step).
# Row ids are built as "<file id>_<(t + 1) * 5>".
# Loop variables are named so that they neither shadow the builtin `id` nor overwrite
# the `preds` dictionary loaded from the pickle.
inference_rows = []

for file_id in tqdm(ids):
    file_preds = preds_cpmp[file_id]
    # file_preds = preds_birdnet[file_id]
    for t, step_preds in enumerate(file_preds):
        row = {"row_id": f"{file_id}_{(t + 1) * 5}"}
        row.update(zip(CLASSES, step_preds))  # one column per class
        inference_rows.append(row)
sub_ = pd.DataFrame(inference_rows)

# NOTE(review): the array is written to cpmp_preds_107-4 whereas the row ids go to
# cpmp_preds_72 — confirm that the destination folder is the intended one.
# Both files are written before the rows are filtered against the reference submission.
np.save('../output/cpmp_preds_107-4/pl_preds_avg.npy', sub_[CLASSES].values)
sub_[["row_id"]].to_csv('../output/cpmp_preds_72/pl_sub.csv', index=False)
sub_.head()

In [None]:
# Remove the rows that have no counterpart in the reference submission ...
extra_row_ids = set(sub_.row_id.values) - set(sub.row_id.values)
keep_mask = ~sub_.row_id.isin(extra_row_ids)
sub_ = sub_.loc[keep_mask].reset_index(drop=True)

# ... then make sure both frames hold the same rows in the same order.
assert len(sub_) == len(sub)
assert (sub_.row_id == sub.row_id).all()

In [None]:
# Alternative destination, disabled:
# sub_.to_csv('../output/cpmp_preds_72/pl_sub.csv', index=False)
# NOTE(review): sub_ is built from preds_cpmp (the birdnet source is commented out in the
# cell that builds it), yet the file is named pl_birdnet.csv — confirm before running.
sub_.to_csv('../output/pl_birdnet.csv', index=False)

### Theo

In [None]:
def avg_dicts(dicts):
    """
    Average several dictionaries key by key.

    The keys are taken from the first dictionary; every other dictionary must
    contain them as well (extra keys are ignored).

    Args:
        dicts (list of dict): Dictionaries whose values can be averaged together.

    Returns:
        dict: For each key of dicts[0], np.mean over axis 0 of the values collected
            from all dictionaries.
    """
    reference_keys = dicts[0].keys()
    return {
        key: np.mean([d[key] for d in dicts], 0)
        for key in reference_keys
    }

In [None]:
# Experiments to compare, as (log folder, display name) pairs.
# Commented-out entries are other runs that can be re-enabled.
EXP_FOLDERS = [
    # ("../logs/2024-05-05/7/", "v2s 0.64"),
    # ("../logs/2024-05-06/18/", "vitb0"),
    # ("../logs/2024-05-07/0/", "vitb0"),
    # ("../logs/2024-05-06/19/", "v2s"),
    # ("../logs/2024-05-06/22/", "vitb1"),
    # ("../logs/2024-05-07/5/", "vitb0"),
    ("../logs/2024-05-07/6/", "v2-b0"),
    # ("../logs/2024-05-07/7/", "vitb1"),
    # ("../logs/2024-05-10/18/", "vitb0PL"),
]

# Number of consecutive rows that belong to one file in the pl_sub CSVs.
N_SEGMENTS = 48


def load_pl_preds(path, n_segments=N_SEGMENTS):
    """
    Read a pl_sub CSV and group its rows per file.

    Args:
        path (str): CSV with a "row_id" column and one column per entry of CLASSES.
        n_segments (int): Number of consecutive rows per file. Defaults to N_SEGMENTS.

    Returns:
        dict: File id (part of row_id before the first "_") -> array of shape
            (n_segments, len(CLASSES)).

    Raises:
        ValueError: If the number of rows is not a multiple of n_segments (from reshape).
    """
    df = pd.read_csv(path)
    # Rows are assumed to be grouped per file, so every n_segments-th row starts a new file.
    file_ids = df["row_id"].apply(lambda x: x.split("_")[0]).values[::n_segments]
    values = df[CLASSES].values.reshape(-1, n_segments, len(CLASSES))
    return dict(zip(file_ids, values))


for exp_folder, exp_name in EXP_FOLDERS:
    # glob returns files in arbitrary order: sort so that the "_{i}" suffixes are stable.
    fold_files = sorted(glob.glob(exp_folder + "pl_sub_*.csv"))

    # Collect the folds locally so that a failure never leaves `names` and `to_plot`
    # out of sync.
    fold_preds, fold_names = [], []
    for i, path in enumerate(fold_files):
        try:
            fold_preds.append(load_pl_preds(path))
        except Exception as err:  # best effort: report the file and move on
            print(f"Skipping {path}: {type(err).__name__}: {err}")
            continue
        fold_names.append(f"{exp_name}_{i}")
        gc.collect()

    if not fold_preds:
        # No usable per-fold file: fall back to the single pl_sub.csv of the experiment.
        fallback_path = exp_folder + "pl_sub.csv"
        try:
            fallback_preds = load_pl_preds(fallback_path)
        except Exception as err:
            print(f"Skipping {fallback_path}: {type(err).__name__}: {err}")
            continue
        to_plot.append(fallback_preds)
        names.append(exp_name)
        continue

    # Only the average over folds is kept for the comparison.
    print(f"Average {fold_names}")
    to_plot.append(avg_dicts(fold_preds))
    names.append(exp_name + "_avg")

    del fold_preds
    gc.collect()


### Compare

In [None]:
# Labels of the prediction sets collected so far.
names

In [None]:
# A class is drawn for a file only if at least one prediction set exceeds this value for it.
th = 0.2
# Number of files to plot (the first LIM keys of to_plot[0] are used).
LIM = 20

In [None]:
# Files to plot: the first LIM file ids of the first prediction set.
keys = list(to_plot[0])[:LIM]

# Hand-picked alternatives:
# keys = ["1976786596", "106748716", "523220948"]
# keys = ['184575141', '1542255759', '1976786596', '106748716', '523220948',
#        '616589570', '95561326', '1908248554', '901569271', '1681605492']

In [None]:
# One figure per file, one panel per prediction set, one curve per class.
for k in keys:
    # keys come from to_plot[0]; a file may be missing from the other prediction sets.
    missing = [name for name, preds_by_file in zip(names, to_plot) if k not in preds_by_file]
    if missing:
        print(f"Skipping {k}: no predictions in {missing}")
        continue

    # Keep the classes that exceed `th` in at least one prediction set, so that all
    # panels show the same classes in the same order (hence with the same colours).
    active = [
        (i, c)
        for i, c in enumerate(CLASSES)
        if any(p[k][:, i].max() > th for p in to_plot)
    ]

    fig, axes = plt.subplots(1, len(to_plot), figsize=(20, 5), squeeze=False)
    for ax, preds_by_file, name in zip(axes[0], to_plot, names):
        for i, c in active:
            ax.plot(preds_by_file[k][:, i], label=c)
        ax.set_ylim(0, 1)
        ax.set_title(f'{k}\n{name}')

    # A single legend on the last panel is enough since all panels share the same curves;
    # it is skipped when nothing is drawn to avoid matplotlib's empty-legend warning.
    if active:
        axes[0, -1].legend()

    plt.show()
    plt.close(fig)

Done!