# In [1]:  (notebook cell marker; kept as a comment so the file parses as Python)
import os
import random
import time

import torch

import numpy as np
import pandas as pd
import torch
import torch.utils.data as torchdata

from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

from dataset.dataset import WaveformDataset
from utils.metric import AverageMeter, MetricMeter, padded_cmap, target_columns
from utils.loss import mixup, mixup_criterion, cutmix, cutmix_criterion, loss_fn
from utils.file import Logger

# from models.resnext import TimmSED
# from config_resnext50 import CFG

# from models.nfnet import TimmSED
# from config_eca_nfnet_l0 import CFG

# from models.tf_efficientnetv2_s_in21k import TimmSED
# from config_tf_efficientnetv2_s_in21k import CFG

# from models.tf_efficientnetv2_m_in21k import TimmSED
# from config_tf_efficientnetv2_m_in21k import CFG

# from models.seresnext26t_32x4d import TimmSED
# from config_seresnext26t_32x4d import CFG

from models.tf_efficientnet_b0_ns import TimmSED
from config_tf_efficientnet_b0_ns import CFG


def set_seed(seed=42):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU generator plus the
    current and all CUDA devices), stores the seed in the ``PYTHONHASHSEED``
    environment variable, and switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed applied to all generators. Defaults to 42.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


def _tensor_to_list(tensor):
    """Return ``tensor`` as a (nested) Python list, detached and copied to host memory."""
    return tensor.detach().cpu().numpy().tolist()


def inference_fn(model, data_loader, device):
    """Run ``model`` over ``data_loader`` without gradients and collect its outputs.

    The model is put into eval mode. Each batch is expected to be a mapping
    with an ``"image"`` tensor (fed to the model) and a ``"targets"`` tensor.
    The model output is expected to be a mapping with the keys
    ``clipwise_output``, ``framewise_output``, ``segmentwise_output``,
    ``logit``, ``framewise_logit`` and ``segmentwise_logit``.

    Args:
        model: Callable module returning the output mapping described above.
        data_loader: Sized iterable of batches.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        A 7-tuple of lists with one entry per sample, in this order:
        clipwise output, targets, max over dim 1 of the framewise output,
        sigmoid of ``logit``, sigmoid of the max over dim 1 of the framewise
        logit, max over dim 1 of the segmentwise output, and sigmoid of the
        max over dim 1 of the segmentwise logit.
    """
    model.eval()

    clip_preds = []
    all_targets = []
    frame_preds = []
    logit_preds = []
    frame_logit_preds = []
    segment_preds = []
    segment_logit_preds = []

    with torch.no_grad():
        for batch in tqdm(data_loader, total=len(data_loader)):
            inputs = batch["image"].to(device)
            output = model(inputs)

            # Targets are only consumed on the host, so they are converted
            # directly instead of being copied to `device` and back.
            all_targets.extend(_tensor_to_list(batch["targets"]))

            clip_preds.extend(_tensor_to_list(output["clipwise_output"]))
            logit_preds.extend(_tensor_to_list(torch.sigmoid(output["logit"])))

            # Frame-/segment-level outputs are reduced with a max over dim 1
            # (presumably the time axis -- TODO confirm against the model).
            frame_preds.extend(
                _tensor_to_list(torch.max(output["framewise_output"], 1).values)
            )
            segment_preds.extend(
                _tensor_to_list(torch.max(output["segmentwise_output"], 1).values)
            )
            frame_logit_preds.extend(
                _tensor_to_list(
                    torch.sigmoid(torch.max(output["framewise_logit"], 1).values)
                )
            )
            segment_logit_preds.extend(
                _tensor_to_list(
                    torch.sigmoid(torch.max(output["segmentwise_logit"], 1).values)
                )
            )

    return (
        clip_preds,
        all_targets,
        frame_preds,
        logit_preds,
        frame_logit_preds,
        segment_preds,
        segment_logit_preds,
    )


def _padded_cmap_from_lists(targets, predictions):
    """Score nested-list ``predictions`` against nested-list ``targets`` with ``padded_cmap``.

    Targets are cast to ``int32``; both inputs are wrapped in fresh DataFrames
    whose columns are ``target_columns``.
    """
    target_frame = pd.DataFrame(np.array(targets, dtype=np.int32), columns=target_columns)
    prediction_frame = pd.DataFrame(np.array(predictions), columns=target_columns)
    return padded_cmap(target_frame, prediction_frame)


def calc_cv(config, model_paths, device=None):
    """Evaluate one checkpoint per fold on its validation split and print the scores.

    For every ``(fold, model_path)`` pair the checkpoint is loaded into a single
    shared ``TimmSED`` instance, the rows of ``train_folds.csv`` whose ``kfold``
    equals ``fold`` are run through ``inference_fn``, and the padded cmAP of
    each prediction variant is printed. After all folds, the mean of the
    per-fold clipwise cmAP values and the micro-averaged F1 score at a 0.3
    threshold over the pooled clipwise predictions are printed.

    Args:
        config: Configuration object; ``base_model_name``, ``pretrained``,
            ``num_classes``, ``in_channels`` and ``valid_bs`` are read here, and
            the object is also passed to the model and the dataset.
        model_paths: Checkpoint paths; the position in the sequence is the fold id.
        device: Device to run on. When ``None`` (the default) ``cuda:0`` is used
            if CUDA is available, otherwise the CPU. Previously this was read
            from a module-level global that only exists when the file is run
            as a script.

    Returns:
        None. Results are reported via ``print``.
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    df = pd.read_csv("train_folds.csv")
    y_true = []
    y_pred = []
    cmaps = []

    model = TimmSED(
        base_model_name=config.base_model_name,
        config=config,
        pretrained=config.pretrained,
        num_classes=config.num_classes,
        in_channels=config.in_channels,
    )
    model.to(device)

    for fold, model_path in enumerate(model_paths):
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()

        val_df = df[df.kfold == fold].reset_index(drop=True)
        dataset = WaveformDataset(df=val_df, config=config, mode="valid")
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=config.valid_bs,
            num_workers=0,
            pin_memory=True,
            shuffle=False,
        )

        (
            final_output,
            final_target,
            final_output_f,
            final_output_l,
            final_output_fl,
            final_output_s,
            final_output_sl,
        ) = inference_fn(model, dataloader, device)
        y_pred.extend(final_output)
        y_true.extend(final_target)
        torch.cuda.empty_cache()

        cmAP = _padded_cmap_from_lists(final_target, final_output)
        cmAP_f = _padded_cmap_from_lists(final_target, final_output_f)
        cmAP_l = _padded_cmap_from_lists(final_target, final_output_l)
        cmAP_fl = _padded_cmap_from_lists(final_target, final_output_fl)
        cmAP_s = _padded_cmap_from_lists(final_target, final_output_s)
        cmAP_sl = _padded_cmap_from_lists(final_target, final_output_sl)

        cmaps.append(cmAP)
        print(f"fold: {fold}, cmAP: {cmAP}, cmAP_f: {cmAP_f}, cmAP_l: {cmAP_l}, cmAP_fl: {cmAP_fl}, cmAP_s:{cmAP_s}, cmAP_sl: {cmAP_sl}.")

    f1_03 = metrics.f1_score(np.array(y_true), np.array(y_pred) > 0.3, average="micro")

    # The original used `np.int` here, which was removed in NumPy 1.24 and
    # raises AttributeError; the helper casts to int32 like the per-fold scores.
    # NOTE(review): this pooled (out-of-fold) cmAP is computed but not reported;
    # the summary below prints the mean of the per-fold values instead.
    oof_cmap = _padded_cmap_from_lists(y_true, y_pred)  # noqa: F841

    print(f"overall cmAP: {np.mean(cmaps)},  f1_03: {f1_03}.")
    return


if __name__ == "__main__":
    # Script entry point: build the config, pick a device and score the
    # listed checkpoints with calc_cv.
    config = CFG()

    # Use the first CUDA device when one is available, otherwise the CPU.
    cuda_available = torch.cuda.is_available()
    device = torch.device("cuda:0" if cuda_available else "cpu")

    # Load the fold table restricted to the metadata columns.
    # (An optional "./extra_2.csv" table with the same columns was previously
    # concatenated here; that step is disabled.)
    fold_columns = ["filename", "primary_label", "secondary_labels", "kfold", "duration"]
    train = pd.read_csv("./train_folds.csv")[fold_columns]

    # Checkpoint file names embed the fold index and that fold's best score.
    best_scores = [0.9855594767582085]
    checkpoint_template = "ckpts/{}/fold-{}_{}.bin"
    model_paths = [
        checkpoint_template.format(config.base_model_name, fold, best_scores[fold])
        for fold in range(1)
    ]

    calc_cv(config, model_paths)


# Notebook cell output: ModuleNotFoundError: No module named 'dataset'