#### Code to validate models

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell is executed, so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Change the working directory to ../src (IPython automagic `cd`).
# NOTE(review): presumably needed so the project-local imports below
# (params, data, model_zoo, utils) resolve — confirm.
cd ../src

## Imports

In [None]:
import os
import cv2
import json
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import *
from collections import Counter
from tqdm.notebook import tqdm

In [None]:
from params import *

In [None]:
from data.preparation import prepare_dataframe, handle_duplicates
from data.dataset import CovidDetDataset, CovidClsDataset
from data.transforms import get_transfos_det, get_transfos_cls

from model_zoo.models import get_model

from utils.plot import plot_sample

from utils.logger import Config

from utils.metrics import per_class_average_precision_score, study_level_map

## Data

### Load

In [None]:
# Experiment run directories (relative to LOG_PATH) whose out-of-fold
# predictions are averaged further down. Comment / uncomment entries to
# change which runs take part in the blend.
EXP_FOLDERS = [
    LOG_PATH + run_dir
    for run_dir in (
        # "2021-07-25/10/",
        # "2021-07-26/1/",
        # "2021-07-26/2/",
        "2021-07-27/15/",
        # "2021-07-28/0/",
        "aphrodeep_2021-07-28_1/",
    )
]

# The last listed run is the one config.json and data.csv are read from.
EXP_FOLDER = EXP_FOLDERS[-1]

# When True, the "*_flip.npy" prediction files are loaded instead of the
# plain ones.
USE_TTA = True

In [None]:
# Load the configuration stored with the selected experiment run.
# The context manager closes the file handle as soon as the JSON is parsed
# (a bare `open(...)` passed to `json.load` is never closed explicitly).
with open(EXP_FOLDER + "config.json", 'r') as config_file:
    config = Config(json.load(config_file))

In [None]:
# Average the saved out-of-fold predictions over every selected experiment.
# With USE_TTA set, the "*_flip.npy" files are read instead of the plain ones
# (presumably predictions obtained with flip test-time augmentation — confirm).
pred_oof_img = np.mean(
    [
        np.load(folder + ("pred_oof_img_flip.npy" if USE_TTA else "pred_oof_img.npy"))
        for folder in EXP_FOLDERS
    ],
    axis=0,
)
pred_oof_study = np.mean(
    [
        np.load(folder + ("pred_oof_study_flip.npy" if USE_TTA else "pred_oof_study.npy"))
        for folder in EXP_FOLDERS
    ],
    axis=0,
)

In [None]:
# One "<class>_pred" column name per entry of CLASSES.
pred_cols = [label + "_pred" for label in CLASSES]

# Metadata table of the selected run, extended with the averaged predictions.
# NOTE(review): assumes the rows of data.csv are in the same order as the rows
# of the .npy prediction arrays of every averaged run — confirm.
df = pd.read_csv(EXP_FOLDER + "data.csv")
df[pred_cols] = pred_oof_study
df['pred_img'] = pred_oof_img

In [None]:
# Image-level metrics of the averaged predictions against `img_target`;
# accuracy binarises the predictions at 0.5.
auc = roc_auc_score(y_true=df['img_target'], y_score=pred_oof_img)
acc = accuracy_score(y_true=df['img_target'], y_pred=pred_oof_img > 0.5)

print(f'Image AUC : {auc :.3f}')
print(f'Image Acc : {acc :.3f}')

In [None]:
# Study-level mAP of the averaged study predictions. The helper is given the
# per-row predictions, the per-row targets and the study id of every row.
# NOTE(review): how rows belonging to the same study are combined by default is
# decided inside utils.metrics.study_level_map, which is not visible here.
study_map = study_level_map(df[pred_cols].values, df[CLASSES].values, df['study_id'].values)
print(f'Study mAP : {study_map :.4f}')

In [None]:
# Same metric as the previous cell, but with `agg=np.min`.
# NOTE(review): presumably `agg` is the reduction applied to the rows of one
# study before scoring — confirm in utils.metrics.study_level_map.
# The printed label is identical to the previous cell's, so the two outputs can
# only be told apart by cell order.
study_map = study_level_map(df[pred_cols].values, df[CLASSES].values, df['study_id'].values, agg=np.min)
print(f'Study mAP : {study_map :.4f}')

### Post-process

#### Img merging

In [None]:
# Per-study mean of the image prediction, merged back onto every row as
# `pred_img_merged`.
# NOTE(review): `pred_img_merged` is not read by any of the visible cells — the
# rescaling below and the following metrics use the per-image `pred_img`.
# Confirm whether the merged value was meant to be used instead.
groups = df[['study_id', 'pred_img']].groupby('study_id').mean().rename(
    columns={'pred_img': 'pred_img_merged'}
).reset_index()
df_ = df.merge(groups, on="study_id", how="left")

# Rescale the image prediction wherever a study-level prediction exceeds 0.75:
# shrink it for a confident "negative", boost it for a confident "typical" or
# "indeterminate". The updates run in place and in sequence, so a row matching
# several conditions is scaled several times, and boosted values can exceed 1.
df_.loc[df_['negative_pred'] > 0.75, 'pred_img'] *= 0.66
df_.loc[df_['typical_pred'] > 0.75, 'pred_img'] *= 1.5
# df_.loc[df_['atypical_pred'] > 0.75, 'pred_img'] *= 1.5
df_.loc[df_['indeterminate_pred'] > 0.75, 'pred_img'] *= 1.5

In [None]:
# Image-level metrics recomputed on the post-processed frame `df_`;
# accuracy binarises `pred_img` at 0.5.
auc = roc_auc_score(y_true=df_['img_target'], y_score=df_['pred_img'])
acc = accuracy_score(y_true=df_['img_target'], y_pred=df_['pred_img'] > 0.5)

print(f'Image AUC : {auc :.3f}')
print(f'Image Acc : {acc :.3f}')

#### Study using img

In [None]:
# Work on a copy so the original predictions in `df` stay untouched.
df_ = df.copy()

# Weight the study predictions by the image prediction: "negative" is scaled
# by (1 - pred_img), "typical" by pred_img. The other two classes are left
# as they are (alternatives kept below for experimentation).
df_['negative_pred'] *= 1 - df_['pred_img']
df_['typical_pred'] *= df_['pred_img']
# df_['indeterminate_pred'] *= df_['pred_img']
# df_['atypical_pred'] *= df_['pred_img']

# Predictions, targets and study ids are all taken from `df_`, so the three
# arrays are guaranteed to describe the same rows (the original mixed in
# `df['study_id']`, which only worked because `df_` is a straight copy).
study_map = study_level_map(df_[pred_cols].values, df_[CLASSES].values, df_['study_id'].values)
print(f'Study mAP : {study_map :.4f}')

In [None]:
# Collapse to one row per study by averaging predictions, targets and the
# image prediction. `.mean()` is used rather than `.agg(np.mean)`: recent
# pandas versions emit a FutureWarning for NumPy callables passed to `agg`
# and ask for the built-in reduction instead; the result is the same.
df_study = df[['study_id'] + pred_cols + CLASSES + ['pred_img']].groupby('study_id').mean()

# Post-processing experiments, currently disabled:
# df_study['negative_pred'] *= 1 - df_study['pred_img']
# df_study.loc[df_study['pred_img'] > 0.9, 'negative_pred'] *= 0.9
# df_study.loc[df_study['pred_img'] < 0.1, 'negative_pred'] *= 2
# df_study.loc[df_study['pred_img'] > 0.9, 'typical_pred'] *= 2
# df_study.loc[df_study['pred_img'] > 0.9, 'indeterminate_pred'] *= 2
# df_study.loc[df_study['pred_img'] > 0.9, 'atypical_pred'] *= 2

# Per-class scores of the per-study predictions (`average=False`);
# the value is displayed as the cell output.
per_class_average_precision_score(
    df_study[pred_cols].values,
    df_study[CLASSES].values,
    num_classes=NUM_CLASSES,
    average=False
)

In [None]:
# One row per study: mean of predictions, targets, image target and image
# prediction. `.mean()` replaces `.agg(np.mean)` (which triggers a
# FutureWarning in recent pandas versions); the groupby result is already a
# new frame, so no extra `.copy()` is needed.
df_study = df[
    ['study_id'] + pred_cols + CLASSES + ['img_target', 'pred_img']
].groupby('study_id').mean()

# Disabled alternative: weight every class directly by the image prediction.
# df_study['negative_pred'] *= 1 - df_study['pred_img']
# df_study['typical_pred'] *= df_study['pred_img']
# df_study['indeterminate_pred'] *= df_study['pred_img']
# df_study['atypical_pred'] *= df_study['pred_img']

# Threshold-based adjustment of "negative": halve it when the mean image
# prediction is high, double it when it is low.
df_study.loc[df_study['pred_img'] > 0.75, 'negative_pred'] *= 0.5
df_study.loc[df_study['pred_img'] < 0.2, 'negative_pred'] *= 2

# Boost "typical" when the mean image prediction is high ...
df_study.loc[df_study['pred_img'] > 0.75, 'typical_pred'] *= 1.2
# df_study.loc[df_study['pred_img'] > 0.9, 'indeterminate_pred'] *= 1.1
# df_study.loc[df_study['pred_img'] > 0.9, 'atypical_pred'] *= 1.1

# ... and damp it when it is low.
# NOTE(review): the low threshold is 0.25 here but 0.2 for "negative" above —
# confirm the difference is intentional.
df_study.loc[df_study['pred_img'] < 0.25, 'typical_pred'] *= 0.8
# df_study.loc[df_study['pred_img'] < 0.2, 'indeterminate_pred'] *= 0.9
# df_study.loc[df_study['pred_img'] < 0.2, 'atypical_pred'] *= 0.9

accs = per_class_average_precision_score(
    df_study[pred_cols].values,
    df_study[CLASSES].values,
    num_classes=NUM_CLASSES,
    average=False
)
# Displayed output: rounded per-class scores, and their mean scaled by 2/3.
np.round(accs, 4), np.round(np.mean(accs) * 2/3, 4)

## Results

In [None]:
# Load out-of-fold study predictions from an external CSV file.
# Note that this rebinds `pred_oof_study`: above it held the averaged NumPy
# array of study predictions, from here on it is a DataFrame.
pred_oof_study = pd.read_csv('../output/OOF_study_only_EBV2M_768.csv')
# pred_oof_study = pd.read_csv('../output/submit_OOF_All_Folds.csv')

In [None]:
# Keep only the part of each id that precedes the first "_".
pred_oof_study['id'] = [raw_id.split('_')[0] for raw_id in pred_oof_study['id']]

# Disabled alternative: join the external predictions onto the per-study means of `df`.
# df_study = df[['study_id'] + pred_cols + CLASSES].groupby('study_id').agg(np.mean)
# df_study = df_study.merge(pred_oof_study, how="left", left_on="study_id", right_on="id").dropna()

In [None]:
# Study-level table read from train_study_level.csv; `study_id` is the part of
# `id` that precedes the first "_".
df_study = pd.read_csv(DATA_PATH + "train_study_level.csv")
df_study['study_id'] = [raw_id.split('_')[0] for raw_id in df_study['id']]

# Reduce every column name to its first word, lower-cased.
df_study.columns = [name.split(' ')[0].lower() for name in df_study.columns]

# Swap the original `id` column for the one coming with the external
# predictions, then drop every row that has a missing value after the left
# join (e.g. studies without a prediction).
df_study = (
    df_study.drop(columns='id')
    .merge(pred_oof_study, how="left", left_on="study_id", right_on="id")
    .dropna()
)

In [None]:
def proc(x):
    """Extract the scores of the first four entries of a prediction string.

    The string is cut at every occurrence of the box placeholder
    ``'0 0 1 1'``; for each of the first four pieces the second
    space-separated token is converted to ``float``.

    Args:
        x (str): Prediction string, e.g.
            ``"negative 0.5 0 0 1 1 typical 0.3 0 0 1 1 ..."``.

    Returns:
        list of floats: One score per parsed entry, in order of appearance.
    """
    scores = []
    for entry in x.split('0 0 1 1')[:4]:
        tokens = entry.strip().split(' ')
        scores.append(float(tokens[1]))
    return scores

# Parse every PredictionString with `proc` and stack the resulting score lists
# into one array (one row per study), then store them as the prediction columns.
pred_oof_study = np.array([proc(pred_string) for pred_string in df_study['PredictionString']])
# pred_oof_study = np.random.random(pred_oof_study.shape)
df_study[pred_cols] = pred_oof_study

In [None]:
# Per-study mean of the predictions held in `df`, joined next to the parsed
# predictions; overlapping column names coming from `df_g` get a "_theo" suffix.
df_g = df.groupby('study_id')[pred_cols].mean().reset_index()
df_study = df_study.merge(
    df_g,
    how="left",
    left_on="id",
    right_on="study_id",
    suffixes=('', '_theo'),
)

In [None]:
# Score of the predictions parsed from the external CSV (`pred_cols`),
# displayed as the cell output.
# NOTE(review): the 2/3 factor presumably rescales the study-level score to its
# share of a combined metric — confirm.
per_class_average_precision_score(
    df_study[pred_cols].values,
    df_study[CLASSES].values, 
    num_classes=NUM_CLASSES) * 2 / 3

In [None]:
# Score of a simple blend: element-wise sum of the parsed CSV predictions and
# the "_theo" columns (per-study means of the predictions from `df`).
# NOTE(review): the 2/3 factor presumably rescales the study-level score to its
# share of a combined metric — confirm.
per_class_average_precision_score(
    df_study[pred_cols].values + df_study[[p + "_theo" for p in pred_cols]].values,
    df_study[CLASSES].values, 
    num_classes=NUM_CLASSES) * 2 / 3

In [None]:
# Score of the "_theo" columns alone (per-study means of the predictions from
# `df`), for comparison with the two cells above.
# NOTE(review): the 2/3 factor presumably rescales the study-level score to its
# share of a combined metric — confirm.
per_class_average_precision_score(
    df_study[[p + "_theo" for p in pred_cols]].values,
    df_study[CLASSES].values, 
    num_classes=NUM_CLASSES) * 2 / 3