In [8]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from IPython.display import display
from sklearn.metrics import roc_auc_score
from pprint import pprint
from tensorflow import keras

In [7]:
class CFG:
    """Notebook-wide settings, accessed as class attributes (e.g. ``CFG.n_folds``)."""

    # Debug switch; the expressions below and later cells branch on it.
    debug = True

    # Directory prefix used to build the competition CSV paths.
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"

    # Prefix for the saved model files; empty string when debug is off.
    models_dir = "" if not debug else "./models/"

    # Label columns, in the order used for model output columns and scoring.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

    # Number of fold checkpoints that are loaded and averaged.
    n_folds = 5

In [17]:
# Labelled training table; the path is built from CFG.dataset_dir so that the
# dataset location is configured in exactly one place.
train = pd.read_csv(f"{CFG.dataset_dir}train.csv")

# Rows to predict on. In debug mode the training rows are reused (copied
# rather than parsed from disk a second time); otherwise the sample
# submission file is read.
if CFG.debug:
    test = train.copy()
else:
    test = pd.read_csv(f"{CFG.dataset_dir}sample_submission.csv")

In [6]:
# Feature arrays are stored in an .npz archive and looked up by StudyInstanceUID.
# The archive is opened in a `with` block so the underlying file handle is
# closed as soon as the arrays have been read.
#
# Rows are gathered in the order of `test`, because the predictions made from
# `features` are written into a copy of `test` further down. In debug mode
# `test` holds the same rows as `train`, so the result is unchanged there.
# NOTE(review): outside debug mode this requires the archive to contain an
# entry for every UID in `test`; a missing UID raises KeyError instead of
# silently producing misaligned predictions — confirm the archive contents.
with np.load("../input/effnet_best_output.npz") as npz:
    features_list = [npz[uid] for uid in tqdm(test["StudyInstanceUID"])]

# Stack the per-study arrays into a single array with one row per study.
features = np.array(features_list)

  0%|          | 0/30083 [00:00<?, ?it/s]

In [14]:
def create_model(fold:int):
    """Rebuild the dense model from its JSON description and load one fold's weights.

    Args:
        fold: Number inserted into the checkpoint name ``ckpt_{fold}``.

    Returns:
        The Keras model deserialized from ``model.json`` with the checkpoint
        weights loaded into it.
    """
    base_path = CFG.models_dir + "effnet_best_dense/"

    # The architecture is stored as JSON text in the same directory as the checkpoints.
    with open(base_path + "model.json", "rt") as json_file:
        architecture = json_file.read()

    model = keras.models.model_from_json(architecture)
    model.load_weights(base_path + f"ckpt_{fold}")
    return model

# One restored model per fold number 0 .. CFG.n_folds - 1, in fold order.
dense_model_list = list(map(create_model, range(CFG.n_folds)))

In [18]:
# Per-fold predictions on the same feature matrix: one DataFrame per model,
# with one column per target label.
dense_pred_list = [
    pd.DataFrame(model.predict(features), columns=CFG.target_cols)
    for model in dense_model_list
]

# The averaged predictions are written into a copy of `test` row by row, so
# both must describe the same number of studies. Fail loudly here instead of
# letting a mismatch produce NaN or misaligned rows in the submission.
if len(features) != len(test):
    raise ValueError(
        f"features has {len(features)} rows but test has {len(test)} rows; "
        "predictions cannot be aligned with the submission frame"
    )

# Ensemble: element-wise mean over the folds, computed once for all labels.
ensemble_pred = np.mean([pred.to_numpy() for pred in dense_pred_list], axis=0)

submission = test.copy()
for col_idx, col_name in enumerate(CFG.target_cols):
    # Assign a plain array so values are placed by position and do not depend
    # on the index labels of `test`.
    submission[col_name] = ensemble_pred[:, col_idx]
display(submission)

# The CSV is only written when debug mode is off.
if not CFG.debug:
    submission.to_csv("submission.csv", index=False)

Unnamed: 0,StudyInstanceUID,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
0,1.2.826.0.1.3680043.8.498.26697628953273228189...,0.001258,0.000487,0.000332,0.011385,0.137658,0.008421,0.674002,0.222880,0.128204,0.036016,0.000304,ec89415d1
1,1.2.826.0.1.3680043.8.498.46302891597398758759...,0.005584,0.063126,0.963192,0.226725,0.044443,0.899897,0.020829,0.068652,0.041214,0.980235,0.011262,bf4c6da3c
2,1.2.826.0.1.3680043.8.498.23819260719748494858...,0.000309,0.000526,0.000213,0.000352,0.000222,0.000046,0.000722,0.004723,0.822312,0.263705,0.000123,3fc1c97e5
3,1.2.826.0.1.3680043.8.498.68286643202323212801...,0.000848,0.002815,0.002329,0.002313,0.001520,0.001542,0.005733,0.449446,0.548129,0.042099,0.000744,c31019814
4,1.2.826.0.1.3680043.8.498.10050203009225938259...,0.000354,0.000194,0.000473,0.000390,0.000603,0.000354,0.000318,0.001224,0.009989,0.996869,0.000197,207685cd1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,1.2.826.0.1.3680043.8.498.74257566841157531124...,0.000499,0.003804,0.994341,0.003840,0.005110,0.010109,0.008739,0.228518,0.540705,0.987893,0.002941,5b5b9ac30
30079,1.2.826.0.1.3680043.8.498.46510939987173529969...,0.000339,0.000176,0.000286,0.000221,0.000242,0.000648,0.000073,0.015051,0.016122,0.977212,0.000648,7192404d8
30080,1.2.826.0.1.3680043.8.498.43173270582850645437...,0.000860,0.022287,0.977209,0.006434,0.004605,0.925710,0.077896,0.395232,0.864094,0.362042,0.005338,d4d1b066d
30081,1.2.826.0.1.3680043.8.498.95092491950130838685...,0.000969,0.001429,0.000189,0.002883,0.001543,0.001044,0.000182,0.029830,0.780609,0.054084,0.000201,01a6602b8


In [19]:
# Per-label ROC AUC of the ensemble against the labels in `test`, followed by
# the mean over labels.
#
# Scoring is restricted to debug mode, where `test` is the labelled training
# table. Outside debug mode `test` comes from sample_submission.csv, which is
# not shown to hold ground-truth labels.
# NOTE(review): roc_auc_score raises ValueError when y_true contains a single
# class, which would abort the run after the submission was written — confirm
# against the sample submission contents.
if CFG.debug:
    auc_by_label = {
        col_name: roc_auc_score(test[col_name], submission[col_name])
        for col_name in CFG.target_cols
    }

    # Build the one-row table in a single step so the columns are numeric
    # rather than filled cell by cell into an empty frame.
    results = pd.DataFrame([auc_by_label], index=["AUC"], columns=CFG.target_cols)

    display(results, results.mean(axis=1))

Unnamed: 0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
AUC,0.986835,0.984116,0.996592,0.978768,0.970776,0.991322,0.991293,0.943553,0.904939,0.952546,0.999313


AUC    0.972732
dtype: float64

In [20]:
# Out-of-fold predictions and the training labels, both sorted by
# StudyInstanceUID so the two frames can be compared row by row.
# NOTE(review): this assumes both files contain the same set of UIDs — confirm.
df = pd.read_csv("oof_df.csv").sort_values("StudyInstanceUID")
train = pd.read_csv(f"{CFG.dataset_dir}train.csv").sort_values("StudyInstanceUID")

# Keep only the prediction columns, selected by name ("pred_<label>") instead
# of by a fixed column offset, so a change in the CSV layout cannot silently
# pick the wrong columns.
df = df[[f"pred_{col_name}" for col_name in CFG.target_cols]]

# Drop the identifier; the label columns (and PatientID) remain.
train = train.drop(columns="StudyInstanceUID")

display(df)
display(train)

Unnamed: 0,pred_ETT - Abnormal,pred_ETT - Borderline,pred_ETT - Normal,pred_NGT - Abnormal,pred_NGT - Borderline,pred_NGT - Incompletely Imaged,pred_NGT - Normal,pred_CVC - Abnormal,pred_CVC - Borderline,pred_CVC - Normal,pred_Swan Ganz Catheter Present
15248,0.000106,0.000227,0.000114,0.000291,0.000032,0.000030,0.000008,0.010714,0.112789,0.899030,0.000012
15255,0.000402,0.000459,0.001493,0.001624,0.001824,0.000272,0.000672,0.030822,0.043551,0.913985,0.000307
5647,0.000134,0.000455,0.000763,0.000052,0.000286,0.000012,0.012631,0.069527,0.347159,0.754484,0.000376
4913,0.000065,0.000182,0.001127,0.000144,0.000164,0.000072,0.000222,0.020756,0.623359,0.302134,0.000135
29816,0.000032,0.000491,0.999148,0.002064,0.003982,0.932169,0.031456,0.001993,0.152436,0.998728,0.000087
...,...,...,...,...,...,...,...,...,...,...,...
23234,0.000301,0.000071,0.000199,0.000554,0.000529,0.000284,0.000126,0.011331,0.105220,0.947671,0.000059
8401,0.000140,0.001686,0.006414,0.001417,0.001700,0.000100,0.046394,0.999567,0.002600,0.005291,0.000060
23877,0.000130,0.000344,0.000408,0.000088,0.000035,0.000497,0.000006,0.001729,0.010156,0.997067,0.000016
25553,0.000428,0.000150,0.001308,0.001087,0.001629,0.000075,0.000912,0.021128,0.082669,0.908670,0.000098


Unnamed: 0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present,PatientID
747,0,0,0,0,0,0,0,0,0,1,0,04cfbc3d2
773,0,0,0,0,0,0,0,0,0,1,0,176a5edfd
22581,0,0,0,0,0,0,0,0,0,1,0,d57f48a31
19594,0,0,0,0,0,0,0,0,0,1,0,e252d355a
29072,0,0,1,0,0,1,0,0,1,1,0,8849382d0
...,...,...,...,...,...,...,...,...,...,...,...,...
2826,0,0,0,0,0,0,0,0,0,1,0,61946329f
3580,0,0,0,0,0,0,0,1,0,0,0,30ccfd1fb
5273,0,0,0,0,0,0,0,0,0,1,0,54147ad50
11737,0,0,0,0,0,0,0,0,0,1,0,052d4b6b8


In [50]:
# Per-label ROC AUC of the out-of-fold predictions in `df` against the labels
# in `train`. The two frames are compared by row position; both were sorted by
# StudyInstanceUID when they were loaded.
#
# Columns are paired by name ("pred_<label>" with "<label>") rather than by a
# hard-coded count, and the table is built in one step: DataFrame.append was
# removed in pandas 2.0 and grew the frame one row at a time.
auc_by_label = {
    col_name: roc_auc_score(train[col_name], df[f"pred_{col_name}"])
    for col_name in CFG.target_cols
}

# One row per label with a single "AUC" column, matching the original layout.
results = pd.Series(auc_by_label, name="AUC", dtype=float).to_frame()

display(results.T)
display(results.mean())

Unnamed: 0,ETT - Abnormal,ETT - Borderline,ETT - Normal,NGT - Abnormal,NGT - Borderline,NGT - Incompletely Imaged,NGT - Normal,CVC - Abnormal,CVC - Borderline,CVC - Normal,Swan Ganz Catheter Present
AUC,0.948148,0.949559,0.989415,0.925867,0.931456,0.976794,0.980471,0.883504,0.808819,0.878649,0.997692


AUC    0.93367
dtype: float64