In [41]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm import tqdm
import pickle

from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import callbacks

from sklearn.model_selection import GroupKFold

In [9]:
class CFG:
    """Notebook-wide settings: dataset location, label columns and fold count."""

    # Dataset root; the trailing slash is kept because file names are
    # appended to this string directly.
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"

    # Label columns selected from the training table as prediction targets.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

    # Number of cross-validation splits.
    n_folds = 4

In [6]:
# Read the training table (study ids, label columns, patient ids) and list
# the columns it provides.
train_csv_path=f"{CFG.dataset_dir}train.csv"
train=pd.read_csv(train_csv_path)
display(train.columns)

Index(['StudyInstanceUID', 'ETT - Abnormal', 'ETT - Borderline',
       'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline',
       'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal',
       'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present',
       'PatientID'],
      dtype='object')

In [15]:
# Fetch the array stored under each training StudyInstanceUID, in the row order
# of `train`, so that row i of `features` belongs to row i of `train`.
# The archive is opened as a context manager: np.load keeps an .npz file open
# until it is closed, and each npz[uid] lookup returns an in-memory array that
# stays valid after the archive is closed.
with np.load("../input/effnet_tuned_output.npz") as npz:
    features_list=[npz[uid] for uid in tqdm(train["StudyInstanceUID"])]
features=np.array(features_list)

100%|██████████| 30083/30083 [00:52<00:00, 571.52it/s]


In [19]:
# Restore the previously saved scaler (presumably a fitted min-max scaler, going
# by the file name — confirm) and apply it to the raw features.
# NOTE(security): pickle.load executes code embedded in the file; only load
# pickles that were produced by a trusted run of this project.
# The `with` block closes the file handle instead of leaving it to the GC.
with open("./models/minmaxscaler_effnet_tuned.pickle","rb") as scaler_file:
    scaler=pickle.load(scaler_file)
X=scaler.transform(features)

In [21]:
# Rebuild the autoencoder from its saved JSON architecture, then restore the
# trained weights from the checkpoint stored in the same directory.
autoencoder_dir="./models/autoencoder_tuned/"

with open(f"{autoencoder_dir}model.json","rt") as architecture_file:
    model_json=architecture_file.read()

autoencoder=models.model_from_json(model_json)
autoencoder.load_weights(f"{autoencoder_dir}ckpt")

# Print the layer table to confirm the restored architecture.
autoencoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1800)              4609800   
_________________________________________________________________
dense_1 (Dense)              (None, 100)               180100    
_________________________________________________________________
dense_2 (Dense)              (None, 1800)              181800    
_________________________________________________________________
dropout (Dropout)            (None, 1800)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 2560)              4610560   
Total params: 9,582,260
Trainable params: 9,582,260
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Build a sub-model that shares the autoencoder's input but stops at the chosen
# hidden layer, so that layer's activations can be used as features.
layer_name="dense_1"
hidden_output=autoencoder.get_layer(layer_name).output
hidden_layer_model=models.Model(inputs=autoencoder.input,outputs=hidden_output)

# Encode the scaled features and show the shape of the encoded matrix.
ae_pred=hidden_layer_model.predict(X)
display(ae_pred.shape)

(30083, 100)

In [36]:
# Assign every row of `train` to one of CFG.n_folds validation folds, grouping
# by PatientID so that all rows of a patient land in the same fold.
fold=train.copy()
splitter=GroupKFold(n_splits=CFG.n_folds)

# GroupKFold.split yields *positional* row indices, so the fold ids are written
# into a NumPy array by position rather than through label-based `.loc`, which
# would only be correct while `train` has a default RangeIndex.
fold_ids=np.full(len(train),-1,dtype=int)
for n,(_,val_positions) in enumerate(splitter.split(train,groups=train["PatientID"])):
    fold_ids[val_positions]=n
assert (fold_ids>=0).all(), "every row must belong to exactly one validation fold"
fold["folds"]=fold_ids

# Boolean row masks for the fold used as the validation set in this run.
target_fold=0
train_idx=(fold["folds"]!=target_fold)
val_idx=(fold["folds"]==target_fold)

In [37]:
# Split the encoded features and the label columns into training and
# validation parts using the boolean fold masks.
ae_pred_df=pd.DataFrame(ae_pred)
y=train[CFG.target_cols]

X_train,X_val=ae_pred_df[train_idx],ae_pred_df[val_idx]
y_train,y_val=y[train_idx],y[val_idx]

In [43]:
# Single sigmoid layer that maps the encoded features to one independent
# probability per target column.
# The input width is taken from the training matrix instead of a hard-coded
# 100, so it stays in sync if a different hidden layer is used for encoding.
n_input_features=X_train.shape[1]
dense_model=keras.Sequential([
    keras.layers.Dense(len(CFG.target_cols),input_shape=(n_input_features,),activation="sigmoid")
])
dense_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 11)                1111      
Total params: 1,111
Trainable params: 1,111
Non-trainable params: 0
_________________________________________________________________


In [44]:
# Train the classifier on the encoded features, tracking multi-label AUC on the
# validation fold, then report the validation score of the best checkpoint.
checkpoint_path="./models/effnet_tuned_dense/ckpt"

adam=keras.optimizers.Adam(learning_rate=1e-4)
dense_model.compile(optimizer=adam,loss="binary_crossentropy",metrics=[keras.metrics.AUC(multi_label=True,name="auc")])

fit_callbacks=[
    # Stop once val_auc has not improved for 50 epochs.
    callbacks.EarlyStopping(monitor="val_auc",patience=50,mode="max"),
    # Halve the learning rate after 10 epochs without val_auc improvement.
    callbacks.ReduceLROnPlateau(monitor="val_auc",patience=10,min_lr=1e-7,mode="max",factor=0.5,verbose=1),
    # Keep only the weights of the epoch with the highest val_auc.
    callbacks.ModelCheckpoint(checkpoint_path,monitor="val_auc",mode="max",
        save_weights_only=True,save_best_only=True)
]

dense_model.fit(X_train,y_train,epochs=100,shuffle=True,validation_data=(X_val,y_val),callbacks=fit_callbacks)

# After fit() the model holds the weights of the last epoch, which need not be
# the best one. Reload the best checkpoint so the reported score describes the
# weights that were actually saved.
dense_model.load_weights(checkpoint_path)
score=dense_model.evaluate(X_val,y_val,verbose=0)
print(f"\nscore: {score}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

KeyboardInterrupt: 