In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from tqdm import tqdm
import pickle

from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import callbacks

from sklearn.model_selection import GroupKFold

In [3]:
class CFG:
    """Notebook-wide configuration constants."""

    # When True, model artifacts are read from / written to ./models/.
    debug = True

    # Root directory of the competition dataset (train.csv lives here).
    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"

    # Prefix for every model artifact path; empty string when not debugging.
    models_dir = "" if not debug else "./models/"

    # Label columns of train.csv that the classifier predicts (one sigmoid output each).
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

    # Number of cross-validation folds.
    n_folds = 4

In [3]:
# Read the training table from the dataset directory, then keep only the
# label columns listed in CFG.target_cols as the supervised targets.
train = pd.read_csv(CFG.dataset_dir + "train.csv")
y = train[CFG.target_cols]

In [4]:
# Load the precomputed feature arrays, one archive entry per StudyInstanceUID,
# in the same row order as `train` so that features[i] belongs to train row i.
# The NpzFile keeps the archive open and reads entries lazily, so it is used as
# a context manager to release the file handle once every entry has been read.
with np.load("../input/effnet_tuned_output.npz") as npz:
    features_list=[npz[uid] for uid in tqdm(train["StudyInstanceUID"])]

# Stack the per-study arrays into a single array
# (presumably shape (n_studies, 2560) to match create_model — TODO confirm).
features=np.array(features_list)

100%|██████████| 30083/30083 [00:51<00:00, 588.93it/s]


In [12]:
def compress_with_autoencoder(features):
    """Scale `features` and encode them with the saved autoencoder.

    The pickled scaler is applied first, then the autoencoder is rebuilt from
    its JSON architecture and weight checkpoint, and the activations of its
    ``dense_1`` layer are returned.

    Args:
        features: array-like accepted by the pickled scaler's ``transform``
            (presumably the same layout the scaler was fitted on — TODO confirm).

    Returns:
        pandas.DataFrame holding the ``dense_1`` activations, one row per input row.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load scaler
    # files that come from a trusted source.
    # NOTE(review): this path is hard-coded to ./models/ rather than built from
    # CFG.models_dir like the autoencoder paths below — confirm that is intended.
    # `with` guarantees the file handle is closed (the bare open() leaked it).
    with open("./models/minmaxscaler_effnet_tuned.pickle","rb") as scaler_file:
        scaler=pickle.load(scaler_file)
    X=scaler.transform(features)

    # Rebuild the autoencoder: architecture from JSON, weights from the checkpoint.
    autoencoder_dir=f"{CFG.models_dir}autoencoder_tuned/"
    with open(f"{autoencoder_dir}model.json","rt") as f:
        model_json=f.read()
    autoencoder=models.model_from_json(model_json)
    autoencoder.load_weights(f"{autoencoder_dir}ckpt")

    # Sub-model that stops at the chosen layer, so predict() yields that
    # layer's activations instead of the autoencoder's final output.
    layer_name="dense_1"
    compressing_model=models.Model(inputs=autoencoder.input,outputs=autoencoder.get_layer(layer_name).output)

    ae_pred=compressing_model.predict(X)
    ae_pred_df=pd.DataFrame(ae_pred)

    return ae_pred_df

In [14]:
def get_fold(train):
    """Return a copy of `train` with an integer ``folds`` column.

    Rows are split with GroupKFold on ``PatientID`` into ``CFG.n_folds`` folds,
    so all rows of one patient share the same fold. ``folds`` holds the index
    of the fold in which the row is part of the validation split.

    Args:
        train: DataFrame containing a ``PatientID`` column.

    Returns:
        A copy of `train` with the extra integer column ``folds``.
    """
    fold=train.copy()
    splitter=GroupKFold(n_splits=CFG.n_folds)

    # GroupKFold yields *positional* row indices. Writing them through the
    # label-based .loc is only correct for a default RangeIndex, so the fold
    # ids are collected in a positional NumPy array and attached in one step.
    # This also keeps the column integer-typed from the start.
    fold_ids=np.full(len(fold),-1,dtype=int)
    for n,(_,val_idx) in enumerate(splitter.split(train,groups=train["PatientID"])):
        fold_ids[val_idx]=n
    fold["folds"]=fold_ids
    return fold

# Fold-annotated copy of `train`; its "folds" column selects the validation rows of each fold.
fold=get_fold(train)

In [6]:
def create_model(input_dim=2560,learning_rate=1e-4):
    """Build and compile a single-layer multi-label classifier.

    The model is one Dense layer with a sigmoid output per entry of
    ``CFG.target_cols``, trained with binary cross-entropy and tracked with a
    multi-label AUC metric named ``auc`` (reported as ``val_auc`` on validation data).

    Args:
        input_dim: width of the input feature vector. Defaults to 2560, the
            value previously hard-coded here.
        learning_rate: initial Adam learning rate. Defaults to 1e-4.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    dense_model=keras.Sequential([
        keras.layers.Dense(len(CFG.target_cols),input_shape=(input_dim,),activation="sigmoid")
    ])
    adam=keras.optimizers.Adam(learning_rate=learning_rate)
    # The explicit metric name keeps the monitored key ("val_auc") stable
    # across repeated model creations.
    dense_model.compile(optimizer=adam,loss="binary_crossentropy",metrics=[keras.metrics.AUC(multi_label=True,name="auc")])
    return dense_model

In [10]:
# Training callbacks; both watch the validation AUC, where higher is better.

# Stop training once val_auc has gone 50 epochs without improving.
early_stopping = callbacks.EarlyStopping(
    monitor="val_auc",
    mode="max",
    patience=50,
)

# Halve the learning rate after 7 epochs without improvement, never going below 1e-6.
lr_reducer = callbacks.ReduceLROnPlateau(
    monitor="val_auc",
    mode="max",
    factor=0.5,
    patience=7,
    min_lr=1e-6,
    verbose=1,
)

In [11]:
# Train one dense classifier per cross-validation fold and report its validation score.
# fold_scores collects each fold's [loss, auc] so they can be compared or averaged afterwards.
fold_scores=[]
for n in range(CFG.n_folds):
    # Plain NumPy boolean masks index `features` (ndarray) and `y` (DataFrame)
    # by position, independent of the DataFrame index.
    train_idx=(fold["folds"]!=n).to_numpy()
    val_idx=(fold["folds"]==n).to_numpy()
    X_train=features[train_idx]
    X_val=features[val_idx]
    y_train=y[train_idx]
    y_val=y[val_idx]

    dense_model=create_model()
    # Keep only the weights of the epoch with the highest val_auc for this fold.
    checkpoint_path=f"{CFG.models_dir}effnet_tuned_dense_withoutAE/ckpt_{n}"
    checkpoint=callbacks.ModelCheckpoint(checkpoint_path,monitor="val_auc",mode="max",
        save_weights_only=True,save_best_only=True)

    dense_model.fit(X_train,y_train,epochs=100,shuffle=True,validation_data=(X_val,y_val),callbacks=[early_stopping,lr_reducer,checkpoint])

    # fit() leaves the model at its last-epoch weights, and early stopping is not
    # configured to restore the best ones. Reload the best checkpoint so the
    # reported score describes the weights that were actually saved.
    dense_model.load_weights(checkpoint_path)
    score=dense_model.evaluate(X_val,y_val,verbose=0)
    fold_scores.append(score)
    print(f"\nscore: {score}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

KeyboardInterrupt: 