In [1]:
import pandas as pd
import numpy as np
import os
from IPython.display import display
import matplotlib.pyplot as plt
import math

from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB7,preprocess_input
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import GroupKFold

## CFG

In [2]:
class CFG:
    """Notebook-wide settings, read as class attributes (never instantiated)."""

    # `get_ipython` is looked up in the module globals; when it is present the
    # notebook runs in debug mode with a smaller batch and a single epoch.
    debug = "get_ipython" in globals()

    # (batch_size, epochs) are switched together by the debug flag.
    batch_size, epochs = (8, 1) if debug else (256, 20)
    n_splits = 4

    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"
    # Label columns read from train.csv, in the order the model outputs them.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]


print(f"CFG.debug: {CFG.debug}")

CFG.debug: True


In [3]:
# Label table for the training images (one row per image).
train = pd.read_csv(CFG.dataset_dir + "train.csv")

# Group the folds by PatientID and keep only the first of the CFG.n_splits
# (train, validation) index pairs; the remaining folds are not used.
group_kfold = GroupKFold(n_splits=CFG.n_splits)
train_idx, val_idx = next(
    group_kfold.split(train, groups=train["PatientID"].values)
)

In [4]:
class ImageDataSequence(keras.utils.Sequence):
    """Keras ``Sequence`` that reads image batches from disk on demand.

    Usage: construct it, then call :meth:`flow_from_dataframe` to attach the
    file names and labels. Only after that can the sequence be indexed or
    passed to ``Model.fit``.

    Args:
        batch_size: Number of images per batch (the last batch may be smaller).
        preprocessing_function: Callable applied to each image array. It
            receives the raw float pixel values produced by ``img_to_array``
            (i.e. NOT rescaled to [0, 1]).
        **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    def __init__(self, batch_size, preprocessing_function, **kwargs):
        super().__init__(**kwargs)
        self.batch_size = batch_size
        self.preprocessing_function = preprocessing_function
        # Filled in by flow_from_dataframe(); None means "no data attached yet".
        self.x = None
        self.y = None
        self.directory = None
        self.target_size = None

    def _require_data(self):
        """Fail with a clear message when no dataframe has been attached."""
        if self.x is None or self.y is None:
            raise RuntimeError(
                "ImageDataSequence has no data; call flow_from_dataframe() first."
            )

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        self._require_data()
        return math.ceil(len(self.x) / self.batch_size)

    def flow_from_dataframe(self, dataframe, directory, x_col, y_col, ext: str, target_size=(256, 256)):
        """Attach file names and labels taken from ``dataframe``.

        Args:
            dataframe: Table holding the file-name stems and the labels.
            directory: Directory the image files are loaded from.
            x_col: Column whose values, with ``ext`` appended, are file names.
            y_col: Column (or list of columns) used as labels.
            ext: File extension appended to every value of ``x_col``.
            target_size: Size passed to ``load_img`` for every image.

        Returns:
            The sequence itself, so the call can be chained.
        """
        self.x = dataframe[x_col] + ext
        self.y = dataframe[y_col]
        self.directory = directory
        self.target_size = target_size
        return self

    def __getitem__(self, idx):
        """Return ``(images, labels)`` for batch number ``idx``."""
        self._require_data()
        start = idx * self.batch_size
        stop = start + self.batch_size
        file_names = self.x.iloc[start:stop]
        labels = self.y.iloc[start:stop].values

        images = np.array(
            [self.preprocess(os.path.join(self.directory, name)) for name in file_names]
        )
        return images, labels

    def preprocess(self, path):
        """Load one image from ``path`` and run the preprocessing function on it."""
        img = keras.preprocessing.image.load_img(path, target_size=self.target_size)
        # Hand over the raw pixel values. The previous extra division by 255
        # squeezed the data into [0, 1] before a function that is documented to
        # expect [0, 255] input (the Keras `preprocess_input` family); for
        # EfficientNet, whose rescaling lives inside the model, that meant the
        # network saw inputs scaled down twice.
        img_array = keras.preprocessing.image.img_to_array(img)
        return self.preprocessing_function(img_array)

In [5]:
def get_ImageDataSequence_from_dataframe(dataframe):
    """Build an ImageDataSequence for the rows of ``dataframe``.

    File names are the ``StudyInstanceUID`` values with ``.jpg`` appended,
    loaded from the ``train`` folder under ``CFG.dataset_dir``; labels are the
    ``CFG.target_cols`` columns. Batch size comes from ``CFG.batch_size``.
    """
    sequence = ImageDataSequence(
        batch_size=CFG.batch_size,
        preprocessing_function=preprocess_input,
    )
    sequence.flow_from_dataframe(
        dataframe=dataframe,
        directory=CFG.dataset_dir + "train",
        x_col="StudyInstanceUID",
        y_col=CFG.target_cols,
        ext=".jpg",
    )
    return sequence

# One sequence per side of the split; rows are selected by position.
train_seq, val_seq = (
    get_ImageDataSequence_from_dataframe(train.iloc[fold_rows])
    for fold_rows in (train_idx, val_idx)
)

In [8]:
def _to_displayable(img):
    """Map an image array into the [0, 1] float range that ``imshow`` expects."""
    img = np.asarray(img, dtype="float32")
    if img.size == 0:
        return img
    lo, hi = float(img.min()), float(img.max())
    if lo >= 0.0 and hi <= 1.0:
        # Already displayable; leave untouched.
        return img
    if lo >= 0.0 and hi <= 255.0:
        # Raw 8-bit style pixel values.
        return img / 255.0
    if hi == lo:
        return np.zeros_like(img)
    # Anything else (e.g. centred/standardised data): stretch to [0, 1].
    return (img - lo) / (hi - lo)


def check_sequence_output(img_seq, idx, n_rows=2, n_cols=4):
    """Plot the first images of batch ``idx`` with their labels as titles.

    Args:
        img_seq: Indexable object returning ``(images, labels)`` per batch.
        idx: Batch number to display.
        n_rows: Rows of the plot grid (default keeps the original 2x4 layout).
        n_cols: Columns of the plot grid.

    At most ``n_rows * n_cols`` images are shown; extra images in the batch
    are ignored and unused grid cells are hidden.
    """
    imgs, labels = img_seq[idx]

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(30, 15), squeeze=False)
    axes = axes.flatten()
    for img, label, ax in zip(imgs, labels, axes):
        # imshow clips float data outside [0, 1], so normalise for display.
        ax.imshow(_to_displayable(img))
        ax.set_title(str(label))
    # Hide grid cells that received no image (batch smaller than the grid).
    for ax in axes[len(imgs):]:
        ax.axis("off")
    plt.show()

# check_sequence_output(train_seq,10)

In [9]:
# ImageNet-initialised EfficientNetB7 without its classification top; with
# average pooling the backbone outputs one flat feature vector per image.
efficientnet_b7 = EfficientNetB7(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)
# Freeze the backbone so that only layers added on top of it are trained.
efficientnet_b7.trainable = False

In [10]:
# Frozen backbone followed by one sigmoid unit per target column. The output
# width is derived from CFG.target_cols (currently 11 entries) instead of being
# hard-coded, so the head cannot drift out of sync with the label columns.
transfer_model = Sequential([
    efficientnet_b7,
    Dense(len(CFG.target_cols), activation="sigmoid"),
])
transfer_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 2560)              64097687  
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 64,125,858
Trainable params: 28,171
Non-trainable params: 64,097,687
_________________________________________________________________


In [11]:
adam = Adam(learning_rate=1e-3)
# Name the metric explicitly. Left unnamed, Keras auto-numbers new AUC
# instances (auc, auc_1, ...) when this cell is re-run in the same kernel, and
# the "val_auc" monitor below would then silently stop matching any metric.
transfer_model.compile(
    optimizer=adam,
    loss="binary_crossentropy",
    metrics=[keras.metrics.AUC(multi_label=True, name="auc")],
)

# Lower the learning rate when the validation AUC stops improving.
lr_reducer = keras.callbacks.ReduceLROnPlateau(monitor="val_auc", patience=3, min_lr=1e-6, mode='max')

# Use the sequences' own batch counts: len(idx)//batch_size rounds down and
# would skip the final partial batch of both the training and validation data.
# Keeping the History object also avoids echoing its repr as the cell output.
history = transfer_model.fit(
    x=train_seq,
    callbacks=[lr_reducer],
    max_queue_size=5,
    epochs=CFG.epochs,
    steps_per_epoch=len(train_seq),
    validation_data=val_seq,
    validation_steps=len(val_seq),
)

   4/2820 [..............................] - ETA: 2:39:16 - loss: 0.5548 - auc: 0.3782

KeyboardInterrupt: 