In [1]:
import pandas as pd
import numpy as np
import os
import io
from IPython.display import display
import matplotlib.pyplot as plt
import math

from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB7,preprocess_input
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import models

import tensorflow as tf
from tensorflow.train import Example
from tensorflow.data import TFRecordDataset

from sklearn.model_selection import GroupKFold

## CFG

In [2]:
class CFG:
    """Notebook-wide settings: run mode, training sizes, paths and label columns."""

    # Debug mode is on whenever a `get_ipython` name is present in the module
    # globals (as is the case inside an IPython/Jupyter kernel).
    debug="get_ipython" in globals()

    # Debug runs use small batches and few epochs; full runs use the larger values.
    if debug:
        batch_size,epochs=8,10
    else:
        batch_size,epochs=64,40

    # Number of folds handed to GroupKFold.
    n_splits=4

    # Competition data root and the directory holding model artefacts.
    dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
    models_dir="./models/" # "../input/efficientnet-lightgbm-models/"

    # Label columns, one binary target per entry.
    target_cols=[
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

print(f"CFG.debug: {CFG.debug}")

CFG.debug: True


In [3]:
def tfrecord_trial():
    """Read the first two records of one training TFRecord shard and plot them.

    For each record the study UID is printed and the decoded image is drawn
    in its own 8x8 inch figure, titled with that UID.
    """
    index=0
    index_str=f"{index:02d}"
    shard_paths=[f"../input/ranzcr-clip-catheter-line-classification/train_tfrecords/{index_str}-1881.tfrec"]

    # Only these two features are parsed; both are scalar byte strings.
    schema={
        "StudyInstanceUID":tf.io.FixedLenFeature([],tf.string),
        "image":tf.io.FixedLenFeature([],tf.string),
    }

    for record in TFRecordDataset(shard_paths).take(2):
        example=tf.io.parse_single_example(record,schema)

        study_uid=example["StudyInstanceUID"].numpy().decode()
        pixels=tf.image.decode_image(example["image"])

        print(study_uid)
        plt.figure(figsize=(8,8))
        plt.imshow(pixels,cmap="Greys_r")
        plt.title(study_uid)

# tfrecord_trial()

In [4]:
# Label/metadata table for the training images.
train=pd.read_csv(CFG.dataset_dir+"train.csv")

# Split by PatientID so that all rows of one patient land on the same side,
# and keep only the first of the CFG.n_splits folds.
group_kfold=GroupKFold(n_splits=CFG.n_splits)
train_idx,val_idx=next(iter(group_kfold.split(train,groups=train["PatientID"].values)))

In [5]:
class ImageDataSequence(keras.utils.Sequence):
    """Keras ``Sequence`` that loads image files listed in a DataFrame, batch by batch.

    Construct the object, then call :meth:`flow_from_dataframe` to attach the
    file list and labels before indexing it or handing it to ``fit``.

    Args:
        batch_size: Number of samples per batch.
        preprocessing_function: Callable applied to every image array after
            the optional pixel scaling.
        target_size: Size passed to ``load_img`` when reading each file.
        pixel_divisor: Every raw pixel array is divided by this value before
            ``preprocessing_function`` runs; ``None`` skips the division.
            Defaults to 255, which keeps the historical behaviour.
    """

    def __init__(self,batch_size,preprocessing_function,target_size=(256,256),pixel_divisor=255):
        # Let the Keras base class set up whatever state it needs.
        super().__init__()
        self.batch_size=batch_size
        self.preprocessing_function=preprocessing_function
        self.target_size=target_size
        self.pixel_divisor=pixel_divisor

        # Filled in by flow_from_dataframe(); None means "not configured yet".
        self.x=None
        self.y=None
        self.directory=None

    def _check_configured(self):
        """Fail with a clear message when no dataframe has been attached yet."""
        if self.x is None or self.y is None or self.directory is None:
            raise RuntimeError("ImageDataSequence has no data: call flow_from_dataframe() first")

    def __len__(self):
        """Number of batches; the last batch may be smaller than ``batch_size``."""
        self._check_configured()
        return math.ceil(len(self.x) / self.batch_size)

    def flow_from_dataframe(self,dataframe,directory,x_col,y_col,ext:str):
        """Attach the file names and labels to iterate over.

        Args:
            dataframe: Table holding one row per image.
            directory: Directory the image files are read from.
            x_col: Column with file names without extension.
            y_col: Column name (or list of names) holding the labels.
            ext: Extension appended to every value of ``x_col``.

        Returns:
            This sequence, so the call can be chained.
        """
        self.x=dataframe[x_col]+ext
        self.y=dataframe[y_col]
        self.directory=directory
        return self

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, labels)`` numpy arrays."""
        self._check_configured()
        start_idx=idx*self.batch_size
        last_idx=(idx+1)*self.batch_size
        batch_x=self.x.iloc[start_idx:last_idx]
        batch_y=self.y.iloc[start_idx:last_idx].values

        batch_x_imgs=[self.preprocess(os.path.join(self.directory,file_name)) for file_name in batch_x]

        return np.array(batch_x_imgs), batch_y

    def preprocess(self,path):
        """Load one image, resize it, scale it and apply ``preprocessing_function``."""
        img=keras.preprocessing.image.load_img(path,target_size=self.target_size)
        img_array=keras.preprocessing.image.img_to_array(img)
        # NOTE(review): this notebook passes EfficientNet's preprocess_input, which
        # Keras documents as a pass-through expecting pixel values in [0, 255].
        # With the default divisor the model would receive [0, 1] instead; pass
        # pixel_divisor=None if raw pixel values are intended -- confirm.
        if self.pixel_divisor is not None:
            img_array=img_array/self.pixel_divisor

        return self.preprocessing_function(img_array)

In [6]:
def get_ImageDataSequence_from_dataframe(dataframe):
    """Build an ImageDataSequence over the training JPEGs listed in ``dataframe``.

    File names come from the ``StudyInstanceUID`` column plus ``.jpg`` and are
    read from the ``train`` folder under ``CFG.dataset_dir``; labels are the
    ``CFG.target_cols`` columns.
    """
    image_dir=CFG.dataset_dir+"train"

    img_seq=ImageDataSequence(CFG.batch_size,preprocess_input)
    img_seq.flow_from_dataframe(
        dataframe=dataframe,
        directory=image_dir,
        x_col="StudyInstanceUID",
        y_col=CFG.target_cols,
        ext=".jpg",
    )
    return img_seq

# One sequence per side of the fold, built in train/validation order.
train_seq,val_seq=(
    get_ImageDataSequence_from_dataframe(train.iloc[fold_idx])
    for fold_idx in (train_idx,val_idx)
)

In [7]:
def check_sequence_output(img_seq,idx):
    """Draw up to eight images of batch ``idx`` in a 2x4 grid, each titled with its labels."""
    batch_images,batch_labels=img_seq[idx]

    _,grid=plt.subplots(2,4,figsize=(30,15))
    # zip stops at the shorter side, so at most eight samples are drawn.
    for panel,picture,target in zip(grid.flatten(),batch_images,batch_labels):
        panel.imshow(picture)
        panel.set_title(target)
    plt.show()

# check_sequence_output(train_seq,10)

In [8]:
def create_transfer_model():
    """Return a frozen ImageNet EfficientNetB7 (average-pooled, no top) followed by an 11-unit sigmoid layer."""
    backbone=EfficientNetB7(include_top=False,weights="imagenet",pooling="avg")
    # Keep the pretrained weights fixed; only the dense head is trainable.
    backbone.trainable=False

    transfer_model=Sequential()
    transfer_model.add(backbone)
    transfer_model.add(Dense(11,activation="sigmoid"))
    return transfer_model

# Head-only classifier that consumes feature vectors instead of images.
# 2560 is presumably the width of the pooled EfficientNetB7 output -- confirm
# against the columns of efficientnet_output.csv.
input_shape=(2560,)

transfer_model=models.Sequential()
transfer_model.add(Dropout(0.5,input_shape=input_shape))
transfer_model.add(Dense(11,activation="sigmoid"))
transfer_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 28,171
Trainable params: 28,171
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Load the feature table stored as efficientnet_output.csv under CFG.models_dir.
# It is joined to `train` on "StudyInstanceUID" in the following cell, so that
# column must be present; the remaining columns are presumably precomputed
# EfficientNet features -- confirm against the notebook that wrote the file.
eff_output=pd.read_csv(f"{CFG.models_dir}efficientnet_output.csv")

In [10]:
# Attach the feature columns to the label table (inner join on the study id).
dataset=pd.merge(train,eff_output,on="StudyInstanceUID")

# train_idx/val_idx are row positions in `train`, not in `dataset`. If the
# feature file misses a study (or repeats one) the merged frame no longer has
# the same rows, and positional indexing would silently pick the wrong studies
# -- mixing patients across the split -- or run out of bounds. Select by study
# id instead; when both tables match one-to-one this yields the same rows.
train_uids=train["StudyInstanceUID"].iloc[train_idx]
val_uids=train["StudyInstanceUID"].iloc[val_idx]
train_dataset=dataset[dataset["StudyInstanceUID"].isin(train_uids)]
val_dataset=dataset[dataset["StudyInstanceUID"].isin(val_uids)]

In [11]:
adam=Adam(learning_rate=1e-3)

# Name the metric explicitly. Auto-generated metric names can receive a numeric
# suffix (e.g. "auc_1") when this cell is re-run in the same kernel, and the
# callback below would then no longer find "val_auc".
auc_metric=keras.metrics.AUC(multi_label=True,name="auc")
transfer_model.compile(optimizer=adam,loss="binary_crossentropy",metrics=[auc_metric])

# Lower the learning rate when validation AUC has not improved for 2 epochs.
lr_reducer = keras.callbacks.ReduceLROnPlateau(monitor="val_auc", patience=2, min_lr=1e-6, mode='max', verbose=1)

# The image-based fit over train_seq/val_seq is not run here: this model takes
# feature vectors of width input_shape[0], not images.

# Features are the last input_shape[0] columns of the merged frame; targets are
# the CFG.target_cols columns.
x_train=train_dataset.iloc[:,-input_shape[0]:]
y_train=train_dataset[CFG.target_cols]
x_val=val_dataset.iloc[:,-input_shape[0]:]
y_val=val_dataset[CFG.target_cols]

# Keep the History object so the training curves can be inspected afterwards.
history=transfer_model.fit(x=x_train,y=y_train,callbacks=[lr_reducer],
    epochs=CFG.epochs,validation_data=(x_val,y_val))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1f2b0938fd0>