In [15]:
import pandas as pd
import numpy as np
from IPython.display import display

import tensorflow as tf
from tensorflow.io import FixedLenFeature
from tensorflow import keras
from tensorflow.keras.applications.xception import Xception

from sklearn.model_selection import GroupKFold

In [22]:
class CFG:
    """Notebook-wide settings, read as class attributes."""

    # Debug switch: shrinks the batch size and epoch count below and makes
    # build_datasets() read the local TFRecord file.
    debug=True
    # Directory (with trailing slash) that holds train.csv.
    dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
    batch_size=256 if not debug else 4
    n_epochs=20 if not debug else 2
    # Number of GroupKFold splits.
    n_folds=10
    # Label columns; their order here is the order of the label vector built
    # in preprocessing().
    target_cols=[
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]

In [3]:
# Load the training table from the competition directory configured in CFG.
train = pd.read_csv(CFG.dataset_dir + "train.csv")

In [4]:
# Feature spec handed to tf.io.parse_example: eleven scalar int64 label
# fields followed by two scalar string fields (the study identifier and the
# encoded image bytes).
feature_description={
    **{
        label_key:FixedLenFeature([],tf.int64)
        for label_key in (
            "CVC - Abnormal",
            "CVC - Borderline",
            "CVC - Normal",
            "ETT - Abnormal",
            "ETT - Borderline",
            "ETT - Normal",
            "NGT - Abnormal",
            "NGT - Borderline",
            "NGT - Incompletely Imaged",
            "NGT - Normal",
            "Swan Ganz Catheter Present",
        )
    },
    "StudyInstanceUID":FixedLenFeature([],tf.string),
    "image":FixedLenFeature([],tf.string),
}

def parse_examples(example):
    """Parse serialized example(s) into a dict keyed like `feature_description`.

    Args:
        example: serialized record(s) as yielded by the TFRecord dataset.

    Returns:
        The dict of tensors produced by `tf.io.parse_example`.
    """
    parsed=tf.io.parse_example(serialized=example,features=feature_description)
    return parsed

In [25]:
class dset_split():
    """Patient-grouped K-fold split exposed as tf.data filter predicates.

    The training CSV is read once in the constructor. `set_fold` selects one
    fold and stores the identifiers of its training and validation rows as
    string tensors; `train_filter` / `val_filter` test a parsed record's
    "StudyInstanceUID" against those tensors.
    """

    def __init__(self,n_folds:int):
        """Remember the number of folds and load the training CSV."""
        self.n_folds=n_folds
        self.train=pd.read_csv("../input/ranzcr-clip-catheter-line-classification/train.csv")

    def set_fold(self,fold:int):
        """Activate fold number `fold` (an index into the GroupKFold splits).

        Rows are grouped by the "PatientID" column when splitting. Must be
        called before either filter is used, since it creates the tensors
        they read.
        """
        kfold=GroupKFold(n_splits=self.n_folds)
        all_splits=list(kfold.split(self.train,groups=self.train["PatientID"]))
        fit_rows,holdout_rows=all_splits[fold]
        # Column 0 is read positionally and later compared with
        # "StudyInstanceUID" — presumably it is that column; verify against the CSV.
        fit_uids=self.train.iloc[fit_rows,0].values
        holdout_uids=self.train.iloc[holdout_rows,0].values
        self.train_uid_tensor=tf.convert_to_tensor(fit_uids,dtype=tf.string)
        self.val_uid_tensor=tf.convert_to_tensor(holdout_uids,dtype=tf.string)

    def _uid_in(self,parsed_record,uid_tensor):
        """Return a boolean tensor: does the record's UID equal any entry of `uid_tensor`?"""
        matches=tf.math.equal(parsed_record["StudyInstanceUID"],uid_tensor)
        return tf.math.reduce_any(matches)

    def train_filter(self,parsed_record):
        """Predicate for `Dataset.filter`: keep records of the active training fold."""
        return self._uid_in(parsed_record,self.train_uid_tensor)

    def val_filter(self,parsed_record):
        """Predicate for `Dataset.filter`: keep records of the active validation fold."""
        return self._uid_in(parsed_record,self.val_uid_tensor)


# Shorthand passed as num_parallel_calls / prefetch buffer size below.
AUTO = tf.data.experimental.AUTOTUNE

def preprocessing(parsed_record):
    """Convert one parsed record into an (image, label) pair.

    Args:
        parsed_record: dict from `parse_examples`; its "image" entry holds
            PNG-encoded bytes and it has one entry per `CFG.target_cols` name.

    Returns:
        image: float32 tensor reshaped to [299, 299, 3] and divided by 255.
        label: list of the record's label values, ordered as `CFG.target_cols`.
    """
    # decode_png rather than decode_image: the latter caused an error at the
    # resize step (original author's note).
    pixels=tf.io.decode_png(parsed_record["image"],channels=3)
    pixels=tf.reshape(tf.cast(pixels,tf.float32),[299,299,3]) # without this, TPU will not run
    pixels=pixels/255.0 # normalization

    label=[parsed_record[col_name] for col_name in CFG.target_cols]
    return pixels,label

def augment_(img,label):
    """Apply a random left-right flip, then a random up-down flip, to `img`.

    The label is returned unchanged.
    """
    flipped=tf.image.random_flip_up_down(tf.image.random_flip_left_right(img))
    return flipped,label

def build_datasets(repeat=False):
    """Open the TFRecord file and split it into train / validation datasets.

    The record file is chosen by `CFG.debug`. Records are parsed, cached, and
    then filtered with the predicates of the module-level `splitter` object,
    which therefore must exist (with a fold set) before this is called.

    Args:
        repeat: accepted but not used by this function.

    Returns:
        (train_dset, val_dset): datasets of parsed records, not yet decoded
        or batched.
    """
    if CFG.debug:
        record_name="../input/ranzcr_299x299.tfrec"
    else:
        record_name="gs://km_kaggle_catheter/train_resized.tfrec"
    parsed=(
        tf.data.TFRecordDataset([record_name])
        .map(parse_examples,num_parallel_calls=AUTO)
        .cache() # apply time-consuming process before cache()
    )
    return parsed.filter(splitter.train_filter),parsed.filter(splitter.val_filter)


def dset_postprocessing(dset,repeat=False,augument=True,shuffle=1024,augment=None):
    """Turn a dataset of parsed records into batched (image, label) pairs.

    Pipeline order: preprocessing -> optional flips -> optional shuffle ->
    optional repeat -> batch -> prefetch.

    Args:
        dset: dataset of parsed records (as returned by `build_datasets`).
        repeat: when truthy, repeat the dataset indefinitely.
        augument: when truthy, apply `augment_`. The misspelled name is kept
            so existing callers continue to work.
        shuffle: shuffle buffer size; any falsy value disables shuffling.
        augment: correctly spelled alias for `augument`. When it is not None
            it takes precedence over `augument`.

    Returns:
        The dataset batched by `CFG.batch_size` with prefetching enabled.
    """
    if augment is not None:
        augument=augment
    dset=dset.map(preprocessing,num_parallel_calls=AUTO) # memory-consuming process should be placed after cache()
    if augument:
        dset=dset.map(augment_,num_parallel_calls=AUTO)
    # Shuffle BEFORE repeat: repeating first lets the shuffle buffer mix
    # elements of consecutive epochs, so an element can show up twice before
    # another has been seen once.
    if shuffle:
        dset=dset.shuffle(shuffle)
    if repeat:
        dset=dset.repeat()
    dset=dset.batch(CFG.batch_size).prefetch(AUTO) # Warning: putting preprocessing after batch() causes an error
    return dset


splitter=dset_split(n_folds=CFG.n_folds)
splitter.set_fold(0)
# build_datasets() reads the module-level `splitter`; its only parameter is
# the unused `repeat` flag, so the splitter is not passed as an argument.
train_dset,val_dset=build_datasets()
train_dset=dset_postprocessing(train_dset)
# The keyword is spelled `augument` in dset_postprocessing's signature;
# passing `augment=False` raised TypeError.
val_dset=dset_postprocessing(val_dset,augument=False,shuffle=False,repeat=False)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'g

In [12]:
# ImageNet-pretrained Xception backbone without its classifier top; with
# pooling="avg" it outputs one feature vector per image.
xception=Xception(
    input_shape=(299,299,3),
    include_top=False,
    weights="imagenet",
    pooling="avg",
)
# Head: 11 independent sigmoid outputs (CFG.target_cols lists 11 labels).
xception_dense=keras.Sequential()
xception_dense.add(xception)
xception_dense.add(keras.layers.Dense(units=11,activation="sigmoid"))
xception_dense.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Functional)        (None, 2048)              20861480  
_________________________________________________________________
dense (Dense)                (None, 11)                22539     
Total params: 20,884,019
Trainable params: 20,829,491
Non-trainable params: 54,528
_________________________________________________________________


In [None]:
adam=keras.optimizers.Adam(learning_rate=1e-3)
xception_dense.compile(optimizer=adam,loss="binary_crossentropy",metrics=[keras.metrics.AUC(multi_label=True,name="auc")])

# Save options reused by the checkpoint callback below and by the final
# model save.
save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
# The callbacks are reached through `keras.callbacks`: the notebook imports
# `keras` but never a bare `callbacks` name, so `callbacks.X` raised NameError.
# All three monitor the validation value of the "auc" metric compiled above.
fit_callbacks=[
    keras.callbacks.EarlyStopping(monitor="val_auc",patience=8,mode="max"),
    keras.callbacks.ReduceLROnPlateau(monitor="val_auc",patience=4,min_lr=1e-7,mode="max",factor=0.5,verbose=1),
    keras.callbacks.ModelCheckpoint("ckpt",monitor="val_auc",mode="max",save_best_only=True,options=save_locally)
]

In [None]:
# The datasets built above are named `train_dset` / `val_dset`; the previous
# `train_dataset`, `val_dataset`, `train_paths` and `BATCH_SIZE` names are not
# defined in this notebook. `train_dset` is finite (it is built with the
# default repeat=False), so steps_per_epoch stays None and each epoch runs
# until the dataset is exhausted.
steps_per_epoch = None
history = xception_dense.fit(
    train_dset,
    epochs=CFG.n_epochs,
    verbose=1,
    callbacks=fit_callbacks,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_dset)

In [None]:
# `save_model` is reached through `keras.models`: no bare `models` name is
# imported in this notebook, so `models.save_model` raised NameError.
keras.models.save_model(xception_dense,"last_ckpt",options=save_locally)

In [None]:
# Persist the per-epoch values recorded by fit(), one column per key of
# history.history.
hist_df = pd.DataFrame(data=history.history)
hist_df.to_csv(path_or_buf='history.csv')