In [14]:
import numpy as np
import pandas as pd
import os
from IPython.display import display
from tqdm.notebook import tqdm
import sys
from glob import glob

import tensorflow as tf
from tensorflow import keras
from tensorflow.io import FixedLenFeature
from tensorflow.keras.applications.efficientnet import  EfficientNetB7

import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold

In [None]:
# Connect to a TPU when one is attached. Otherwise fall back to TensorFlow's
# default (single-device) strategy so that `strategy` is always defined and the
# notebook can still be run top-to-bottom in local debug mode.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
except ValueError:
    # NOTE(review): the resolver is expected to raise ValueError when no TPU can
    # be located -- confirm for the TensorFlow version in use.
    tpu = None
    strategy = tf.distribute.get_strategy()
    print("TPU not found; using default strategy:", type(strategy).__name__)
else:
    strategy = tf.distribute.TPUStrategy(tpu)
    print("Running on TPU:", tpu.master())

In [None]:
# Fetch the notebook user's Google Cloud credential through Kaggle's secrets
# client and register it with TensorFlow.
# NOTE(review): presumably this is what authorizes reads from the gs:// bucket
# used for the non-debug TFRecord path -- confirm.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [8]:
class CFG:
    """Notebook-wide settings.

    ``debug`` selects between a small fixed-size run and the full run whose
    batch size scales with the number of replicas of ``strategy``.
    """

    debug = True

    dataset_dir = "../input/ranzcr-clip-catheter-line-classification/"
    target_size = (256, 256)

    # `strategy` is only looked up when debug is switched off.
    if debug:
        batch_size = 8
        epochs = 10
    else:
        batch_size = strategy.num_replicas_in_sync * 16
        epochs = 20

    # Label columns, in the order used to build the label vector.
    target_cols = [
        'ETT - Abnormal',
        'ETT - Borderline',
        'ETT - Normal',
        'NGT - Abnormal',
        'NGT - Borderline',
        'NGT - Incompletely Imaged',
        'NGT - Normal',
        'CVC - Abnormal',
        'CVC - Borderline',
        'CVC - Normal',
        'Swan Ganz Catheter Present',
    ]


print(CFG.debug)

True


In [3]:
# Schema of one serialized example in the TFRecord file: eleven scalar int64
# label features followed by two scalar byte-string features.
feature_description = {
    label_name: FixedLenFeature([], tf.int64)
    for label_name in (
        "CVC - Abnormal",
        "CVC - Borderline",
        "CVC - Normal",
        "ETT - Abnormal",
        "ETT - Borderline",
        "ETT - Normal",
        "NGT - Abnormal",
        "NGT - Borderline",
        "NGT - Incompletely Imaged",
        "NGT - Normal",
        "Swan Ganz Catheter Present",
    )
}
feature_description["StudyInstanceUID"] = FixedLenFeature([], tf.string)
feature_description["image"] = FixedLenFeature([], tf.string)

def parse_examples(example):
    """Parse serialized example data into a dict of tensors.

    Args:
        example: serialized ``tf.train.Example`` string tensor.

    Returns:
        Dict keyed like ``feature_description`` holding the parsed tensors.
    """
    parsed = tf.io.parse_example(serialized=example, features=feature_description)
    return parsed

In [18]:
class dset_split():
    """Patient-grouped K-fold splitter usable as a ``tf.data`` filter predicate.

    The training CSV is read once. ``set_fold`` then picks one fold of a
    ``GroupKFold`` split (grouped by ``PatientID``) and stores the
    ``StudyInstanceUID`` values of its training and validation rows as string
    tensors. ``train_filter`` / ``val_filter`` test whether a parsed record's
    ``StudyInstanceUID`` belongs to the corresponding set.
    """

    def __init__(self, n_folds: int, csv_path=None):
        """
        Args:
            n_folds: number of GroupKFold splits.
            csv_path: optional path to the training CSV; defaults to
                ``train.csv`` inside ``CFG.dataset_dir``.
        """
        self.n_folds = n_folds
        if csv_path is None:
            # Derive the location from the shared config instead of repeating the literal path.
            csv_path = os.path.join(CFG.dataset_dir, "train.csv")
        self.train = pd.read_csv(csv_path)

    def set_fold(self, fold: int):
        """Select fold ``fold`` and cache its train/validation UID tensors.

        Must be called before ``train_filter`` or ``val_filter`` is used.
        """
        splits = list(
            GroupKFold(n_splits=self.n_folds).split(self.train, groups=self.train["PatientID"])
        )
        train_idx, val_idx = splits[fold]
        # Select the UID column by name rather than by position so that a
        # reordered CSV cannot silently feed the wrong column into the filters.
        uids = self.train["StudyInstanceUID"]
        self.train_uid_tensor = tf.convert_to_tensor(uids.iloc[train_idx].values, dtype=tf.string)
        self.val_uid_tensor = tf.convert_to_tensor(uids.iloc[val_idx].values, dtype=tf.string)

    @staticmethod
    def _uid_in(parsed_record, uid_tensor):
        """Return a boolean tensor: does the record's UID equal any entry of ``uid_tensor``?"""
        matches = tf.math.equal(parsed_record["StudyInstanceUID"], uid_tensor)
        return tf.math.reduce_any(matches)

    def train_filter(self, parsed_record):
        """Predicate for ``Dataset.filter``: keep records of the current training fold."""
        return self._uid_in(parsed_record, self.train_uid_tensor)

    def val_filter(self, parsed_record):
        """Predicate for ``Dataset.filter``: keep records of the current validation fold."""
        return self._uid_in(parsed_record, self.val_uid_tensor)
        
def augument(image, crop_size=None):
    """Apply the random training augmentations to one image.

    Args:
        image: 3-channel image tensor at least as large as ``crop_size``.
        crop_size: ``(height, width)`` of the random crop. Defaults to
            ``CFG.target_size`` so the crop stays in step with the configured
            input size instead of a separately hard-coded value.

    Returns:
        The image after a random crop to ``crop_size`` and a random contrast
        change with a factor drawn from [0.8, 1.2].
    """
    if crop_size is None:
        crop_size = CFG.target_size
    crop_h, crop_w = crop_size
    image = tf.image.random_crop(image, (crop_h, crop_w, 3))
    image = tf.image.random_contrast(image, 0.8, 1.2)
    return image

def preprocessing(parsed_record, training=True):
    """Turn one parsed TFRecord example into an ``(image, label)`` pair.

    Args:
        parsed_record: dict produced by ``parse_examples``.
        training: Python bool. When True the random augmentations of
            ``augument`` are applied; when False a deterministic central crop
            to ``CFG.target_size`` is taken instead, so evaluation metrics do
            not vary from run to run.

    Returns:
        image: float32 tensor of shape ``CFG.target_size + (3,)`` with pixel
            values in [0, 255].
        label: tensor holding the ``CFG.target_cols`` values, in that order.
    """
    image = tf.io.decode_png(parsed_record["image"], channels=3)  # decode_image causes an error at resize
    image = tf.cast(image, tf.float32)
    image = tf.reshape(image, [300, 300, 3])  # without this, TPU will not run

    if training:
        image = augument(image)
        # The contrast change can push values slightly outside the pixel range.
        image = tf.clip_by_value(image, 0.0, 255.0)
    else:
        crop_h, crop_w = CFG.target_size
        image = tf.image.resize_with_crop_or_pad(image, crop_h, crop_w)  # central crop

    # Deliberately no division by 255: the Keras EfficientNet models contain
    # their own rescaling layer and expect float pixels in [0, 255]. Scaling
    # here as well would feed the pretrained weights inputs 255x too small.

    # Stack the per-column scalars into one label vector.
    label = tf.stack([parsed_record[col_name] for col_name in CFG.target_cols])

    return image, label


def build_datasets(splitter):
    """Build the train and validation datasets of still-encoded, parsed records.

    Args:
        splitter: object exposing ``train_filter`` and ``val_filter`` predicates
            (see ``dset_split``).

    Returns:
        ``(train_dset, val_dset)``: both filter the same parsed-and-cached source.
    """
    AUTO = tf.data.experimental.AUTOTUNE
    recordname = "../input/train_300x300.tfrec" if CFG.debug else "gs://km_kaggle_catheter/train_300x300.tfrec"
    raw_dataset = tf.data.TFRecordDataset(recordname)
    dset = raw_dataset.map(parse_examples, num_parallel_calls=AUTO).cache()  # apply time-consuming process before cache()
    train_dset = dset.filter(splitter.train_filter)
    val_dset = dset.filter(splitter.val_filter)

    return train_dset, val_dset


def dset_postprocessing(dset, repeat=False, training=True, shuffle_buffer=None):
    """Decode, optionally shuffle/augment, batch and prefetch a dataset.

    Args:
        dset: dataset of parsed records as returned by ``build_datasets``.
        repeat: repeat the dataset indefinitely when True.
        training: forwarded to ``preprocessing``; False disables the random
            augmentations.
        shuffle_buffer: when set to a positive int, records are shuffled with
            this buffer size before decoding; ``None`` keeps the stored order.

    Returns:
        Dataset of ``(image, label)`` batches of size ``CFG.batch_size``.
    """
    AUTO = tf.data.experimental.AUTOTUNE

    if shuffle_buffer:
        # Shuffle the still-encoded records (cheap to buffer) rather than decoded images.
        dset = dset.shuffle(shuffle_buffer)
    dset = dset.map(lambda record: preprocessing(record, training=training),
                    num_parallel_calls=AUTO)  # memory-consuming process should be placed after cache()
    if repeat:
        dset = dset.repeat()
    # Warning: putting preprocessing after batch() causes an error.
    # With parallel (multi-replica) training, training on a final mini-batch
    # that holds only a few samples makes batch norm divide by a value close
    # to zero; the values blow up and the loss becomes nan. Hence
    # drop_remainder=True.
    dset = dset.batch(CFG.batch_size, drop_remainder=True).prefetch(AUTO)
    return dset


splitter = dset_split(n_folds=4)
splitter.set_fold(0)
train_dset, val_dset = build_datasets(splitter)
# Shuffle and augment the training split only; validation stays deterministic.
train_dset = dset_postprocessing(train_dset, training=True, shuffle_buffer=1024)
val_dset = dset_postprocessing(val_dset, training=False)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'g

In [20]:
# Save options that route checkpoint I/O through the localhost job.
save_locally = tf.saved_model.SaveOptions(experimental_io_device="/job:localhost")

# Both callbacks track validation AUC, where larger is better.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="ckpt",
    monitor="val_auc",
    mode="max",
    save_best_only=True,
    options=save_locally,
)
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_auc",
    mode="max",
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

In [23]:
# NOTE: when training on a TPU, build and compile the model inside
# `with strategy.scope():`.
# The input shape and the number of outputs are taken from CFG so the network
# always matches what the data pipeline produces.
model = keras.Sequential([
    EfficientNetB7(
        include_top=False,
        weights="imagenet",
        input_shape=(*CFG.target_size, 3),
        pooling="avg",
        drop_connect_rate=0.7,
    ),
    # One sigmoid unit per target column (multi-label classification).
    keras.layers.Dense(len(CFG.target_cols), activation="sigmoid"),
])
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[keras.metrics.AUC(multi_label=True, name="auc")],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 2560)              64097687  
_________________________________________________________________
dense (Dense)                (None, 11)                28171     
Total params: 64,125,858
Trainable params: 63,815,131
Non-trainable params: 310,727
_________________________________________________________________


In [24]:
# `max_queue_size` only applies to generator / keras.utils.Sequence inputs and
# is ignored for tf.data datasets, so it is not passed here.
# The returned History is kept so the training curves can be inspected later.
history = model.fit(
    train_dset,
    validation_data=val_dset,
    epochs=CFG.epochs,
    callbacks=[checkpoint, lr_reducer],
)

Epoch 1/10
      1/Unknown - 0s 0s/step - loss: 0.6944 - auc: 0.2565