In [None]:
import os, math
import psutil, random 

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import cv2; print(cv2.__version__)
import tensorflow as tf; print(tf.__version__)

In [None]:
# Runtime switches for the whole notebook.
MIXED_PRECISION = True
XLA_ACCELERATE  = False

GPUS = tf.config.experimental.list_physical_devices('GPU')
if GPUS:
    try:
        # Memory growth has to be configured on *every* physical GPU before the
        # runtime is initialised. Listing logical devices initialises it, so
        # that call (and the summary print) happens only after the loop.
        for GPU in GPUS:
            tf.config.experimental.set_memory_growth(GPU, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(GPUS), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as RE:
        # Raised by TensorFlow when the devices were already initialised.
        print(RE)

if MIXED_PRECISION:
    # Prefer the stable mixed-precision API and fall back to the experimental
    # one on TensorFlow builds that do not provide it yet.
    if hasattr(tf.keras.mixed_precision, 'set_global_policy'):
        policy = tf.keras.mixed_precision.Policy('mixed_float16')
        tf.keras.mixed_precision.set_global_policy(policy)
    else:
        policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
        tf.keras.mixed_precision.experimental.set_policy(policy)
    print('Mixed precision enabled')

if XLA_ACCELERATE:
    tf.config.optimizer.set_jit(True)
    print('Accelerated Linear Algebra enabled')

print("Tensorflow version " + tf.__version__)

# Data Preprocessing

In [None]:
# Study-level labels: derive the bare study UID from the '<uid>_study' id
# column, then drop the original id column.
study_df = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
print(study_df.shape)
study_df['StudyInstanceUID'] = [study_id.replace('_study', '') for study_id in study_df['id']]
study_df = study_df.drop(columns='id')

def hot_to_sparse(row):
    """Return the label of the first entry in `row` whose value equals 1.

    Raises IndexError when no entry equals 1.
    """
    hot_labels = [label for label, value in row.items() if value == 1]
    return hot_labels[0]
# Collapse the one-hot study columns into a single diagnosis name per study,
# then encode that name as an integer class id.
study_df['diagnosis'] = study_df.apply(hot_to_sparse, axis=1)
cls = {
    'Typical Appearance': 1,
    'Negative for Pneumonia': 2,
    'Indeterminate Appearance': 3,
    'Atypical Appearance': 4,
}
study_df['sparse_gt'] = study_df['diagnosis'].map(cls)

# Image-level table joined with the study-level labels; the '_image' suffix
# is removed from the image ids.
image_df = pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')
print(image_df.shape)
train = image_df.merge(study_df, on='StudyInstanceUID')
train['id'] = [image_id.replace('_image', '') for image_id in train['id']]
display(train.head())
print(train.shape)

In [None]:
class2 = pd.read_csv('../input/siim-cov19-csv-2class/train.csv')

class2['id'] = class2['id'].apply(lambda x: x.replace('_image', ''))
# Look the 4-class label up by image id. A plain column assignment would align
# the two tables on their row index, which silently attaches the wrong label
# whenever the CSVs differ in row order or length.
sparse_gt_by_id = train.drop_duplicates('id').set_index('id')['sparse_gt']
class2['sparse_gt'] = class2['id'].map(sparse_gt_by_id)
train = class2.copy()
train.head()

# ROI Segment: Cropped Bounding Box

In [None]:
def vis(path1, path2, n_images, is_random=True, figsize=(16, 16)):
    '''
    Show image / mask pairs side by side, one figure per pair.

    File names are taken from the mask directory (`path2`); the image with the
    same file name is read from `path1`. Both are resized to 512x512 for
    display.

    path1     : directory containing the images.
    path2     : directory containing the masks.
    n_images  : number of pairs to show. In sequential mode it is capped at
                the number of mask files.
    is_random : pick file names at random (with replacement) instead of
                walking the directory listing in order.
    figsize   : matplotlib figure size used for every pair.

    Raises OSError when an image or mask file cannot be read.

    https://gist.github.com/innat/00de7561033ba373745d425c6da7bf8c
    '''
    masks_names = os.listdir(path2)
    if not is_random:
        # Sequential mode indexes the listing directly; never run past its end.
        n_images = min(n_images, len(masks_names))

    for i in range(n_images):
        name = random.choice(masks_names) if is_random else masks_names[i]

        img = cv2.imread(os.path.join(path1, name))
        msk = cv2.imread(os.path.join(path2, name))
        if img is None or msk is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('Could not read image/mask pair for file name: %s' % name)

        # OpenCV loads BGR while matplotlib expects RGB.
        img = cv2.cvtColor(cv2.resize(img, (512, 512)), cv2.COLOR_BGR2RGB)
        msk = cv2.cvtColor(cv2.resize(msk, (512, 512)), cv2.COLOR_BGR2RGB)

        plt.figure(figsize=figsize)
        plt.subplot(121); plt.imshow(img);
        plt.subplot(122); plt.imshow(msk);
        plt.show()

In [None]:
# Dataset locations. The trailing separator on each sub-directory is kept
# because the paths are later concatenated directly with file names.
base_path = '../input/covid19-detection-890pxpng-study'
TRAIN_IMG_PATH, TRAIN_MSK_PATH = (
    os.path.join(base_path, sub_dir) for sub_dir in ('train/', 'ROI Mask/')
)

# Preview five randomly chosen image / mask pairs.
vis(path1=TRAIN_IMG_PATH, path2=TRAIN_MSK_PATH, n_images=5, is_random=True)

In [None]:
from sklearn.model_selection import StratifiedKFold

# Assign every row to one of three validation folds, stratified on the
# 4-class label `sparse_gt`.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=101)

# split() yields *positional* indices, so the fold ids are collected in a
# positional array and attached in one go. This stays correct even when
# `train` does not carry a default 0..n-1 index, and keeps the column integer.
fold_ids = np.full(len(train), -1, dtype=int)
for index, (train_index, val_index) in enumerate(skf.split(X=train.index, y=train.sparse_gt)):
    fold_ids[val_index] = index
train['fold'] = fold_ids

print(train.groupby(['fold', train.sparse_gt]).size())

# Data Generator

In [None]:
import albumentations
from albumentations import *

# For Training
def albu_transforms_train(data_resize):
    """Build the training augmentation pipeline for square inputs.

    data_resize: side length (pixels) passed to the resize and the random
                 resized crop.

    Returns an `albumentations.Compose` applying, in order: resize, random
    resized crop, horizontal flip, shift/scale/rotate, brightness/contrast
    jitter, CLAHE, piecewise affine and sharpen.
    """
    side = data_resize

    geometric = [
        albumentations.Resize(side, side),
        albumentations.RandomResizedCrop(side, side, scale=(0.9, 1), p=1),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.ShiftScaleRotate(p=0.5),
    ]
    photometric = [
        #albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.7),
        albumentations.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=0.7),
        albumentations.CLAHE(clip_limit=(1, 4), p=0.5),
    ]
    imgaug_based = [
        IAAPiecewiseAffine(p=0.2),
        IAASharpen(p=0.2),
        #albumentations.Cutout(max_h_size=int(data_resize * 0.1), max_w_size=int(data_resize * 0.1), num_holes=5, p=0.5),
        #albumentations.Normalize(),
    ]

    return albumentations.Compose(geometric + photometric + imgaug_based)


# For Validation
def albu_transforms_valid(data_resize):
    """Build the validation pipeline: a plain resize to a square of side `data_resize`."""
    resize_only = albumentations.Resize(data_resize, data_resize)
    # A.ToFloat() is deliberately not added: not needed with keras.applications EfficientNet Bx.
    return albumentations.Compose([resize_only], p=1.)

In [None]:
class Covid19Generator(tf.keras.utils.Sequence):
    """Keras Sequence yielding image batches with a class target and a mask target.

    Each item is a pair `({'input': images}, {'clss': targets, 'segg': masks})`:
      * images  - float32, shape (batch_size,) + idim + (3,), RGB channel order
      * targets - float32, shape (batch_size, 1), read from the 'none' column
      * masks   - float32, shape (batch_size,) + mdim + (1,), scaled to [0, 1]
    When `is_train` is False only the images are filled; targets and masks
    stay zero.

    Parameters
    ----------
    img_path, msk_path : directory prefixes; '<id>.png' is appended directly,
        so they are expected to end with a path separator.
    data : DataFrame with an 'id' column (and a 'none' column if `is_train`).
    batch_size : number of samples per batch; a trailing partial batch is dropped.
    random_state : seed for this generator's private shuffling RNG.
    idim, mdim : (height, width) of the network input and of the mask target.
    shuffle : reshuffle the sample order at the end of every epoch.
    transform : albumentations-style callable invoked as
        `transform(image=..., mask=...)` and returning a dict with 'image'
        and 'mask' entries. Its image output must already have size `idim`.
    is_train : whether targets and masks are produced.
    """

    def __init__(self, img_path, msk_path, data, batch_size, random_state, 
                 idim, mdim, shuffle=True, transform=None, is_train=False):
        super().__init__()
        self.idim = idim
        self.mdim = mdim  
        self.data = data
        self.shuffle  = shuffle
        self.random_state = random_state
        
        self.img_path = img_path
        self.msk_path = msk_path
        self.is_train = is_train
        
        self.augment  = transform
        self.batch_size = batch_size
        
        self.list_idx = data.index.values
        self.label = self.data[['none']] if self.is_train else np.nan

        # Private RNG: seeding the global NumPy generator on every epoch would
        # replay the same permutation each epoch and reset the random state
        # used by everything else in the process.
        self._rng = np.random.RandomState(random_state)
        self.on_epoch_end()
        
    def __len__(self):
        """Number of full batches per epoch."""
        return int(np.floor(len(self.list_idx) / self.batch_size))
    
    def __getitem__(self, index):
        """Load, augment and pack batch number `index`."""
        # `indices` holds row positions, so every lookup below is positional
        # and does not depend on the DataFrame carrying a default index.
        batch_pos = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        
        Data = np.zeros((self.batch_size,) + self.idim + (3,), dtype="float32")
        Mask = np.zeros((self.batch_size,) + self.mdim + (1,), dtype="float32")
        Target = np.zeros((self.batch_size, 1), dtype = np.float32)

        for i, pos in enumerate(batch_pos):
            image_id = self.data['id'].iloc[pos]

            # cv2.imread returns None (no exception) when a file is unreadable.
            image = cv2.imread(self.img_path + image_id + '.png')
            if image is None:
                raise FileNotFoundError(
                    'Could not read image: ' + self.img_path + image_id + '.png')
            image = image[:, :, [2, 1, 0]]  # BGR -> RGB

            mask = cv2.imread(self.msk_path + image_id + '.png', 0)
            if mask is None:
                # No mask file for this sample: use an empty mask.
                mask = np.zeros(image.shape[:2], dtype=np.uint8)
            elif mask.shape[:2] != image.shape[:2]:
                # Image and mask must share a size to be augmented together.
                mask = cv2.resize(mask, (image.shape[1], image.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)

            # Augment image and mask jointly so flips / shifts / rotations
            # keep the mask aligned with the image content.
            res = self.augment(image=image, mask=mask)
            image = res['image']

            # cv2.resize takes (width, height); mdim is (height, width).
            mask = cv2.resize(res['mask'], tuple(self.mdim[::-1]))[:, :, np.newaxis]
            
            # mask normalization must
            mask = mask.astype(np.float32)/255.0 

            # assign 
            Data[i,] = image
            if self.is_train:
                Mask[i,] = mask
                Target[i,] = self.label.iloc[pos,].values
        
        inps = {'input': Data}
        outs = {'clss': Target, 'segg': Mask}
        return inps, outs
    
    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when `shuffle` is enabled."""
        self.indices = np.arange(len(self.list_idx))
        if self.shuffle:
            self._rng.shuffle(self.indices)

In [None]:
import matplotlib.pyplot as plt 
from pylab import rcParams

# helper function to plot sample 
def plot_imgs(dataset_show, row, col):
    """Plot the first mask of randomly chosen batches in a row x col layout.

    dataset_show : indexable batch source whose items are `(inputs, targets)`
                   with `targets['segg']` (masks) and `targets['clss']` (labels).
    row, col     : number of figures and number of panels per figure.

    One figure is created per row; every panel shows sample 0 of a randomly
    drawn batch, titled with that sample's class target.
    """
    rcParams['figure.figsize'] = 20,10
    for i in range(row):
        # squeeze=False keeps `ax` two-dimensional, so col == 1 works as well.
        f, ax = plt.subplots(1, col, squeeze=False)
        for p in range(col):
            idx = np.random.randint(0, len(dataset_show))
            _, label = dataset_show[idx]
            panel = ax[0, p]
            panel.grid(False)
            panel.imshow(label['segg'][0], cmap='gray')
            panel.set_title(label['clss'][0])
    plt.show()

In [None]:
# Validation fold used for this run; rows with train.fold == fold form the validation set.
fold = 0
# Side length of the square network input (matches the 324x324x3 input of CovidNet).
img_size = 324
# Side length of the square mask target.
# NOTE(review): presumably the spatial size of the backbone feature map fed to the mask head - confirm.
msk_sizze = 11
# Training batch size; also the upper bound for the validation batch size.
batch_size = 8

def count_data_items(length, b_max):
    """Pick the largest batch size <= `b_max` that divides `length` exactly.

    length : number of samples (positive int).
    b_max  : upper bound for the batch size (at least 1).

    Returns `(batch_size, steps)` as integers with `batch_size * steps == length`,
    so iterating `steps` batches covers every sample exactly once.

    Raises ValueError when `length` or `b_max` is smaller than 1.
    """
    largest = min(int(b_max), length)
    if largest < 1:
        raise ValueError(
            'length and b_max must both be >= 1, got length=%r, b_max=%r' % (length, b_max))

    # Search downwards from the cap; 1 always divides, so a match is guaranteed.
    batch_size = next(b for b in range(largest, 0, -1) if length % b == 0)
    return batch_size, length // batch_size

def fold_generator(fold):
    """Create the training and validation generators for one fold.

    Rows of the global `train` table whose `fold` differs from the argument
    are used for training, the remaining rows for validation. The validation
    batch size is chosen by `count_data_items`.

    Returns `(train_generator, val_generator, train_labels, val_labels, valid_step)`
    where the two `*_labels` values are the re-indexed label DataFrames.
    """
    train_labels = train[train.fold != fold].reset_index(drop=True)
    val_labels = train[train.fold == fold].reset_index(drop=True)

    # Settings shared by both generators.
    shared = dict(
        img_path=TRAIN_IMG_PATH,
        msk_path=TRAIN_MSK_PATH,
        random_state=1234,
        idim=(img_size, img_size),
        mdim=(msk_sizze, msk_sizze),
        is_train=True,
    )

    train_generator = Covid19Generator(
        data=train_labels,
        batch_size=batch_size,
        shuffle=True,
        transform=albu_transforms_train(img_size),
        **shared)

    valid_batch, valid_step = count_data_items(len(val_labels), batch_size)

    val_generator = Covid19Generator(
        data=val_labels,
        batch_size=valid_batch,
        shuffle=False,
        transform=albu_transforms_valid(img_size),
        **shared)

    return train_generator, val_generator, train_labels, val_labels, valid_step


# first fold 
# Note: despite their names, train_len and val_len receive the label DataFrames
# returned by fold_generator (not integer lengths); val_step is the number of
# validation batches.
train_gen, val_gen, train_len, val_len, val_step = fold_generator(fold)

In [None]:
plot_imgs(train_gen, 5, 4) # 5 figures x 4 panels: first mask of randomly drawn training batches

In [None]:
from tensorflow.keras import Model 
from tensorflow.keras import Sequential 
from tensorflow.keras import Input 
from tensorflow.keras import layers 
from tensorflow.keras import applications 

class CovidNet(Model):
    """EfficientNetB7 backbone with a classification head and a mask head.

    The backbone exposes two outputs, the 'top_activation' feature map and the
    backbone output. The former feeds the mask head, the latter the classifier.

    image_shape : (height, width, channels) of the input images.

    `call` expects a dict with the key 'input' and returns a dict with
      'clss' - sigmoid probability, shape (batch, 1)
      'segg' - mask logits, one channel at the feature-map resolution
    Both heads emit float32 so the losses stay numerically stable when a
    mixed-precision policy is active.
    """

    def __init__(self, image_shape=(324, 324, 3)):
        super(CovidNet, self).__init__()
        backbone = applications.EfficientNetB7(input_shape=image_shape,
                                               include_top=False,
                                               weights='imagenet')
        # desired model 
        self.base = Model(
                [backbone.inputs], 
                [backbone.get_layer('top_activation').output, backbone.output]
            )
        
        # tail / head for the classifier 
        self.tail = Sequential(
            [
                layers.GlobalAveragePooling2D(),
                layers.Dropout(0.2),
                #layers.BatchNormalization(),
                # dtype='float32' overrides a global float16 policy for the output layer.
                layers.Dense(1, activation='sigmoid', dtype='float32'),
            ]
        )
        
        # tail / head for the mask 
        self.msk = Sequential(
            [
                layers.Conv2D(filters=324, kernel_size=(1, 1), strides=(1, 1), padding="same"),
                layers.ReLU(),
                layers.BatchNormalization(),
                # Logits (no activation); kept in float32 for the loss.
                layers.Conv2D(filters=1, kernel_size=(1,1), padding="same", dtype='float32')
            ]
        )

    # feed-forwarding  
    def call(self, inputs, training=None, **kwargs):
        """Run the backbone and both heads, forwarding the `training` flag."""
        segg, clss = self.base(inputs['input'], training=training)

        return {
            'clss': self.tail(clss, training=training), 
            'segg': self.msk(segg, training=training)
        }
    

# Reset Keras' global state before the model is created.
tf.keras.backend.clear_session()
model = CovidNet()
# Build with the dict-shaped input the model's call() reads ('input' key) so
# that summary() can report the layers.
model.build(input_shape={'input': (None, 324, 324, 3)})
model.summary()

# Training

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules
from tensorflow.keras.optimizers.schedules import LearningRateSchedule, ExponentialDecay

class WarmupLearningRateSchedule(LearningRateSchedule):
    """Learning rate schedule with linear warm-up followed by a decay rule.

    Parameters
    ----------
    initial_lr : peak learning rate reached at the end of warm-up.
    steps_per_epoch : optimizer steps per epoch; required for 'exponential'
        decay and whenever `warmup_epochs` is non-zero.
    lr_decay_type : one of 'exponential', 'cosine', 'linear', 'constant'.
    decay_factor, decay_epochs : exponential decay rate and period (in epochs).
    total_steps : total number of training steps; required for 'cosine' and
        'linear' decay.
    warmup_epochs : length of the linear warm-up in epochs (0 disables it).
    minimal_lr : lower bound applied to the decayed rate (0 disables it).
    """

    def __init__(self,
               initial_lr,
               steps_per_epoch=None,
               lr_decay_type='exponential',
               decay_factor=0.97,
               decay_epochs=2.4,
               total_steps=None,
               warmup_epochs=5,
               minimal_lr=0):
        super(WarmupLearningRateSchedule, self).__init__()
        self.initial_lr = initial_lr
        self.steps_per_epoch = steps_per_epoch
        self.lr_decay_type = lr_decay_type
        self.decay_factor = decay_factor
        self.decay_epochs = decay_epochs
        self.total_steps = total_steps
        self.warmup_epochs = warmup_epochs
        self.minimal_lr = minimal_lr

    def _decayed_lr(self, step):
        """Return the learning rate of the configured decay rule, without warm-up or floor."""
        if self.lr_decay_type == 'exponential':
            assert self.steps_per_epoch is not None
            decay_steps = self.steps_per_epoch * self.decay_epochs
            return ExponentialDecay(self.initial_lr, decay_steps, 
                                    self.decay_factor, staircase=True)(step)
        if self.lr_decay_type == 'cosine':
            assert self.total_steps is not None
            return 0.5 * self.initial_lr * (
              1 + tf.cos(np.pi * tf.cast(step, tf.float32) / self.total_steps))
        if self.lr_decay_type == 'linear':
            assert self.total_steps is not None
            return (1.0 - tf.cast(step, tf.float32) / self.total_steps) * self.initial_lr
        if self.lr_decay_type == 'constant':
            return self.initial_lr
        # Raised explicitly (rather than `assert False`) so an unknown type is
        # still rejected when Python runs with assertions disabled.
        raise AssertionError('Unknown lr_decay_type : %s' % self.lr_decay_type)

    def __call__(self, step):
        """Return the learning rate for optimizer step `step`."""
        lr = self._decayed_lr(step)

        if self.minimal_lr:
            lr = tf.math.maximum(lr, self.minimal_lr)

        if self.warmup_epochs:
            # Warm-up length is expressed in epochs, so the epoch size is
            # needed here regardless of the decay type.
            assert self.steps_per_epoch is not None, \
                'steps_per_epoch is required when warmup_epochs is set'
            warmup_steps = int(self.warmup_epochs * self.steps_per_epoch)
            warmup_lr = (
              self.initial_lr * tf.cast(step, tf.float32) /
              tf.cast(warmup_steps, tf.float32))
            lr = tf.cond(step < warmup_steps, lambda: warmup_lr, lambda: lr)

        return lr

    def get_config(self):
        """Return the constructor arguments for serialization."""
        return {
            'initial_lr': self.initial_lr,
            'steps_per_epoch': self.steps_per_epoch,
            'lr_decay_type': self.lr_decay_type,
            'decay_factor': self.decay_factor,
            'decay_epochs': self.decay_epochs,
            'total_steps': self.total_steps,
            'warmup_epochs': self.warmup_epochs,
            'minimal_lr': self.minimal_lr,
        }

In [None]:
# One epoch is exactly the number of full batches the training generator
# yields (its __len__ drops a trailing partial batch). Using len() keeps this
# an int and keeps Keras epochs aligned with the generator's own epochs.
steps_per_epoch  = len(train_gen)
validation_steps = val_step 
epochs = 20

lr_sched = 'cosine'
lr_base = 0.003
lr_min=1e-6
lr_decay_epoch = 2.4
lr_warmup_epoch = 5
lr_decay_factor = 0.97

# Base rates are scaled linearly with the batch size, relative to a batch of 256.
scaled_lr = lr_base * (batch_size / 256.0)
scaled_lr_min = lr_min * (batch_size / 256.0)
total_steps = steps_per_epoch * epochs

learning_rate = WarmupLearningRateSchedule(
    scaled_lr,
    steps_per_epoch=steps_per_epoch,
    decay_epochs=lr_decay_epoch,
    warmup_epochs=lr_warmup_epoch,
    decay_factor=lr_decay_factor,
    lr_decay_type=lr_sched,
    total_steps=total_steps,
    minimal_lr=scaled_lr_min)

In [None]:
import tensorflow_addons as tfa

In [None]:
from tensorflow.keras import losses 
from tensorflow.keras import metrics
from tensorflow.keras import optimizers

# bind all
model.compile(
    loss = {
        # The classifier head is a single sigmoid unit with a (batch, 1) target,
        # so the matching loss is binary cross-entropy. Categorical
        # cross-entropy normalises a one-element prediction to 1 and therefore
        # yields a (near) zero loss and no gradient for this head.
        'clss': losses.BinaryCrossentropy(
            label_smoothing=0, from_logits=False),
        # The mask head ends in a convolution without activation, i.e. logits.
        'segg': losses.BinaryCrossentropy(from_logits=True)
    },
    
    metrics = {
        'clss': [
            metrics.AUC(curve='ROC', multi_label=True),
            metrics.SpecificityAtSensitivity(0.60, name='@sensitivity')
        ]
    },
    
    # Adam runs with its default learning rate here; no schedule object is passed.
    optimizer = optimizers.Adam()
)

# list of call backs 
from tensorflow.keras import callbacks
# CovidNet is a subclassed model, and Keras cannot write a full subclassed
# model to an HDF5 (.h5) file. Only the weights are checkpointed; restore them
# with CovidNet() followed by load_weights().
checkpoint = callbacks.ModelCheckpoint(
        f'model_{fold}.h5', save_best_only=True, save_weights_only=True,
        monitor='val_loss', mode='min')
lr_reducer = callbacks.ReduceLROnPlateau(
        monitor="val_loss", patience=3, min_lr=1e-6, mode='min')

# fitter
# Training options are gathered first, then handed to fit() in one call.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=validation_steps,
    callbacks=[checkpoint, lr_reducer],
    workers=psutil.cpu_count(),
    verbose=2,
    epochs=epochs,
)
model.fit(train_gen, **fit_options)