In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import albumentations as A
from albumentations.core.transforms_interface import DualTransform
import cv2
import numpy as np
import pandas as pd
import random

import gc
import os

from classification_models.tfkeras import Classifiers
from efficientnet.tfkeras import *

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import *
from tensorflow.keras.layers import *
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Optimizer, Adam

from tensorflow_addons.optimizers import SWA
from tensorflow_addons.losses import SigmoidFocalCrossEntropy

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

In [3]:
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)

In [4]:
def set_random_seed():
    random.seed(2021)
    tf.random.set_seed(2020)
    np.random.seed(2019)
set_random_seed()

In [5]:
class HairAugmentation(DualTransform):
    """
    Impose an image of a hair to the target image

    Args:
        hairs (int): maximum number of hairs to impose
        hairs_folder (str): path to the folder with hairs images
    """

    def __init__(self, hairs, hairs_data, always_apply=False, p=0.5):
        super(HairAugmentation, self).__init__(always_apply, p)
        self.hairs = hairs
        self.hairs_data = hairs_data

    def apply(self, image, n_hairs, **params):
        """
        Args:
            img (PIL Image): Image to draw hairs on.

        Returns:
            PIL Image: Image with drawn hairs.
        """    
        if not n_hairs:
            return image
        
        height, width, _ = image.shape  # target image width and height
        
        for _ in range(n_hairs):
            idx = np.random.randint(0, len(self.hairs_data))
            hair = self.hairs_data[idx]
            hair = cv2.flip(hair, np.random.choice([-1, 0, 1]))
            hair = cv2.rotate(hair, np.random.choice([0, 1, 2]))

            h_height, h_width, _ = hair.shape  # hair image width and height
            roi_ho = np.random.randint(0, image.shape[0] - hair.shape[0])
            roi_wo = np.random.randint(0, image.shape[1] - hair.shape[1])
            roi = image[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width]

            # Creating a mask and inverse mask
            img2gray = cv2.cvtColor(hair, cv2.COLOR_BGR2GRAY)
            ret, mask = cv2.threshold(img2gray, 10, 255, cv2.THRESH_BINARY)
            mask_inv = cv2.bitwise_not(mask)

            # Now black-out the area of hair in ROI
            img_bg = cv2.bitwise_and(roi, roi, mask=mask_inv)

            # Take only region of hair from hair image.
            hair_fg = cv2.bitwise_and(hair, hair, mask=mask)

            # Put hair in ROI and modify the target image
            dst = cv2.add(img_bg, hair_fg)

            image[roi_ho:roi_ho + h_height, roi_wo:roi_wo + h_width] = dst
                
        return image
    
    def get_params_dependent_on_targets(self, params):
        n_hairs = np.random.randint(0, self.hairs)
        return {'n_hairs': n_hairs }
    
    @property
    def targets_as_params(self):
        return ['image']
    
    def get_transform_init_args_names(self):
        return ('hairs', 'hairs_data')

In [6]:
IMG_SIZE = 256
prev_img_size = None
BATCH_SIZE = 32
SPLITS = 5

EPOCHS = 10

TRAIN_IMAGE_PATH = '../data/train_256'

ORIGINAL_TRAIN_DF = pd.read_csv('../data/cleaned_train.csv')
EXTERNAL_TRAIN_DF = pd.read_csv('../data/external_train.csv')

model_name = 'seresnet18'
PHI = 0
initial_lr = 1e-4
min_lr = 1e-5

# Prepare hairs data for Hair Augmentation
hairs_data = []
for file in os.listdir('../data/hairs'):
    img = cv2.imread(f'../data/hairs/{file}')
    h, w, _ = img.shape
    img = cv2.resize(img, (w // 4, h // 4), cv2.INTER_AREA)
    hairs_data.append(img)

train_aug = A.Compose([
#                 HairAugmentation(hairs=5, hairs_data=hairs_data),
                A.Flip(),
                A.ShiftScaleRotate(),
                A.RandomBrightnessContrast(),
                A.Cutout(num_holes=1, max_h_size=IMG_SIZE // 2, max_w_size=IMG_SIZE // 2)
            ])
test_aug = None

In [7]:
'''
    Data Generator
'''
class Datagen(Sequence):
    def __init__(self, list_ids, targets, batch_size, image_dir, generator=None, mode='training'):
        self.list_ids = list_ids
        self.targets = targets
        self.batch_size = batch_size
        self.image_dir = image_dir
        self.generator = generator
        self.indices = np.arange(self.list_ids.shape[0])
        self.mode = mode
        
    def preprocessing_image(self, image):
        image = image.astype('float32') / 255.
        return image
        
    def read_image(self, image_id):
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR) 
        return image
        
    def __len__(self):
        return int(np.floor(len(self.list_ids) / self.batch_size))
        
    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index + 1)*self.batch_size]
        image_ids = self.list_ids[indices]
        y = self.targets[indices]
        X = np.empty((len(image_ids), IMG_SIZE, IMG_SIZE, 3), dtype='float32')
        for i in range(len(image_ids)):
            image = self.read_image(image_ids[i])
            if self.generator is not None:
                image = self.generator(image=image)['image']
            image = self.preprocessing_image(image)
            X[i] = image
        return X, y
    
    def on_epoch_end(self):
        if self.mode == 'training':
            np.random.shuffle(self.indices)

In [8]:
'''
    Model
'''
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

def focal_loss(gamma=2., alpha=.75):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed

def build_model(fold=None, prev_img_size=None):
    M = 0
    EFFNET_MODEL = [EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3]
    if 'efficientnet' in model_name:
        M = EFFNET_MODEL[PHI]
    else:
        M, _ = Classifiers.get(model_name)
    base_model = M(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    out = Dense(1, activation='sigmoid', kernel_initializer='he_normal')(x)
    model = Model(inputs=[base_model.input], outputs=[out])
    if prev_img_size is not None:
        model.load_weights(f'../data/model_checkpoint/{model_name}/{model_name}_{prev_img_size}_fold_{fold}.h5')
    opt = keras.optimizers.Adam(lr=initial_lr)
    opt = SWA(opt)
    opt = runai.ga.tfkeras.optimizers.Optimizer(opt, steps=4)
    
    model.compile(optimizer=opt, 
                  loss=focal_loss(),
                  metrics=[AUC()])
    return model

In [9]:
'''
    Callback
'''
def scheduler(epoch):
    return min_lr + (initial_lr - min_lr) * (1 + np.cos(np.pi * (epoch % EPOCHS) / EPOCHS)) / 2

class Evaluate(keras.callbacks.Callback):
    def __init__(self, fold, train_gen, valid_gen, model_checkpoint):
        self.fold = fold
        self.train_gen = train_gen
        self.valid_gen = valid_gen
        self.model_checkpoint = model_checkpoint
        self.best_score = float('-inf')
    
    def compute_auc(self, y_true, y_pred):
        return roc_auc_score(y_true, y_pred)
        
    def on_epoch_end(self, epoch, logs):
        self.train_gen.on_epoch_end()
        _, auc = self.model.evaluate_generator(self.valid_gen, verbose=1)
        new_lr = scheduler(epoch + 1)
        K.set_value(self.model.optimizer.lr, new_lr)
        print("Set LR to {}".format(new_lr))
        print("AUC: {}".format(auc))
        if auc > self.best_score:
            print(f"AUC improved from {self.best_score} to {auc}")
            self.best_score = auc
            self.model.save_weights(f'{self.model_checkpoint}/{model_name}_{IMG_SIZE}_fold_{self.fold}.h5')

In [10]:
external_image_ids = np.asarray(EXTERNAL_TRAIN_DF['image_name'])
external_targets = np.asarray(EXTERNAL_TRAIN_DF['target'], dtype='float32')

image_ids = np.asarray(ORIGINAL_TRAIN_DF['image_name'])
targets = np.asarray(ORIGINAL_TRAIN_DF['target'], dtype='float32')

image_ids = np.concatenate([image_ids, external_image_ids])
targets = np.concatenate([targets, external_targets])

In [11]:
'''
    Training
'''
kf = StratifiedKFold(n_splits=SPLITS, shuffle=True, random_state=2020)
fold = 0
val_auc = []
for train_idx, test_idx in kf.split(image_ids, targets):
    print(f"***********Fold {fold}*************")
    K.clear_session()
    gc.collect()
    
    model = build_model(fold, prev_img_size)
    
    train_image_ids, train_targets = image_ids[train_idx], targets[train_idx]
    test_image_ids, test_targets = image_ids[test_idx], targets[test_idx]
    
    train_gen = Datagen(train_image_ids, train_targets, BATCH_SIZE, TRAIN_IMAGE_PATH, train_aug)
    val_bs = 1
    for i in range(32, 1, -1):
        if len(test_image_ids) % i == 0:
            val_bs = i
            break
    val_gen = Datagen(test_image_ids, test_targets, val_bs, TRAIN_IMAGE_PATH, test_aug, 'valid')
    
    callbacks = [Evaluate(fold, train_gen, val_gen, f'../data/model_checkpoint/{model_name}')]
    
    model.fit_generator(train_gen,
                        steps_per_epoch=len(train_gen),
                        epochs=EPOCHS,
                        callbacks=callbacks,
                        verbose=1)
    val_auc.append(callbacks[0].best_score)
    fold += 1
    
print(val_auc)
print(np.mean(val_auc))

***********Fold 0*************


TypeError: __init__() missing 1 required positional argument: 'name'

- focal_loss_cv = 0.953
- add blur cv = 0.951
- flip, ssr, color = 0.952
- add cutout = 0.9525

- efficientnetb0 256 = 0.96022