In [None]:
# get the data
# Download the Kather 2016 tile archive from Zenodo and unpack it.
# The unzip call expects the archive under /content with the "?download=1"
# query string still part of its file name.
!wget https://zenodo.org/record/53169/files/Kather_texture_2016_image_tiles_5000.zip?download=1
!unzip '/content/Kather_texture_2016_image_tiles_5000.zip?download=1'

In [None]:
import keras.backend as K
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_auc_score


# evaluation metrics for classification
def auc(y_true, y_pred):
    """Mean one-vs-rest ROC AUC over every class except class 0.

    Args:
        y_true: one-hot labels; reduced to class indices with argmax over axis 1.
        y_pred: per-class scores indexed as ``[sample, class]``.

    Returns:
        The mean of the per-class AUC values for classes 1..C-1.

    For two-class input this is the plain binary AUC of column 1. For more
    classes each column is scored against "is this class" labels, because
    ``roc_auc_score`` does not accept multi-class integer labels together
    with a single score column.
    """
    labels = np.argmax(y_true, axis=1)
    # Column 0 is dropped, so column ``i`` below belongs to class ``i + 1``.
    scores = y_pred[:, 1:]

    auc_scores = []
    for i in range(scores.shape[1]):
        is_class = (labels == i + 1).astype(int)
        auc_scores.append(roc_auc_score(is_class, scores[:, i]))

    return np.mean(auc_scores)


def dice_score(y_true, y_pred):
    """Dice coefficient of the flattened inputs, smoothed by adding 1 to both terms."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2.0 * overlap + 1
    denominator = K.sum(truth) + K.sum(pred) + 1
    return numerator / denominator


def recall_m(y_true, y_pred):
    """Recall: rounded true positives over rounded positives in ``y_true``."""
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the ratio finite when there are no positives.
    return K.sum(hits) / (K.sum(actual) + K.epsilon())


def precision_m(y_true, y_pred):
    """Precision: rounded true positives over rounded positives in ``y_pred``."""
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the ratio finite when nothing is predicted positive.
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio


def jaccard_distance(y_true, y_pred, smooth=100):
    """Smoothed Jaccard distance, summed over axes 1 and 2 and averaged over the rest.

    ``smooth`` is added to both the intersection and the union and also
    scales the returned distance.
    """
    reduce_axes = (1, 2)
    overlap = tf.reduce_sum(y_true * y_pred, axis=reduce_axes)
    total = tf.reduce_sum(y_true + y_pred, axis=reduce_axes)
    union = total - overlap
    similarity = (overlap + smooth) / (union + smooth)
    return tf.reduce_mean((1 - similarity) * smooth)


def tf_mean_iou(y_true, y_pred):
    """Mean IoU of the thresholded predictions against ``y_true`` for 2 classes.

    Returns a two-element list: the mean of the collected IoU score tensors
    and the update op of the last ``tf.metrics.mean_iou`` call.

    NOTE(review): ``tf.metrics.mean_iou`` and ``K.get_session`` look like
    TF1 graph-mode APIs - confirm they exist in the TensorFlow version this
    notebook runs on.
    """
    prec = []
    # np.arange(0.5, 1.0, 0.5) yields the single threshold 0.5.
    # NOTE(review): a finer step (e.g. 0.05) may have been intended - confirm.
    for t in np.arange(0.5, 1.0, 0.5):
        y_pred_ = tf.cast(y_pred > t, tf.int32)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        # Initialise the local variables created by the metric above.
        K.get_session().run(tf.local_variables_initializer())
        prec.append(score)
    val = K.mean(K.stack(prec), axis=0)
    return [val, up_opt]


def cross_entropy_balanced(y_true, y_pred):
    """Class-balanced sigmoid cross-entropy computed from logits.

    Positives are weighted by ``beta / (1 - beta)``, where ``beta`` is the
    fraction of negative entries in ``y_true``, and the mean cost is scaled
    by ``1 - beta``. Returns 0.0 when ``y_true`` contains no positives.
    """
    labels = tf.cast(y_true, tf.float32)

    n_pos = tf.reduce_sum(labels)
    n_neg = tf.reduce_sum(1.0 - labels)
    neg_fraction = n_neg / (n_pos + n_neg)

    per_element = tf.nn.weighted_cross_entropy_with_logits(
        logits=y_pred,
        labels=labels,
        pos_weight=neg_fraction / (1 - neg_fraction),
    )
    balanced = tf.reduce_mean(per_element * (1 - neg_fraction))

    no_positives = tf.equal(n_pos, 0.0)
    return tf.where(no_positives, 0.0, balanced)


def pixel_error(y_true, y_pred):
    """Fraction of entries whose prediction, thresholded at 0.5, differs from ``y_true``."""
    truth = tf.cast(y_true, tf.int32)
    thresholded = tf.cast(tf.greater(y_pred, 0.5), tf.int32)
    mismatches = tf.not_equal(thresholded, truth)
    return tf.reduce_mean(tf.cast(mismatches, tf.float32))

In [None]:
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    load_img,
    img_to_array,
)
from PIL import Image
import os
import random
import shutil
from sklearn.model_selection import KFold


# Dataset preparation mode; must be one of NORMAL, MORE or FOLD.
MODE = "MORE"
if MODE not in ("NORMAL", "MORE", "FOLD"):
    raise ValueError(f"MODE needs to be NORMAL, MORE or FOLD not {MODE}")

"""
MORE: augment the data
FOLD: split the data into folds
NORMAL: split into test and train and nothing else
"""

# Locations of the unpacked dataset and of the generated train/test split.
dataset_path = "/content/Kather_texture_2016_image_tiles_5000"
train_test_path = "/content/train_test_folder"
train_ratio = 0.8  # fraction of each class's images that goes to the train split
train_aug_factor = 1  # number of augmented copies written per train image

train_path, test_path = (
    os.path.join(train_test_path, subdir) for subdir in ("train_aug", "test")
)
for split_dir in (train_path, test_path):
    os.makedirs(split_dir, exist_ok=True)


def _list_class_folders(root):
    """Return the sorted names of the sub-directories of ``root`` (one per class)."""
    return sorted(
        entry for entry in os.listdir(root) if os.path.isdir(os.path.join(root, entry))
    )


def _copy_images(image_names, src_dir, dest_dir):
    """Copy every file named in ``image_names`` from ``src_dir`` into ``dest_dir``."""
    os.makedirs(dest_dir, exist_ok=True)
    for image_name in image_names:
        shutil.copy(
            os.path.join(src_dir, image_name), os.path.join(dest_dir, image_name)
        )


def _build_augmenter():
    """Create the ImageDataGenerator used to write augmented train images."""
    return ImageDataGenerator(
        rotation_range=25,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode="reflect",
    )


def _save_augmented_copies(image_path, dest_dir, augmenter, n_copies, tag):
    """Write ``n_copies`` randomly transformed versions of one image.

    Files are named ``<stem>_<tag>_<i><ext>`` inside ``dest_dir``.
    """
    os.makedirs(dest_dir, exist_ok=True)
    stem, ext = os.path.splitext(os.path.basename(image_path))
    pixels = img_to_array(load_img(image_path))
    for i in range(n_copies):
        augmented = augmenter.random_transform(pixels).astype("uint8")
        Image.fromarray(augmented).save(
            os.path.join(dest_dir, f"{stem}_{tag}_{i}{ext}")
        )


# NOTE(review): files written by an earlier run are not removed, so re-running
# this cell with a new random split can leave old train/test files mixed in -
# confirm the output folders are empty before re-running.
if MODE == "NORMAL":
    # Per class: shuffle, then split into train and test by train_ratio.
    for class_folder in _list_class_folders(dataset_path):
        class_folder_path = os.path.join(dataset_path, class_folder)
        images_list = os.listdir(class_folder_path)
        random.shuffle(images_list)

        split_at = int(len(images_list) * train_ratio)
        _copy_images(
            images_list[:split_at],
            class_folder_path,
            os.path.join(train_path, class_folder),
        )
        _copy_images(
            images_list[split_at:],
            class_folder_path,
            os.path.join(test_path, class_folder),
        )

elif MODE == "MORE":
    import math

    dataset_path = "/content/Kather_texture_2016_image_tiles_5000"
    train_test_path = "/content/train_test_folder"
    train_ratio = 0.7
    test_ratio = 0.3

    train_path = os.path.join(train_test_path, "train_aug")
    test_path = os.path.join(train_test_path, "test")
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    train_datagen = _build_augmenter()

    for class_folder in _list_class_folders(dataset_path):
        class_folder_path = os.path.join(dataset_path, class_folder)
        images_list = os.listdir(class_folder_path)
        random.shuffle(images_list)

        # Slice the shuffled list instead of sampling and taking a set
        # difference: the split is just as random, and the train order no
        # longer depends on set iteration order.
        num_test_images = math.ceil(len(images_list) * test_ratio)
        test_images_list = images_list[:num_test_images]
        train_images_list = images_list[num_test_images:]

        train_class_path = os.path.join(train_path, class_folder)
        test_class_path = os.path.join(test_path, class_folder)

        # Original train images, then their augmented copies, go to train_aug.
        _copy_images(train_images_list, class_folder_path, train_class_path)
        for train_image in train_images_list:
            _save_augmented_copies(
                os.path.join(class_folder_path, train_image),
                train_class_path,
                train_datagen,
                train_aug_factor,
                "copy",
            )

        # Test images are copied unchanged.
        _copy_images(test_images_list, class_folder_path, test_class_path)

elif MODE == "FOLD":
    dataset_path = "/content/Kather_texture_2016_image_tiles_5000"
    output_path = "/content/folds"
    n_folds = 5

    # The dataset folder is deliberately not created here: a missing dataset
    # should fail in os.listdir below rather than produce empty folds.
    os.makedirs(output_path, exist_ok=True)

    # Define data generator for image augmentation
    datagen = _build_augmenter()

    # Collect (image path, class name) for every image in the dataset.
    image_files = []
    for class_folder in _list_class_folders(dataset_path):
        class_folder_path = os.path.join(dataset_path, class_folder)
        for image_name in os.listdir(class_folder_path):
            image_files.append(
                (os.path.join(class_folder_path, image_name), class_folder)
            )

    # KFold is used without shuffling, so the list is shuffled up front.
    random.shuffle(image_files)
    kf = KFold(n_splits=n_folds)

    fold_idx = 1
    for train_index, test_index in kf.split(image_files):
        # After the loop train_path/test_path point at the last fold.
        train_path = os.path.join(output_path, f"fold_{fold_idx}", "train")
        test_path = os.path.join(output_path, f"fold_{fold_idx}", "test")
        os.makedirs(train_path, exist_ok=True)
        os.makedirs(test_path, exist_ok=True)

        # Train images: copy the original and add augmented versions.
        for idx in train_index:
            image_path, class_folder = image_files[idx]
            class_path = os.path.join(train_path, class_folder)
            os.makedirs(class_path, exist_ok=True)
            shutil.copy(
                image_path, os.path.join(class_path, os.path.basename(image_path))
            )
            _save_augmented_copies(
                image_path, class_path, datagen, train_aug_factor, "aug"
            )

        # Test images: copy the original only.
        for idx in test_index:
            image_path, class_folder = image_files[idx]
            class_path = os.path.join(test_path, class_folder)
            os.makedirs(class_path, exist_ok=True)
            shutil.copy(
                image_path, os.path.join(class_path, os.path.basename(image_path))
            )

        fold_idx += 1

In [None]:
import numpy as np
import cv2
import albumentations as A
import os
import tensorflow as tf


# Custom data generator for loading images in batches ram-efficiently
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads images batch by batch from a class-folder tree.

    ``directory`` must contain one sub-directory per class. Class indices
    follow the sorted sub-directory names. Each batch is returned as
    ``(images / 255.0, one-hot labels)``.

    Args:
        directory: root folder holding one sub-directory per class.
        batch_size: number of samples per batch (the last batch may be smaller).
        target_size: size passed to ``cv2.resize`` for every image.
        shuffle: reshuffle the sample list at construction and after each epoch.
        augmentations: optional callable used as ``augmentations(image=img)["image"]``.
    """

    def __init__(
        self,
        directory,
        batch_size,
        target_size=(150, 150),
        shuffle=True,
        augmentations=None,
    ):
        super().__init__()
        self.directory = directory
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.augmentations = augmentations
        # Only sub-directories are classes; a stray file in the root would
        # otherwise be counted as a class and break the listing below.
        self.class_names = sorted(
            name
            for name in os.listdir(directory)
            if os.path.isdir(os.path.join(directory, name))
        )
        self.num_classes = len(self.class_names)
        self.samples = []
        for i, class_name in enumerate(self.class_names):
            class_dir = os.path.join(self.directory, class_name)
            # Sorted so the sample order is reproducible when shuffle is off.
            for filename in sorted(os.listdir(class_dir)):
                self.samples.append((os.path.join(class_dir, filename), i))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.samples) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load, resize and optionally augment batch ``idx``.

        Raises:
            ValueError: if an image file cannot be read.
        """
        batch_samples = self.samples[
            idx * self.batch_size : (idx + 1) * self.batch_size
        ]
        batch_images = []
        batch_labels = []
        for path, label in batch_samples:
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None; fail here with
                # the file name instead of inside cv2.resize.
                raise ValueError(f"Could not read image file: {path}")
            # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, self.target_size)
            if self.augmentations is not None:
                image = self.augmentations(image=image)["image"]
            batch_images.append(image)
            batch_labels.append(label)
        return np.array(batch_images) / 255.0, tf.keras.utils.to_categorical(
            batch_labels, num_classes=self.num_classes
        )

    def on_epoch_end(self):
        """Reshuffle the sample list in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.samples)

batch_size = 24
image_size = 200  # both height and width

# further augmentations if needed
train_augmentations = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=(-20, 20), p=0.5),
        A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.3, brightness_limit=0.2, contrast_limit=0.2),
        A.ColorJitter(p=0.1, brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        A.Blur(p=0.2, blur_limit=(3, 7)),
        A.RandomRotate90(p=0.5),
        A.VerticalFlip(p=0.5),
        # A.CoarseDropout(max_holes=2, max_height=18, max_width=18, min_holes=1, p=0.1),
    ]
)

# The model is built with input_shape=(image_size, image_size, 3), so the
# generators must resize to image_size rather than the 150x150 default.
# The batch size of 32 matches the sample-count printout in a later cell.
train_gen = CustomDataGenerator(
    train_path,
    32,
    target_size=(image_size, image_size),
    augmentations=train_augmentations,
)
# Evaluation data is neither shuffled nor augmented, as in the TEST mode of
# get_fold_generator.
test_gen = CustomDataGenerator(
    test_path,
    32,
    target_size=(image_size, image_size),
    shuffle=False,
    augmentations=None,
)


def get_fold_generator(
    fold_path,
    batch_size=batch_size,
    target_size=(image_size, image_size),
    shuffle=True,
    augmentations=train_augmentations,
    mode="TRAIN",
):
    """Create a ``CustomDataGenerator`` for one fold directory.

    Args:
        fold_path: directory holding one sub-directory per class.
        batch_size: samples per batch.
        target_size: size every image is resized to.
        shuffle: whether to shuffle samples (ignored in TEST mode).
        augmentations: augmentation pipeline (ignored in TEST mode).
        mode: "TRAIN" or "VAL" use ``shuffle`` and ``augmentations`` as given;
            "TEST" forces ``shuffle=False`` and ``augmentations=None``.

    Raises:
        ValueError: if ``mode`` is not TRAIN, VAL or TEST.
    """
    if mode in ("TRAIN", "VAL"):
        return CustomDataGenerator(
            directory=fold_path,
            batch_size=batch_size,
            target_size=target_size,
            shuffle=shuffle,
            augmentations=augmentations,
        )
    if mode == "TEST":
        return CustomDataGenerator(
            directory=fold_path,
            batch_size=batch_size,
            target_size=target_size,
            shuffle=False,
            augmentations=None,
        )
    raise ValueError(f"mode needs to be TRAIN, VAL or TEST not {mode}")


# fold creation if set to fold
if MODE == "FOLD":
    # One train and one test generator per fold folder, fold_1 .. fold_5.
    train_gens = [
        get_fold_generator(f"/content/folds/fold_{k}/train") for k in range(1, 6)
    ]
    test_gens = [
        get_fold_generator(f"/content/folds/fold_{k}/test", mode="TEST")
        for k in range(1, 6)
    ]
    # Individual names are kept because later cells refer to them directly.
    f1_t, f2_t, f3_t, f4_t, f5_t = train_gens
    f1_tt, f2_tt, f3_tt, f4_tt, f5_tt = test_gens

In [None]:
import matplotlib.pyplot as plt

# checking the data
# f1_t .. f5_t are only defined when MODE == "FOLD".
batch_x, batch_y = f1_t[10]
batch_x1, batch_y1 = f2_t[10]
batch_x2, batch_y2 = f3_t[10]
batch_x3, batch_y3 = f4_t[10]
batch_x4, batch_y4 = f5_t[10]

# Number of batches times batch size for the first four folds.
for fold_gen in (f1_t, f2_t, f3_t, f4_t):
    print(len(fold_gen) * batch_size)


def check(x, y):
    """Print the shapes, min/max values and Python types of a batch ``(x, y)``."""
    x_range = (np.min(x), np.max(x))
    y_range = (np.min(y), np.max(y))
    print(f"shapes: x: {x.shape}, y: {y.shape}")
    print(f"norms: x: {x_range}, y: {y_range}")
    print(f"types: x: {type(x)}, y; {type(y)}")


# Sanity-check the batch taken from the first fold's train generator.
check(batch_x, batch_y)


def dis_gen(x, y):
    """Show the first ``len(x) // 8`` images of a batch in one row.

    Each image is titled with its label from ``y``. Batches with fewer than
    8 images have nothing to show under this rule and are skipped.
    """
    n = len(x) // 8
    if n == 0:
        print("dis_gen: batch has fewer than 8 images, nothing to display")
        return
    # squeeze=False keeps axs two-dimensional even when n == 1; otherwise
    # plt.subplots returns a bare Axes that cannot be indexed.
    fig, axs = plt.subplots(nrows=1, ncols=n, figsize=(30, 30), squeeze=False)
    for i, ax in enumerate(axs[0]):
        ax.imshow(x[i])
        ax.set_title(f"Label: {y[i]}")
        ax.axis(False)
    plt.show()

# displaying some samples
for sample_x, sample_y in (
    (batch_x, batch_y),
    (batch_x1, batch_y1),
    (batch_x2, batch_y2),
    (batch_x3, batch_y3),
    (batch_x4, batch_y4),
):
    dis_gen(sample_x, sample_y)

In [None]:
# Number of batches times 32 for the train and test generators.
for split_gen in (train_gen, test_gen):
    print(len(split_gen) * 32)

In [None]:
# First batch of the first fold's test generator (defined only in FOLD mode).
batch_x, batch_y = f1_tt[0]
for fold_test_gen in (f1_tt, f2_tt, f3_tt):
    print(len(fold_test_gen) * batch_size)

check(batch_x, batch_y)
dis_gen(batch_x, batch_y)

In [None]:
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    MaxPooling2D,
    AveragePooling2D,
    Concatenate,
    BatchNormalization,
    Activation,
    Flatten,
    Dense,
    Dropout,
    Add,
    Multiply,
)
from tensorflow.keras.regularizers import l2
from keras.models import Model
from tensorflow.keras.utils import get_custom_objects


# Mish activation function
class Mish(Activation):
    """
    Activation layer for the Mish function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
    """

    def __init__(self, activation, **kwargs):
        # Everything is delegated to Activation; only the display name is set here.
        super().__init__(activation, **kwargs)
        self.__name__ = "Mish"


def mish(inputs):
    """Mish activation: ``inputs * tanh(softplus(inputs))``."""
    gate = tf.math.tanh(tf.math.softplus(inputs))
    return inputs * gate

# Register Mish activation function
# Stores a Mish layer wrapping mish() under the key "Mish" in Keras' custom
# objects, presumably so the Activation("Mish", ...) calls in the model code
# can resolve the name - confirm against the Keras version in use.
get_custom_objects().update({"Mish": Mish(mish)})

# Model
def create_inception(input_shape=(256, 256, 3), num_classes=4, dr=0.0):
    """Build the inception-style classifier with residual and attention blocks.

    Args:
        input_shape: shape of one input image, without the batch axis.
        num_classes: number of units in the final softmax layer.
        dr: dropout rate used after the stem convolutions and inside the
            last inception block.

    Returns:
        An uncompiled Keras ``Model`` named "inception_v4".

    The stem is a single stride-2 convolution with 80 filters applied to the
    input. Two further stages (32 and 64 filters) used to be built from the
    input as well, but their outputs were overwritten before use and never
    reached the model output; they also reused the layer names of this
    stage. They are removed here, which leaves the computed graph unchanged.
    NOTE(review): the block names "3" and "conv2d_5" suggest a deeper chained
    stem may have been intended - confirm before changing the architecture.
    """
    weight_decay = 0.00004
    input_tensor = Input(shape=input_shape)

    # Stem stage 1: stride-2 convolution on the raw input.
    x = Conv2D(
        80,
        (3, 3),
        strides=(2, 2),
        padding="valid",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(weight_decay),
        name="conv2d_1",
    )(input_tensor)
    x = BatchNormalization(name="batch_normalization_1")(x)
    x = Activation("Mish", name="activation_1")(x)
    x = Dropout(dr)(x)
    x = residual_block(x, 80, weight_decay, name="3")
    x = attention_block(x, 80, name="3")

    # Stem stage 2: stride-1 convolution widening to 192 channels.
    x = Conv2D(
        192,
        (3, 3),
        strides=(1, 1),
        padding="valid",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(weight_decay),
        name="conv2d_5",
    )(x)
    x = BatchNormalization(name="batch_normalization_5")(x)
    x = Activation("Mish", name="activation_5")(x)
    x = Dropout(dr)(x)
    x = residual_block(x, 192, weight_decay, name="4")
    x = attention_block(x, 192, name="4")

    x = MaxPooling2D((3, 3), strides=(2, 2), padding="valid", name="max_pooling2d_2")(x)

    # Inception blocks
    x = inception_block(x, [64, 96, 128, 16, 32, 32], name="inception_3a")
    x = inception_block(x, [128, 128, 192, 32, 96, 64], name="inception_3b")
    x = inception_block(x, [192, 96, 208, 16, 48, 64], name="inception_4a")
    x = inception_block(x, [160, 112, 224, 24, 64, 64], name="inception_4b")
    x = inception_block(x, [128, 128, 256, 24, 64, 64], name="inception_4c")
    x = Dropout(0.2)(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding="valid", name="max_pooling2d_3")(x)
    x = inception_block(x, [256, 160, 320, 32, 128, 128], name="inception_5a")
    x = inception_block(x, [384, 192, 384, 48, 128, 128], name="inception_5b")
    x = inception_block(x, [576, 240, 576, 72, 192, 192], name="inception_5c")
    x = inception_block(x, [864, 360, 864, 108, 288, 288], name="inception_5d", dr_i=dr)
    x = Dropout(0.2)(x)

    # Classification block
    x = AveragePooling2D((7, 7), strides=(1, 1), name="avg_pooling2d_1")(x)
    x = Flatten(name="flatten_1")(x)
    x = Dense(
        num_classes,
        activation="softmax",
        kernel_regularizer=l2(weight_decay),
        name="dense_1",
    )(x)

    # Create model
    model = Model(input_tensor, x, name="inception_v4")

    return model


def inception_block(x, filters, name, dr_i=0.00):
    """Three parallel convolution branches (1x1, 3x3, 5x5) concatenated together.

    Args:
        x: input tensor fed to every branch.
        filters: six values, [in_channels, x1_channels, x3_reduce_channels,
            x3_channels, x5_reduce_channels, x5_channels]. The first value is
            unpacked but not used.
        name: prefix for all layer names in the block.
        dr_i: dropout rate applied inside every branch.

    Returns:
        The concatenation of the 1x1, 3x3 and 5x5 branch outputs.
    """
    _, n_x1, n_x3_reduce, n_x3, n_x5_reduce, n_x5 = filters

    def conv_unit(tensor, channels, kernel, label, dropout_before_bn=False):
        # Conv -> (optional Dropout) -> BatchNorm -> Mish -> Dropout.
        out = Conv2D(
            channels,
            kernel,
            strides=(1, 1),
            padding="same",
            kernel_initializer="he_normal",
            kernel_regularizer=l2(0.00004),
            name=label,
        )(tensor)
        if dropout_before_bn:
            out = Dropout(dr_i)(out)
        out = BatchNormalization(name=label + "_bn")(out)
        out = Activation("Mish", name=label + "_act")(out)
        return Dropout(dr_i)(out)

    # Only the 1x1 branch has the extra dropout ahead of its batch norm.
    branch_1x1 = conv_unit(x, n_x1, (1, 1), name + "_x1", dropout_before_bn=True)

    reduced_3x3 = conv_unit(x, n_x3_reduce, (1, 1), name + "_x3_reduce")
    branch_3x3 = conv_unit(reduced_3x3, n_x3, (3, 3), name + "_x3")

    reduced_5x5 = conv_unit(x, n_x5_reduce, (1, 1), name + "_x5_reduce")
    branch_5x5 = conv_unit(reduced_5x5, n_x5, (5, 5), name + "_x5")

    return Concatenate(name=name + "_concat")([branch_1x1, branch_3x3, branch_5x5])


def residual_block(input_tensor, filters, w_decay, name, dr_r=0.0):
    """Two Conv-BatchNorm-Mish-Dropout stages whose output is added to the input.

    Args:
        input_tensor: block input, also used as the skip connection.
        filters: filter count of both 3x3 convolutions.
        w_decay: L2 regularisation factor for the convolution kernels.
        name: identifier embedded in the layer names.
        dr_r: dropout rate applied after each activation.
    """
    x = input_tensor
    # The second stage carries the "_1" suffix in its layer names.
    for suffix in ("", "_1"):
        x = Conv2D(
            filters,
            (3, 3),
            padding="same",
            kernel_initializer="he_normal",
            kernel_regularizer=l2(w_decay),
            name=f"res_conv{name}{suffix}",
        )(x)
        x = BatchNormalization(name=f"batch_res{name}{suffix}")(x)
        x = Activation("Mish", name=f"activation_res{name}{suffix}")(x)
        x = Dropout(dr_r)(x)
    return Add()([x, input_tensor])


def attention_block(input_tensor, filters, name, dr=0.0):
    """Multiply the input by a sigmoid gate computed from two 3x3 convolutions.

    Args:
        input_tensor: tensor to be gated.
        filters: filter count of both convolutions.
        name: identifier embedded in the layer names.
        dr: dropout rate applied to the gate before the multiplication.
    """
    gate = input_tensor
    # Stage 1 uses Mish, stage 2 uses sigmoid to produce the gate.
    for step, act in ((1, "Mish"), (2, "sigmoid")):
        gate = Conv2D(
            filters,
            (3, 3),
            strides=(1, 1),
            padding="same",
            kernel_initializer="he_normal",
            name=f"conv_att{name}_{step}",
        )(gate)
        gate = BatchNormalization(name=f"batch_att{name}_{step}")(gate)
        gate = Activation(act, name=f"ac_att{name}_{step}")(gate)
    gate = Dropout(dr)(gate)
    return Multiply()([input_tensor, gate])

In [None]:
import tensorflow as tf
import random
import numpy as np
from keras.optimizers import optimizer
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.experimental import AdamW
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img
# from tensorflow.keras.utils import array_to_image
import csv
from keras.callbacks import (
    ModelCheckpoint,
    EarlyStopping,
    TensorBoard,
    ReduceLROnPlateau,
)

# Define the callbacks
# All monitors use training metrics ("accuracy", "loss").
checkpoint_cb = ModelCheckpoint(
    "best_model.h5", save_best_only=True, monitor="accuracy"
)
early_stop_cb = EarlyStopping(monitor="loss", patience=6)
tensorboard_cb = TensorBoard(log_dir="logs")
reduce_lr_cb = ReduceLROnPlateau(monitor="loss", patience=3, factor=0.1)

callbacks = [checkpoint_cb, early_stop_cb, tensorboard_cb, reduce_lr_cb]


def save_eval_results(eval_results, filename, metric_names=None):
    """Write evaluation results to ``filename`` as a two-row CSV file.

    Args:
        eval_results: sequence of metric values, e.g. from ``model.evaluate``.
        filename: path of the file to write; an existing file is overwritten.
        metric_names: header row. When omitted, the ``metrics_names`` of the
            module-level ``model`` are used, so that variable must exist.
    """
    if metric_names is None:
        metric_names = model.metrics_names
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(metric_names)
        writer.writerow(eval_results)


def plot_history(history, call=None):
    """Plot every metric recorded in ``history.history`` side by side and save the figure.

    Args:
        history: object with a ``history`` dict mapping metric name to
            per-epoch values, as returned by ``model.fit``.
        call: label appended to the output file name, ``history_<call>.png``.
    """
    metrics = list(history.history.keys())
    # squeeze=False keeps axs two-dimensional when only one metric is
    # recorded; otherwise plt.subplots returns a bare Axes.
    fig, axs = plt.subplots(
        nrows=1, ncols=len(metrics), figsize=(20, 5), squeeze=False
    )

    for ax, metric in zip(axs[0], metrics):
        ax.plot(history.history[metric])
        ax.set_title(metric)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(metric)
    # A fixed prefix replaces str(history), which put the object's repr
    # into the file name.
    plt.savefig(f"history_{call}.png")
    plt.show()


def quick_test(test_generator, model, img_size):
    """Predict one random batch and save each image under its predicted class index.

    Args:
        test_generator: indexable batch source returning ``(images, labels)``.
        model: object with a ``predict`` method returning per-class scores.
        img_size: accepted for interface compatibility; not used.

    Images are written to ``pred/<predicted label>_<index>.jpg``.
    NOTE(review): the images come straight from the generator; if it yields
    BGR data (cv2.imread without conversion) the saved colours are swapped -
    confirm.
    """
    # Pick a random batch of test data.
    rand = np.random.randint(0, len(test_generator))
    batch_x, batch_y = test_generator.__getitem__(rand)

    pred_y = model.predict(batch_x)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs("pred", exist_ok=True)

    for i in range(batch_x.shape[0]):
        img = array_to_img(batch_x[i])
        pred_label = np.argmax(pred_y[i])
        img.save(f"pred/{pred_label}_{i}.jpg")

def lerp(start, end, weight):
    """Linear interpolation: ``start`` at weight 0, ``end`` at weight 1."""
    delta = end - start
    return start + weight * delta


def sparse_lerp(start, end, weight):
    """Linear interpolation written so that ``end`` is only ever subtracted from ``start``."""
    # Same value as lerp(), but a dense Tensor cannot be subtracted from
    # sparse IndexedSlices, so the difference is taken the other way round
    # and negated.
    negated_delta = -(start - end)
    return start + weight * negated_delta


# Lion optimizer
class Lion(optimizer.Optimizer):
    r"""Optimizer that implements the Lion algorithm.
    Lion was published in the paper "Symbolic Discovery of Optimization Algorithms"
    which is available at https://arxiv.org/abs/2302.06675
    Args:
      learning_rate: A `tf.Tensor`, floating point value, a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
        that takes no arguments and returns the actual value to use. The
        learning rate. Defaults to 1e-4.
      beta_1: A float value or a constant float tensor. Factor used to
        interpolate the current gradient and the momentum. Defaults to 0.9.
      beta_2: A float value or a constant float tensor. The exponential decay
        rate for the momentum. Defaults to 0.99.
    Notes:
    The sparse implementation of this algorithm (used when the gradient is an
    IndexedSlices object, typically because of `tf.gather` or an embedding
    lookup in the forward pass) does apply momentum to variable slices even if
    they were not used in the forward pass (meaning they have a gradient equal
    to zero). Momentum decay (beta2) is also applied to the entire momentum
    accumulator. This means that the sparse behavior is equivalent to the dense
    behavior (in contrast to some momentum implementations which ignore momentum
    unless a variable slice was actually used).
    """

    def __init__(
        self,
        learning_rate=1e-4,
        beta_1=0.9,
        beta_2=0.99,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        jit_compile=True,
        name="Lion",
        **kwargs,
    ):
        super().__init__(
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            jit_compile=jit_compile,
            **kwargs,
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def build(self, var_list):
        """Initialize optimizer variables.
        var_list: list of model variables to build Lion variables on.
        """
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        # One momentum accumulator ("ema") per model variable.
        self._emas = []
        for var in var_list:
            self._emas.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="ema"
                )
            )

    def update_step(self, gradient, variable):
        """Update step given gradient and the associated model variable."""
        lr = tf.cast(self.learning_rate, variable.dtype)
        # Scalars in the variable's dtype: a float32 constant of shape (1,)
        # would clash with non-float32 variables and would turn the update
        # for a scalar variable into shape (1,).
        beta_1 = tf.cast(self.beta_1, variable.dtype)
        beta_2 = tf.cast(self.beta_2, variable.dtype)

        var_key = self._var_key(variable)
        ema = self._emas[self._index_dict[var_key]]

        if isinstance(gradient, tf.IndexedSlices):
            # Sparse gradients.
            lerp_fn = sparse_lerp
        else:
            # Dense gradients.
            lerp_fn = lerp

        # Step along the sign of the gradient/momentum interpolation.
        update = lerp_fn(ema, gradient, 1 - beta_1)
        update = tf.sign(update)
        variable.assign_sub(update * lr)

        # The momentum is updated after the step, with its own factor.
        ema.assign(lerp_fn(ema, gradient, 1 - beta_2))

    def get_config(self):
        """Return the optimizer config including learning rate and both betas."""
        config = super().get_config()

        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(self._learning_rate),
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
            }
        )
        return config


def focal_loss(gamma=2.0, alpha=4.0):
    """Create a multi-class focal loss function with fixed ``gamma`` and ``alpha``."""
    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002
        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]
        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.0e-9
        targets = tf.convert_to_tensor(y_true, tf.float32)
        # epsilon keeps the logarithm finite for zero probabilities.
        probs = tf.convert_to_tensor(y_pred, tf.float32) + epsilon

        cross_entropy = targets * -tf.math.log(probs)
        modulator = targets * tf.pow(1.0 - probs, gamma)
        per_class = alpha * (modulator * cross_entropy)
        # Largest per-class term of each sample, averaged over the batch.
        return tf.reduce_mean(tf.reduce_max(per_class, axis=1))

    return focal_loss_fixed

# Lion optimizer
opt = Lion(5e-5, weight_decay=5e-6)


def _compile_model(loss):
    """Build a fresh 8-class model and compile it with Lion(1e-5) and the shared metrics."""
    net = create_inception(num_classes=8, input_shape=(image_size, image_size, 3))
    net.compile(
        optimizer=Lion(1e-5),
        loss=loss,
        metrics=[
            "accuracy",
            f1_m,
            recall_m,
            precision_m,
            tf.keras.metrics.AUC(),
            tf.keras.metrics.SpecificityAtSensitivity(0.5),
        ],
    )
    return net


# training
# The fold branch must test for "FOLD", the value MODE is validated against;
# the previous "FOLDS" could never match.
if MODE == "FOLD":
    # train on each fold
    for i in range(n_folds):
        print(f"At fold {i + 1}")
        # save_eval_results reads the module-level name "model".
        model = _compile_model("categorical_crossentropy")
        history = model.fit(train_gens[i], epochs=25, callbacks=callbacks)
        model.save(f"kather_fold_{str(i + 1)}.h5")
        eval_results = model.evaluate(test_gens[i])
        # One results file and one plot per fold, so later folds do not
        # overwrite earlier ones.
        save_eval_results(eval_results, f"Inc_Kather_fold_{i + 1}")

        # plotting the history
        plot_history(history, call=f"fold_{i + 1}")

        # testing
        quick_test(test_gens[i], model, (image_size, image_size))
else:
    model = _compile_model(focal_loss())
    history = model.fit(train_gen, epochs=25, callbacks=callbacks)
    eval_results = model.evaluate(test_gen)
    save_eval_results(eval_results, "Inc_Kather")
    # plotting the history
    plot_history(history)

    # testing
    quick_test(test_gen, model, (image_size, image_size))

In [None]:
#import shutil

#shutil.move("kather_fold_1.h5", "/content/drive/MyDrive/")