In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and move the trained models onto it.
drive.mount("/content/drive")

In [None]:
# Disabled alternative: download the BACH challenge archives directly from Zenodo.
#!wget "https://zenodo.org/record/3632035/files/ICIAR2018_BACH_Challenge.zip?download=1"
#!wget "https://zenodo.org/record/3632035/files/ICIAR2018_BACH_Challenge_TestDataset.zip?download=1"
# Extract the dataset archive stored on the mounted Drive into the current
# working directory (shell command run through IPython's `!`).
!unzip "/content/drive/MyDrive/ICIAR2018_BACH.zip"

In [None]:
import keras.backend as K
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_auc_score

# metrics
def auc(y_true, y_pred):
    """Mean one-vs-rest ROC AUC over every class except class 0.

    Args:
        y_true: One-hot encoded labels, indexed as (samples, classes); the
            class index of each sample is recovered with ``argmax(axis=1)``.
        y_pred: Per-class scores with the same layout. Column 0 is skipped.

    Returns:
        The mean of the per-class AUC values.

    Raises:
        ValueError: Propagated from ``roc_auc_score`` when a scored class has
            no positive or no negative sample in ``y_true``.
    """
    labels = np.argmax(y_true, axis=1)
    scores = np.asarray(y_pred)[:, 1:]

    auc_scores = []
    for i in range(scores.shape[1]):
        # Column i of `scores` belongs to class i + 1 because column 0 was
        # dropped. Binarise the labels for that class: handing multiclass
        # integer labels plus a 1-D score vector to roc_auc_score raises.
        is_class = (labels == i + 1).astype(int)
        auc_scores.append(roc_auc_score(is_class, scores[:, i]))

    # Average the AUC across the scored classes.
    return np.mean(auc_scores)


def dice_score(y_true, y_pred):
    """Smoothed Dice coefficient computed over all elements of both tensors.

    Both inputs are flattened first, so one score is produced for the whole
    batch. The +1 in numerator and denominator keeps the ratio defined when
    both tensors sum to zero.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    return (2.0 * overlap + 1) / (total + 1)


def recall_m(y_true, y_pred):
    """Batch recall: rounded true positives divided by rounded actual positives.

    Values are clipped to [0, 1] and rounded before counting; ``K.epsilon()``
    in the denominator avoids a division by zero when there are no positives.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hit_count / (actual_count + K.epsilon())


def precision_m(y_true, y_pred):
    """Batch precision: rounded true positives divided by rounded predicted positives.

    Values are clipped to [0, 1] and rounded before counting; ``K.epsilon()``
    in the denominator avoids a division by zero when nothing is predicted.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hit_count / (predicted_count + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m`` for the batch."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # epsilon keeps the ratio finite when precision and recall are both zero
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio


def jaccard_distance(y_true, y_pred, smooth=100):
    """Smoothed Jaccard distance, averaged over the remaining axes.

    Sums are taken over axes 1 and 2 (presumably the spatial axes of the
    inputs -- TODO confirm against callers). ``smooth`` is added to both the
    intersection and the union and also scales the resulting distance.
    """
    reduce_axes = (1, 2)
    overlap = tf.reduce_sum(y_true * y_pred, axis=reduce_axes)
    total = tf.reduce_sum(y_true + y_pred, axis=reduce_axes)
    union = total - overlap
    similarity = (overlap + smooth) / (union + smooth)
    return tf.reduce_mean((1 - similarity) * smooth)


def tf_mean_iou(y_true, y_pred):
    """Mean IoU of thresholded predictions, returned with its update op.

    Returns a two-element list: the mean of the collected scores and the
    update op produced by the last ``tf.metrics.mean_iou`` call.

    NOTE(review): ``tf.metrics.mean_iou`` returning ``(score, update_op)`` and
    ``K.get_session()`` look like TF 1.x graph-mode APIs -- confirm they are
    available in the TensorFlow version this notebook runs on.
    """
    prec = []
    # np.arange(0.5, 1.0, 0.5) yields the single threshold 0.5
    for t in np.arange(0.5, 1.0, 0.5):
        # binarise the predictions at threshold t
        y_pred_ = tf.cast(y_pred > t, tf.int32)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        # initialise the local variables before the score is collected
        K.get_session().run(tf.local_variables_initializer())
        prec.append(score)
    val = K.mean(K.stack(prec), axis=0)
    return [val, up_opt]


def cross_entropy_balanced(y_true, y_pred):
    """Class-balanced sigmoid cross-entropy computed from logits.

    The positive class is weighted by ``beta / (1 - beta)``, where ``beta`` is
    the fraction of negative entries in ``y_true``; the mean cost is rescaled
    by ``1 - beta``. Returns 0.0 when ``y_true`` contains no positives.
    ``y_pred`` is passed on as ``logits``.
    """
    labels = tf.cast(y_true, tf.float32)

    n_pos = tf.reduce_sum(labels)
    n_neg = tf.reduce_sum(1.0 - labels)
    neg_fraction = n_neg / (n_pos + n_neg)

    per_element = tf.nn.weighted_cross_entropy_with_logits(
        logits=y_pred,
        labels=labels,
        pos_weight=neg_fraction / (1 - neg_fraction),
    )
    balanced = tf.reduce_mean(per_element * (1 - neg_fraction))

    # Without positives the weight above is not finite, so report zero loss.
    no_positives = tf.equal(n_pos, 0.0)
    return tf.where(no_positives, 0.0, balanced)


def pixel_error(y_true, y_pred):
    """Fraction of elements whose prediction, thresholded at 0.5, differs from the label."""
    predicted = tf.cast(tf.greater(y_pred, 0.5), tf.int32)
    expected = tf.cast(y_true, tf.int32)
    mismatch = tf.not_equal(predicted, expected)
    return tf.reduce_mean(tf.cast(mismatch, tf.float32))

In [None]:
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    load_img,
    img_to_array,
)
from PIL import Image
import os
import numpy
import shutil
from tqdm import tqdm
import random
import albumentations as A
import cv2

# Define parameters for image resizing and augmentation

parent_folder_path = "/content/ICIAR2018_BACH_Challenge_JPG/"

num_augmented_images = 6  # number of augmented images per original image
target_size = (256, 256)
test_ratio = 0.2  # ratio compared to the original dataset size
split_seed = 42  # fixed seed so the train/test split is the same on every fresh run
image_extensions = (".tif", ".png", ".jpg", ".jpeg")

# The augmentation pipeline does not depend on the image, so it is built once
# rather than once per training image.
transform = A.Compose(
    [
        A.Rotate(limit=20),
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
        A.OneOf(
            [
                A.ShiftScaleRotate(
                    shift_limit=0.18, scale_limit=0.18, rotate_limit=18, p=0.25
                ),
                A.ShiftScaleRotate(
                    shift_limit=0.18,
                    scale_limit=0.18,
                    rotate_limit=18,
                    border_mode=cv2.BORDER_CONSTANT,
                    value=0,
                    interpolation=cv2.INTER_NEAREST,
                    p=0.25,
                ),
            ],
            p=0.4,
        ),
        A.RGBShift(p=0.2),
    ]
)

# Dedicated RNG: the split is reproducible without touching the global
# `random` state.
split_rng = random.Random(split_seed)

# Sorted so the RNG is consumed in the same order regardless of listdir order.
for class_name in sorted(os.listdir(parent_folder_path)):
    class_folder_path = os.path.join(parent_folder_path, class_name)
    if not os.path.isdir(class_folder_path):
        continue

    test_class_folder_path = os.path.join(
        parent_folder_path, "..", "ICIAR2018_BACH_Challenge_JPG_test", class_name
    )

    # Re-running this cell on an already processed class would move augmented
    # copies of training images into the test set and augment them again.
    already_split = os.path.isdir(test_class_folder_path) and any(
        name.lower().endswith(image_extensions)
        for name in os.listdir(test_class_folder_path)
    )
    if already_split:
        print(f"Skipping {class_name}: test split already exists.")
        continue
    os.makedirs(test_class_folder_path, exist_ok=True)

    img_names = sorted(
        img_name
        for img_name in os.listdir(class_folder_path)
        if img_name.lower().endswith(image_extensions)
    )
    split_rng.shuffle(img_names)
    num_test_images = int(len(img_names) * test_ratio)

    # Test images: store a resized copy in the test folder, then drop the original.
    for img_name in img_names[:num_test_images]:
        source_path = os.path.join(class_folder_path, img_name)
        with Image.open(source_path) as img:
            img.resize(target_size).save(
                os.path.join(test_class_folder_path, img_name)
            )
        # the file handle is closed before the source file is deleted
        os.remove(source_path)

    # Training images: keep the original and add augmented copies next to it.
    for img_name in img_names[num_test_images:]:
        with Image.open(os.path.join(class_folder_path, img_name)) as img:
            # Force three channels: the pipeline contains RGBShift.
            resized = img.convert("RGB").resize(target_size)

        for i in range(num_augmented_images):
            transformed = transform(image=np.array(resized))
            transformed_image = Image.fromarray(transformed["image"])
            transformed_image.save(
                os.path.join(class_folder_path, f"aug_{i}_{img_name}")
            )

In [None]:
import os

# Root folders of the training images and of the held-out test images.
train_path = "/content/ICIAR2018_BACH_Challenge_JPG"
test_path = "/content/ICIAR2018_BACH_Challenge_JPG_test"

# Check the number of images in each class
def check_nums(dataset_path):
    """Print how many entries each sub-directory of ``dataset_path`` contains.

    Entries of ``dataset_path`` that are not directories are ignored.
    """
    for class_folder in os.listdir(dataset_path):
        class_folder_path = os.path.join(dataset_path, class_folder)
        if not os.path.isdir(class_folder_path):
            continue
        num_images = len(os.listdir(class_folder_path))
        print(f"Class {class_folder} has {num_images} images.")


# Report the per-class counts of the training folder, then of the test folder.
check_nums(train_path)
check_nums(test_path)

In [None]:
import numpy as np
import cv2
import albumentations as A
import os
import tensorflow as tf

# data generator following the Keras Sequence structure
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding image batches from ``directory/<class>/<file>``.

    Every visible sub-directory of ``directory`` is one class; class indices
    follow the sorted directory names. Images are read with OpenCV, resized,
    optionally augmented, scaled to [0, 1] and returned with one-hot labels.

    Args:
        directory: Root folder that contains one sub-folder per class.
        batch_size: Number of samples per batch (the last batch may be smaller).
        target_size: Size passed to ``cv2.resize``, which interprets it as
            (width, height).
        shuffle: Whether to shuffle the sample list on creation and after
            every epoch.
        augmentations: Optional callable invoked as ``augmentations(image=img)``
            that returns a mapping with an ``"image"`` entry.
    """

    def __init__(
        self,
        directory,
        batch_size,
        target_size=(150, 150),
        shuffle=True,
        augmentations=None,
    ):
        super().__init__()
        self.directory = directory
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.augmentations = augmentations
        # Only visible sub-directories are classes. Stray files or hidden
        # folders such as ``.ipynb_checkpoints`` would otherwise be counted as
        # classes and shift ``num_classes``.
        self.class_names = sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith(".")
            and os.path.isdir(os.path.join(directory, name))
        )
        self.num_classes = len(self.class_names)
        self.samples = []
        for i, class_name in enumerate(self.class_names):
            class_dir = os.path.join(self.directory, class_name)
            # sorted: os.listdir order is arbitrary, and an unshuffled
            # generator should return the same order on every run
            for filename in sorted(os.listdir(class_dir)):
                file_path = os.path.join(class_dir, filename)
                if os.path.isfile(file_path):
                    self.samples.append((file_path, i))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.samples) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images scaled to [0, 1], one-hot labels)``.

        Raises:
            ValueError: If a sample file cannot be decoded as an image.
        """
        batch_samples = self.samples[
            idx * self.batch_size : (idx + 1) * self.batch_size
        ]
        batch_images = []
        batch_labels = []
        for file_path, label in batch_samples:
            # cv2.imread returns the channels in BGR order; they are kept as-is.
            image = cv2.imread(file_path)
            if image is None:
                # imread signals failure by returning None instead of raising
                raise ValueError(f"Could not read image file: {file_path}")
            image = cv2.resize(image, self.target_size)
            if self.augmentations is not None:
                image = self.augmentations(image=image)["image"]
            batch_images.append(image)
            batch_labels.append(label)
        return np.array(batch_images) / 255.0, tf.keras.utils.to_categorical(
            batch_labels, num_classes=self.num_classes
        )

    def on_epoch_end(self):
        """Reshuffle the sample list in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.samples)

# further augmentations if needed
# Pipeline passed to the training generator as its default `augmentations`.
# Each transform carries its own probability `p`.
train_augmentations = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3, brightness_limit=0.2, contrast_limit=0.2),
        A.ColorJitter(p=0.2),
        A.GaussianBlur(p=0.25, blur_limit=(3, 7)),
        A.RandomRotate90(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=(-20, 20), p=0.5, interpolation=cv2.INTER_NEAREST),
        # one of the two shift/scale/rotate variants below, chosen with p=0.4
        A.OneOf(
            [
                # different border modes for more variety
                A.ShiftScaleRotate(
                    shift_limit=0.18,
                    scale_limit=0.18,
                    rotate_limit=18,
                    border_mode=cv2.BORDER_CONSTANT,
                    value=0,
                    interpolation=cv2.INTER_NEAREST,
                ),
                A.ShiftScaleRotate(
                    shift_limit=0.25,
                    scale_limit=0.25,
                    rotate_limit=25,
                    interpolation=cv2.INTER_NEAREST,
                ),
            ],
            p=0.4,
        ),
    ]
)

# Defaults for get_fold_generator: samples per batch and the side length of
# the square images the generators produce.
batch_size = 64
image_size = 256

# get the data generators
def get_fold_generator(
    fold_path,
    batch_size=batch_size,
    target_size=(image_size, image_size),
    shuffle=True,
    augmentations=train_augmentations,
    mode="TRAIN",
):
    """Create a ``CustomDataGenerator`` for one dataset folder.

    Args:
        fold_path: Folder that contains one sub-folder per class.
        batch_size: Samples per batch.
        target_size: Size every image is resized to.
        shuffle: Whether to shuffle samples (ignored in ``"TEST"`` mode).
        augmentations: Augmentation pipeline (ignored in ``"TEST"`` mode).
        mode: ``"TRAIN"`` or ``"VAL"`` use ``shuffle`` and ``augmentations``
            as given; ``"TEST"`` forces no shuffling and no augmentation.

    Returns:
        The configured ``CustomDataGenerator``.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if mode in ("TRAIN", "VAL"):
        return CustomDataGenerator(
            directory=fold_path,
            batch_size=batch_size,
            target_size=target_size,
            shuffle=shuffle,
            augmentations=augmentations,
        )
    if mode == "TEST":
        # Evaluation data keeps a fixed order and is never augmented.
        return CustomDataGenerator(
            directory=fold_path,
            batch_size=batch_size,
            target_size=target_size,
            shuffle=False,
            augmentations=None,
        )
    raise ValueError(f"Unknown mode {mode!r}; expected 'TRAIN', 'VAL' or 'TEST'.")


# Training generator (shuffled, augmented by default) and test generator
# (fixed order, no augmentation) over the two dataset folders.
train_generator = get_fold_generator(
    "/content/ICIAR2018_BACH_Challenge_JPG/", mode="TRAIN"
)
test_generator = get_fold_generator(
    "/content/ICIAR2018_BACH_Challenge_JPG_test/", mode="TEST"
)

In [None]:
import matplotlib.pyplot as plt

# Fetch the first batch of each generator for a sanity check
batch_x, batch_y = train_generator[0]
batch_x1, batch_y1 = test_generator[0]

# check data integrity
def check(x, y):
    """Print the shapes, (min, max) value ranges and Python types of ``x`` and ``y``."""
    print(f"shapes: x: {x.shape}, y: {y.shape}")
    print(f"norms: x: {np.min(x), np.max(x)}, y: {np.min(y), np.max(y)}")
    # "y:" here was previously misspelled as "y;"
    print(f"types: x: {type(x)}, y: {type(y)}")


# Inspect the training batch, then the test batch.
check(batch_x, batch_y)
check(batch_x1, batch_y1)


# display images
def dis_gen(x, y):
    """Show every image of a batch side by side, titled with its label.

    Args:
        x: Sequence of images accepted by ``imshow``.
        y: Labels, indexed in parallel with ``x``.

    Nothing is drawn for an empty batch.
    """
    n = len(x)
    if n == 0:
        return
    # squeeze=False always returns a 2-D array of Axes, so a batch holding a
    # single image works as well (otherwise a bare Axes comes back).
    fig, axs = plt.subplots(nrows=1, ncols=n, figsize=(30, 30), squeeze=False)
    for i, ax in enumerate(axs[0]):
        ax.imshow(x[i])
        ax.set_title(f"Label: {y[i]}")
        ax.axis(False)
    plt.show()


# Display the training batch, then the test batch.
dis_gen(batch_x, batch_y)
dis_gen(batch_x1, batch_y1)

In [None]:
# get the model
# @leondgarse https://github.com/leondgarse/keras_cv_attention_model

!pip install -U keras-cv-attention-models
import keras_cv_attention_models
import tensorflow as tf

# TinyViT-5M classifier for 256x256 three-channel inputs with four output
# classes; 256 matches `image_size` used by the data generators.
model = keras_cv_attention_models.tinyvit.TinyViT_5M(
    input_shape=(256, 256, 3), num_classes=4
)

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img

# from tensorflow.keras.utils import array_to_image
import csv
from keras.callbacks import (
    ModelCheckpoint,
    EarlyStopping,
    TensorBoard,
    ReduceLROnPlateau,
)
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt
from typing import Tuple, Optional, Callable
import tensorflow as tf
from keras.optimizers import optimizer

!pip install git+https://github.com/artemmavrin/focal-loss.git
from focal_loss import SparseCategoricalFocalLoss
from keras.models import load_model

# Define the callbacks
# NOTE: every monitor below names a training metric ("accuracy" / "loss");
# the fit call in this notebook passes no validation data.
callbacks = [
    # overwrite best_model.h5 only when the monitored "accuracy" improves
    ModelCheckpoint("best_model.h5", save_best_only=True, monitor="accuracy"),
    # stop once "loss" has not improved for 2 epochs
    EarlyStopping(monitor="loss", patience=2),
    # write TensorBoard logs to ./logs
    TensorBoard(log_dir="logs"),
    # scale the learning rate by 0.1 after 1 epoch without "loss" improvement
    ReduceLROnPlateau(monitor="loss", patience=1, factor=0.1),
]

# functions


def focal_loss(gamma=2.0, alpha=4.0):
    """Build a multi-class focal loss: FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t).

    Reference: "Focal Loss for Dense Object Detection",
    https://arxiv.org/abs/1708.02002

    Args:
        gamma: Focusing exponent applied to ``1 - p_t``.
        alpha: Constant scale applied to the loss.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` usable as a Keras loss.
    """
    gamma, alpha = float(gamma), float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss of one batch.

        Args:
            y_true: Ground-truth labels, shape [batch_size, num_cls].
            y_pred: Model output, shape [batch_size, num_cls]; expected to be
                probabilities (after softmax), not logits.

        Returns:
            Scalar tensor: the batch mean of each sample's largest per-class term.
        """
        epsilon = 1.0e-9
        targets = tf.convert_to_tensor(y_true, tf.float32)
        # epsilon keeps the logarithm finite for zero probabilities
        probs = tf.convert_to_tensor(y_pred, tf.float32) + epsilon

        cross_entropy = targets * -tf.math.log(probs)
        modulation = targets * tf.pow(1.0 - probs, gamma)
        per_class = alpha * (modulation * cross_entropy)
        return tf.reduce_mean(tf.reduce_max(per_class, axis=1))

    return focal_loss_fixed


def exists(val):
    """Return True for any value other than ``None`` (falsy values included)."""
    if val is None:
        return False
    return True


# update functions


@tf.function
def update_fn(p, grad, exp_avg, lr, wd, beta1, beta2):
    """Apply one Lion update to ``p`` and its momentum ``exp_avg`` in place.

    Args:
        p: Variable being optimised.
        grad: Gradient of the loss with respect to ``p``.
        exp_avg: Variable holding the running average of the gradients.
        lr: Learning rate.
        wd: Decoupled weight-decay coefficient.
        beta1: Weight of the momentum when forming the update direction.
        beta2: Decay rate of the running average.
    """
    # stepweight decay

    p.assign(p * (1 - lr * wd))

    # weight update: sign of the beta1-interpolation between momentum and
    # gradient. The earlier LinSpace(...)[0] weight was always 1.0, so beta1
    # and the current gradient never entered the update.

    update = exp_avg * beta1 + grad * (1 - beta1)
    p.assign_add(tf.sign(update) * -lr)

    # decay the momentum running average coefficient

    exp_avg.assign(exp_avg * beta2 + grad * (1 - beta2))


# class
def lerp(start, end, weight):
    """Linear interpolation: ``start`` at weight 0, ``end`` at weight 1."""
    delta = end - start
    return start + weight * delta


def sparse_lerp(start, end, weight):
    """Linear interpolation written so that ``end`` is only ever subtracted.

    Mathematically the same as ``lerp``. The difference is formed as
    ``start - end`` and negated because a dense tensor cannot be subtracted
    from sparse IndexedSlices.
    """
    reversed_delta = start - end
    return start + weight * -reversed_delta


class Lion(optimizer.Optimizer):
    """Optimizer that implements the Lion algorithm.

    Lion was published in the paper "Symbolic Discovery of Optimization
    Algorithms", available at https://arxiv.org/abs/2302.06675

    Args:
      learning_rate: Learning rate, forwarded to ``_build_learning_rate``.
        Defaults to 1e-4.
      beta_1: Float. Factor used to interpolate the current gradient and the
        momentum when forming the update direction. Defaults to 0.9.
      beta_2: Float. Exponential decay rate of the momentum. Defaults to 0.99.
      weight_decay, clipnorm, clipvalue, global_clipnorm, jit_compile, name,
        **kwargs: Forwarded unchanged to the base optimizer.

    Notes:
    For sparse gradients (``tf.IndexedSlices``) the same interpolation is
    applied to the whole momentum accumulator, so slices that received no
    gradient are still updated and decayed. The sparse behaviour therefore
    matches the dense behaviour.
    """

    def __init__(
        self,
        learning_rate=1e-4,
        beta_1=0.9,
        beta_2=0.99,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        jit_compile=True,
        name="Lion",
        **kwargs,
    ):
        super().__init__(
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            jit_compile=jit_compile,
            **kwargs,
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def build(self, var_list):
        """Initialize optimizer variables.

        Creates one momentum ("ema") slot per model variable; does nothing
        when the optimizer has already been built.

        var_list: list of model variables to build Lion variables on.
        """
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._emas = []
        for var in var_list:
            self._emas.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="ema"
                )
            )

    def update_step(self, gradient, variable):
        """Update step given gradient and the associated model variable."""
        lr = tf.cast(self.learning_rate, variable.dtype)
        # Scalars in the variable's dtype. Shape-(1,) float32 constants would
        # broadcast the update of a scalar variable to shape (1,), which cannot
        # be assigned back, and would clash with non-float32 variables.
        beta_1 = tf.cast(self.beta_1, variable.dtype)
        beta_2 = tf.cast(self.beta_2, variable.dtype)

        var_key = self._var_key(variable)
        ema = self._emas[self._index_dict[var_key]]

        if isinstance(gradient, tf.IndexedSlices):
            # Sparse gradients.
            lerp_fn = sparse_lerp
        else:
            # Dense gradients.
            lerp_fn = lerp

        # Step along the sign of the beta_1-interpolated direction ...
        update = lerp_fn(ema, gradient, 1 - beta_1)
        update = tf.sign(update)
        variable.assign_sub(update * lr)

        # ... then move the momentum towards the gradient at rate 1 - beta_2.
        ema.assign(lerp_fn(ema, gradient, 1 - beta_2))

    def get_config(self):
        """Return the base config extended with learning rate and both betas."""
        config = super().get_config()

        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(self._learning_rate),
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
            }
        )
        return config

# train the model
# Compile with the custom Lion optimizer and focal loss; the custom metric
# functions are tracked alongside the built-in ones.
model.compile(
    optimizer=Lion(learning_rate=6e-6),
    loss=focal_loss(),
    metrics=[
        "accuracy",
        f1_m,
        precision_m,
        recall_m,
        tf.keras.metrics.SpecificityAtSensitivity(0.5),
    ],
)
# No validation data is passed, so the callbacks act on training metrics only.
history = model.fit(train_generator, epochs=70, callbacks=callbacks)
# Save the model as it is after the last epoch (the checkpoint callback writes
# best_model.h5 separately), then evaluate that model on the test generator.
model.save("tumor_model.h5")
evals = model.evaluate(test_generator)


def save_eval_results(eval_results, filename, metric_names=None):
    """Write evaluation results to ``filename`` as a two-row CSV.

    Args:
        eval_results: Metric values as returned by ``model.evaluate``: a
            sequence, or a single scalar when only the loss is reported.
        filename: Path of the CSV file to (over)write.
        metric_names: Header row. Defaults to ``model.metrics_names`` of the
            module-level ``model``, which was the previous behaviour.
    """
    if metric_names is None:
        metric_names = model.metrics_names

    # A bare scalar is not iterable; store it as a one-column row.
    try:
        values = list(eval_results)
    except TypeError:
        values = [eval_results]

    # Open the file in write mode and write the evaluation results to it
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(metric_names)
        writer.writerow(values)


# plot the training history as a graph
def plot_history(history, call=None):
    """Plot every recorded metric against the epoch and save the figure.

    Args:
        history: Object whose ``history`` attribute maps a metric name to its
            per-epoch values (as returned by ``model.fit``).
        call: Optional tag appended to the output file name.

    The figure is saved as ``training_history.png``, or
    ``training_history_<call>.png`` when ``call`` is given. The name used to be
    derived from ``str(history)``, i.e. the object repr including its memory
    address. Nothing is plotted when no metric was recorded.
    """
    metrics = list(history.history.keys())
    if not metrics:
        return

    # squeeze=False keeps `axs` two-dimensional even for a single metric.
    fig, axs = plt.subplots(
        nrows=1, ncols=len(metrics), figsize=(20, 5), squeeze=False
    )

    for ax, metric in zip(axs[0], metrics):
        ax.plot(history.history[metric])
        ax.set_title(metric)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(metric)

    suffix = "" if call is None else f"_{call}"
    plt.savefig(f"training_history{suffix}.png")
    plt.show()


def load_m(model_name):
    """Load a saved model that uses this notebook's custom loss, optimizer and metrics.

    Args:
        model_name: Path of the saved model file.

    Returns:
        The model returned by ``load_model``.
    """
    loaded = load_model(
        model_name,
        custom_objects={
            # Register the loss function itself: `focal_loss` is only the
            # factory, and calling it with (y_true, y_pred) would return a
            # function instead of a loss value.
            "focal_loss_fixed": focal_loss(),
            "Lion": Lion,
            "f1_m": f1_m,
            "precision_m": precision_m,
            "recall_m": recall_m,
        },
    )
    return loaded


def quick_test(test_generator, model, img_size):
    """Predict the first batch of ``test_generator`` and save each image to ``pred/``.

    Every image is written as ``pred/<predicted class index>_<position>.jpg``.
    ``img_size`` is accepted but not used.

    NOTE(review): the arrays are handed to ``array_to_img`` unchanged; if the
    generator yields BGR images, the saved files have red and blue swapped --
    confirm against the generator in use.
    """
    # Only the images of the first batch are needed; the labels are ignored.
    batch_x, _ = test_generator[0]
    pred_y = model.predict(batch_x)

    # Make sure the output folder is there before writing into it.
    if not os.path.exists("pred"):
        os.makedirs("pred")

    for i, image_array in enumerate(batch_x):
        # the most probable class index becomes the filename prefix
        pred_label = np.argmax(pred_y[i])
        array_to_img(image_array).save(f"pred/{pred_label}_{i}.jpg")

# load already trained model
# Reload the checkpoint written by ModelCheckpoint and recompile it with the
# same loss and metrics as the trained model, but different optimizer settings.
best_model = load_m("/content/best_model.h5")
best_model.compile(
    optimizer=Lion(learning_rate=1e-4, weight_decay=1e-5),
    loss=focal_loss(),
    metrics=[
        "accuracy",
        f1_m,
        precision_m,
        recall_m,
        tf.keras.metrics.SpecificityAtSensitivity(0.5),
    ],
)
# `evals` holds the test metrics of the final-epoch `model`, not of `best_model`.
save_eval_results(evals, "pannuke_results")
plot_history(history)
# Save sample predictions of both models; the second call writes to the same
# pred/ folder as the first.
quick_test(test_generator, model, (256, 256))
quick_test(test_generator, best_model, (256, 256))

In [None]:
import shutil

# move the models to Google Drive
# best_model.h5 is the checkpoint written during training, tumor_model.h5 the
# model saved after the last epoch; both are renamed on the way.
shutil.move("/content/best_model.h5", "/content/drive/MyDrive/tumor_transformer_b.h5")
shutil.move("/content/tumor_model.h5", "/content/drive/MyDrive/tumor_transformer.h5")