### Connect to Drive

In [None]:
from google.colab import drive
drive.mount('/gdrive')
%cd ../gdrive/MyDrive


### Import libraries

In [None]:
import logging
import os
import random
import warnings

# TF_CPP_MIN_LOG_LEVEL must be exported before TensorFlow is imported for the
# first time; setting it afterwards does not silence the native runtime logs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from tensorflow import keras as tfk
from tensorflow.keras import backend as K
from tensorflow.keras import layers as tfkl
from tensorflow.keras.applications.convnext import preprocess_input
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout

# Query the visible GPUs once and report how many were found.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

### Importing Cyclical Learning Rate

In [None]:
class CyclicLR(Callback):
    """Keras callback implementing a cyclical learning rate policy (CLR).

    Code adapted from
    https://github.com/bckenstler/CLR/blob/master/clr_callback.py

    The learning rate cycles between two boundaries with a constant
    frequency, as detailed in https://arxiv.org/abs/1506.01186.
    The amplitude of the cycle can be scaled on a per-iteration or
    per-cycle basis.

    Three built-in policies are available:

    "triangular":
        A basic triangular cycle with no amplitude scaling.
    "triangular2":
        A basic triangular cycle that halves the amplitude each cycle.
    "exp_range":
        A cycle that scales the amplitude by gamma**(cycle iterations)
        at each cycle iteration.

    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```

    Custom scaling functions are also supported:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```

    # Arguments
        base_lr: initial learning rate, which is the lower boundary
            of the cycle.
        max_lr: upper boundary of the cycle. Functionally, it defines
            the cycle amplitude (max_lr - base_lr). The lr at any point
            is base_lr plus some scaling of the amplitude, so max_lr
            may not actually be reached depending on the scaling
            function.
        step_size: number of training iterations per half cycle.
            The authors suggest 2-8 x the training iterations in
            one epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'. Ignored when scale_fn is given.
        gamma: constant of the 'exp_range' scaling function:
            gamma**(cycle iterations).
        scale_fn: custom scaling policy, a single-argument callable
            with 0 <= scale_fn(x) <= 1 for all x >= 0.
            When given, the mode parameter is ignored.
        scale_mode: {'cycle', 'iterations'}. Whether scale_fn is
            evaluated on the cycle number or on the cycle iterations
            (training iterations since the start of the cycle).
            Default is 'cycle'. Only used together with scale_fn.

    # Raises
        ValueError: if scale_fn is None and mode is not one of the
            built-in policies.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super().__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** x
                self.scale_mode = 'iterations'
            else:
                # Fail at construction time instead of leaving scale_fn
                # undefined and crashing on the first training batch.
                raise ValueError(
                    "mode must be one of 'triangular', 'triangular2' or "
                    "'exp_range' when scale_fn is not given, got %r" % (mode,))
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        # clr_iterations counts iterations since the last reset,
        # trn_iterations counts every iteration seen by this callback.
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}
        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Reset the cycle iteration counter.

        Optionally replaces the boundaries and/or the step size; arguments
        left as None keep the current value.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current value of clr_iterations."""
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        # Unscaled triangular wave, zero at the cycle boundaries.
        triangle = (self.max_lr - self.base_lr) * np.maximum(0, (1 - x))
        if self.scale_mode == 'cycle':
            return self.base_lr + triangle * self.scale_fn(cycle)
        return self.base_lr + triangle * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs=None):
        """Set the optimizer learning rate when training starts."""
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Record the batch metrics and move the learning rate one step on.

        The first positional argument receives whatever Keras passes to
        on_batch_end; it is not used here.
        """
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        # The recorded lr is the one that was in effect for the finished batch.
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())


### Load and process the dataset removing outliers

In [None]:
# Single seed shared by every random number generator used below.
seed = 42

# Seed Python, NumPy and TensorFlow (both the v2 and the compat.v1 entry points).
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Silence Python warnings and lower the verbosity of TensorFlow's loggers.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Interactive plotting and a record of the TensorFlow version in use.
plt.ion()
print(tf.__version__)
npz_file_path = 'public_data.npz'

# Load data from the NPZ file.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code. Only load archives that come from a trusted source.
data = np.load(npz_file_path, allow_pickle=True)

image_data = data['data']
labels_data = data['labels']

# Binary encoding of the labels: 'healthy' -> 0, anything else -> 1.
labels = [0 if lbl == 'healthy' else 1 for lbl in labels_data]

# Build the resizing layer once and reuse it, instead of creating a new
# layer object for every image.
resize_to_96 = tfkl.Resizing(96, 96)

images = []
for img in image_data:
    # Center-crop to the largest square that fits, then resize to 96x96.
    dim = min(img.shape[:-1])
    img = img[(img.shape[0]-dim)//2:(img.shape[0]+dim)//2, (img.shape[1]-dim)//2:(img.shape[1]+dim)//2, :]
    images.append(resize_to_96(img))
images = np.array(images)


def _drop_images_closest_to(images, labels, reference_index, n_closest=98):
    """Drop the images that are most similar to a reference image.

    The mean squared error between ``images[reference_index]`` and every
    image is computed, the ``n_closest`` images with the smallest error are
    selected, and every image that is exactly equal to one of them is
    removed together with its label.

    Args:
        images: array of images, indexed along the first axis.
        labels: sequence of labels aligned with ``images``.
        reference_index: position of the reference image in ``images``.
        n_closest: how many of the closest images to select for removal.

    Returns:
        A tuple ``(kept_images, kept_labels)`` where ``kept_images`` is a
        NumPy array and ``kept_labels`` is a list.
    """
    reference = images[reference_index]
    mse_distances = [np.mean((reference - image) ** 2) for image in images]

    # sorted() is stable, so ties keep their original order.
    order = sorted(range(len(images)), key=lambda i: mse_distances[i])
    closest_images = [images[i] for i in order[:n_closest]]

    kept_images = []
    kept_labels = []
    for image, label in zip(images, labels):
        if not any(np.array_equal(image, closest) for closest in closest_images):
            kept_images.append(image)
            kept_labels.append(label)
    return np.array(kept_images), kept_labels


## SHREK REMOVAL
# images[58] is used as the reference for the first group of outliers.
images, labels = _drop_images_closest_to(images, labels, reference_index=58)

### TROLOLO REMOVAL
# Index 332 refers to the array as it is AFTER the first removal pass.
images, labels = _drop_images_closest_to(images, labels, reference_index=332)



# Features are the cleaned images; targets are one-hot encoded over 2 classes.
X = images
y = tfk.utils.to_categorical(labels, 2)

### Splitting sets and balancing classes

In [None]:
# Hold out 500 samples for validation, stratified on the class index.
# (The stratify argument previously referenced an undefined `y_train_val`.)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=seed, test_size=500, stratify=np.argmax(y, axis=1))

# Apply SMOTE to oversample the minority class of the training split only.
# SMOTE works on 2-D feature matrices and 1-D class labels, so the images
# are flattened and the one-hot targets are turned back into class indices.
image_shape = X_train.shape[1:]
num_classes = y_train.shape[1]
smote = SMOTE(sampling_strategy='minority', random_state=seed)
X_resampled, y_resampled = smote.fit_resample(
    X_train.reshape(len(X_train), -1), np.argmax(y_train, axis=1))
y_resampled = tfk.utils.to_categorical(y_resampled, num_classes)

# Reshape the resampled data back to the original image shape
X_resampled = X_resampled.reshape((-1,) + tuple(image_shape))
X_train = X_resampled
y_train = y_resampled
print(f"X_train shape: {X_train.shape}, Y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, Y_val shape: {y_val.shape}")

### Transfer learning import

In [None]:
# Number of leading backbone layers that stay frozen during fine-tuning.
n_frozen_layers = 90

# ImageNet-pretrained ConvNeXtLarge without its classification head; the
# feature maps are reduced with average pooling.
transfer = tfk.applications.ConvNeXtLarge(
    include_top=False,
    weights="imagenet",
    input_shape=(96, 96, 3),
    pooling='avg',
)

# Mark the whole backbone as trainable, then freeze the first layers again.
transfer.trainable = True
for layer_index in range(n_frozen_layers):
    transfer.layers[layer_index].trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_large_notop.h5


### Model build

In [None]:
# Classifier: input dropout -> pretrained backbone -> batch norm -> 2-way head.
# The head uses softmax because the targets are one-hot vectors over two
# mutually exclusive classes and the model is trained with
# CategoricalCrossentropy, which expects a probability distribution.
tl_model = tf.keras.Sequential([
    tfk.Input(shape=(96, 96, 3)),
    Dropout(0.1),
    transfer,
    BatchNormalization(),
    tfkl.Dense(2, activation='softmax'),
])

### Model compile and learning rate implementation

In [None]:
tl_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=['accuracy'],
)
tl_model.summary()

batch_size = 512

# CyclicLR counts training iterations (batches), so the half-cycle length is
# expressed in batches: 4 x the number of batches in one epoch. The previous
# formula evaluated to 4 * len(X) iterations, far more than the whole
# training run, so the learning rate barely moved away from base_lr.
training_samples = len(X_train)
steps_per_epoch = int(np.ceil(training_samples / batch_size))
step_size = 4 * steps_per_epoch
lr_schedule = CyclicLR(
    mode='triangular',
    base_lr=1e-5,
    max_lr=1e-4,
    step_size=step_size)


### Training and saving the model

In [None]:
# Stop once validation accuracy has not improved for 5 epochs and roll back
# to the best weights seen so far.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    restore_best_weights=True,
)

# Both splits go through the preprocessing function of the transferred network.
train_inputs = preprocess_input(X_train)
val_inputs = preprocess_input(X_val)

# Fit the model and keep only the per-epoch metrics dictionary.
fit_result = tl_model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=batch_size,
    epochs=50,
    validation_data=(val_inputs, y_val),
    callbacks=[early_stopping, lr_schedule],
)
tl_history = fit_result.history

# Persist the trained model inside the submission folder.
save_model_dir = 'savemymodel'
os.makedirs(save_model_dir, exist_ok=True)
saved_model_path = os.path.join(save_model_dir, 'ColabSubmissionModel')
tf.keras.models.save_model(tl_model, saved_model_path)
print("Model saved to:", save_model_dir)

### Plotting

In [None]:
# Accuracy curves: dashed and faint for training, solid for validation.
fig, ax = plt.subplots(figsize=(15, 5))

curve_color = '#4D61E2'
ax.plot(tl_history['accuracy'], alpha=.3, color=curve_color, linestyle='--')
ax.plot(tl_history['val_accuracy'], label='Transfer Learning', alpha=.8, color=curve_color)

ax.legend(loc='upper left')
ax.set_title('Accuracy')
ax.grid(alpha=.3)

plt.show()


### The model.py

In [None]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers as tfkl
# Use the same preprocessing entry point as the training code (ConvNeXt), so
# training and inference cannot drift apart.
# NOTE(review): Keras documents both the ConvNeXt and the EfficientNetV2
# preprocess_input as pass-through placeholders - confirm for the TF version
# used at submission time.
from tensorflow.keras.applications.convnext import preprocess_input


class model:
    """Inference wrapper around the saved 'ColabSubmissionModel'."""

    def __init__(self, path):
        """Load the saved Keras model stored under ``path``.

        Args:
            path: directory that contains the 'ColabSubmissionModel' folder.
        """
        self.model = tf.keras.models.load_model(os.path.join(path, 'ColabSubmissionModel'))
        # Built once and reused for every image instead of per call.
        self._resize = tfkl.Resizing(96, 96)

    def preprocess_image(self, img):
        """Center-crop ``img`` to a square, resize it to 96x96 and preprocess it.

        Args:
            img: a single image whose first two axes are indexed as rows and
                columns and whose last axis is kept whole.

        Returns:
            The resized image after ``preprocess_input``.
        """
        dim = min(img.shape[:-1])
        img = img[(img.shape[0] - dim) // 2:(img.shape[0] + dim) // 2,
              (img.shape[1] - dim) // 2:(img.shape[1] + dim) // 2, :]
        img = self._resize(img)
        img = preprocess_input(img)
        return img

    def predict(self, X):
        """Predict a class index for every image in ``X``.

        Args:
            X: iterable of images accepted by ``preprocess_image``.

        Returns:
            A tensor holding the argmax class index of each prediction.
        """
        # Preprocess the images before making predictions
        preprocessed_images = [self.preprocess_image(img) for img in X]

        # Convert the list of images to a NumPy array
        preprocessed_images = np.array(preprocessed_images)

        # Make predictions using the model
        predictions = self.model.predict(preprocessed_images)

        # Convert predictions to class labels
        predicted_classes = np.argmax(predictions, axis=-1)
        out = tf.convert_to_tensor(predicted_classes)
        return out
