# Packages

In [None]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, Add, ReLU, GlobalAveragePooling2D
from tensorflow.keras.utils import plot_model
import os
import cv2
from google.colab.patches import cv2_imshow
from time import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras import layers, Model, Input
import math
from tensorflow.keras.losses import Huber
from tensorflow.keras.utils import plot_model
import tensorflow.keras.backend as K
from tensorflow.keras.metrics import Precision, Recall
from sklearn.utils.class_weight import compute_class_weight
import seaborn as sns
from sklearn.metrics import confusion_matrix
from torchvision import transforms
from PIL import Image
from skimage.measure import shannon_entropy
import albumentations as A
from albumentations.augmentations.dropout.coarse_dropout import CoarseDropout

# Functions

In [None]:
# Model Performance

def model_performance(df):
    """Print the best per-metric values of a training run and plot its curves.

    Args:
        df: DataFrame with one row per epoch (e.g. built from
            ``History.history``). Columns read when present: ``accuracy``,
            ``loss``, ``recall``, ``precision``, ``auc``, the same names with
            a ``val_`` prefix, and ``learning_rate``.

    Columns that are absent are reported as ``n/a`` / left out of the plots
    instead of raising ``KeyError``, so a history recorded with a different
    metric set can still be summarised.
    """
    # (printed label, column, Series reducer): loss is best at its minimum,
    # every other metric at its maximum.
    summary_rows = (
        ('Classification Accuracy', 'accuracy', 'max'),
        ('Classification Loss', 'loss', 'min'),
        ('Classification Recall', 'recall', 'max'),
        ('Classification Precision', 'precision', 'max'),
        ('Classification AUC', 'auc', 'max'),
    )

    def report(label, column, reducer):
        """Print one summary line, or a placeholder when the column is missing."""
        if column in df.columns:
            print(f'{label}: {getattr(df[column], reducer)()}')
        else:
            print(f'{label}: n/a (no "{column}" column)')

    sections = (('Training', '', ''), ('Validation', 'Val ', 'val_'))
    for title, label_prefix, column_prefix in sections:
        print(f'--------{title}-----------')
        for label, column, reducer in summary_rows:
            report(label_prefix + label, column_prefix + column, reducer)

    print('--------Learning Rate-----------')
    report('Min Learning Rate', 'learning_rate', 'min')
    report('Max Learning Rate', 'learning_rate', 'max')

    # One figure per metric, train and validation curves overlaid.
    curves = (
        ('loss', 'Loss'),
        ('accuracy', 'Accuracy'),
        ('recall', 'Recall'),
        ('precision', 'Precision'),
    )
    for column, pretty in curves:
        val_column = f'val_{column}'
        has_train = column in df.columns
        has_val = val_column in df.columns
        if not (has_train or has_val):
            continue

        plt.figure(figsize=(8, 5))
        if has_train:
            plt.plot(df[column], label=f'Train {pretty}')
        if has_val:
            plt.plot(df[val_column], label=f'Validation {pretty}')
        plt.xlabel('Epochs')
        plt.ylabel(pretty)
        plt.title(f'{pretty} vs. Epochs')
        plt.legend()
        plt.grid(True)
        plt.show()

In [None]:
## One Image Test
def one_image_test(model, data_set, class_ids=('H1', 'H2', 'H3', 'H5', 'H6')):
    """Classify one randomly chosen row of ``data_set`` and print the outcome.

    Args:
        model: object with a ``predict`` method that accepts a
            ``(1, 224, 224, 3)`` float32 array and returns per-class scores.
        data_set: DataFrame whose rows expose ``class_id``, ``img_path`` and
            ``img_name``.
        class_ids: class labels indexed by the position of the model's
            highest score.

    Prints the image name, actual class, predicted class and top score. When
    the image cannot be read, only its path is printed. Nothing is returned.
    """
    if len(data_set) == 0:
        print('one_image_test: data_set is empty, nothing to predict.')
        return

    row = data_set.iloc[np.random.randint(0, len(data_set))]
    img_path = row.img_path

    # cv2.imread reports a missing/corrupt file by returning None rather than
    # raising; a non-string path raises TypeError instead.
    try:
        img = cv2.imread(img_path)
    except (TypeError, cv2.error):
        img = None
    if img is None:
        print(img_path)
        return

    # Same preprocessing as the training generator: 224x224, scaled to [0, 1].
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
    img = img.astype(np.float32) / 255.0
    result = model.predict(np.expand_dims(img, axis=0))

    predicted_class = class_ids[result.argmax()]
    confidence = result.max()
    print(f'Image Name: {row.img_name}, Actual Class: {row.class_id}, Predicted Class: {predicted_class}, Confidence: {confidence}')

In [None]:
#GradCAM
def grad_cam(model, image, layer_name):
    """Compute a Grad-CAM heatmap of ``layer_name`` for the predicted class.

    Args:
        model: Keras model providing ``input``/``inputs``/``output`` and
            ``get_layer``.
        image: a single un-batched image array; a batch axis is prepended
            here. # assumes (height, width, channels) - TODO confirm
        layer_name: name of the layer whose activations are visualised.

    Returns:
        2-D NumPy array resized to the image's height and width. Values are
        normalised to [0, 1] before resizing; the map is all zeros when no
        activation contributes positively to the predicted class.

    Raises:
        ValueError: if no gradient flows from the prediction to the layer.
    """
    batch = np.expand_dims(image, axis=0)

    # Models declared with a list of inputs expect a list when called.
    inputs = [batch] if isinstance(model.input, list) else batch

    # Sub-model exposing both the layer's activations and the final scores.
    conv_layer = model.get_layer(layer_name)
    grad_model = Model(inputs=model.inputs, outputs=[conv_layer.output, model.output])

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(inputs)
        top_class = int(np.argmax(predictions[0]))
        loss = predictions[:, top_class]  # score of the predicted class

    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        raise ValueError(f"Gradients are None for layer {layer_name}. Check if the layer is trainable.")

    # Drop the batch axis: both tensors become (height, width, channels).
    grads = grads[0]
    conv_outputs = conv_outputs[0]

    # Channel importance = gradient averaged over the two spatial axes only.
    # Averaging over the channel axis as well would reduce the weights to a
    # single scalar and the map to a plain sum of activations.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1))

    # Weighted sum of the feature maps, keeping only positive evidence (ReLU).
    heatmap = tf.reduce_sum(conv_outputs * pooled_grads, axis=-1)
    heatmap = tf.maximum(heatmap, 0)

    heatmap_max = tf.reduce_max(heatmap)
    if heatmap_max == 0:  # avoid dividing by zero
        print(f"Warning: Heatmap for layer {layer_name} is all zeros.")
        heatmap = tf.zeros_like(heatmap)
    else:
        heatmap /= heatmap_max

    # cv2.resize takes (width, height).
    heatmap = cv2.resize(heatmap.numpy(), (batch.shape[2], batch.shape[1]))
    return heatmap

In [None]:
# Turn on memory growth for every GPU TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Report the configuration failure without aborting the notebook.
        print(err)
    else:
        print("✅ GPU successfully assigned!")

In [None]:
# Load the image index; the CSV's first column becomes the DataFrame index.
DATA_SET_CSV = '/content/drive/MyDrive/SDS-CP-31/data_set.csv'
data_set = pd.read_csv(DATA_SET_CSV, index_col=0)

# EDA

## Edge Density Analysis

In [None]:
# Draw 100 rows per class with a fixed seed so the sample is reproducible.
rows_by_class = data_set.groupby('class_id')
data_sampled = rows_by_class.sample(n=100, random_state=40)

In [None]:
# Empty frame carrying the schema of the per-image edge statistics.
edge_columns = ['class_id', 'mean_edge', 'edge_density', 'entropy']
edge_results = pd.DataFrame(columns=edge_columns)

In [None]:
# Per-image edge statistics for the sampled rows.
results = []
threshold = 30  # gradient magnitude above which a pixel counts as an edge
for actual_class, img_path in zip(data_sampled['class_id'], data_sampled['img_path']):
    # cv2.imread returns None for a missing/corrupt file instead of raising;
    # a non-string path raises TypeError. Either way the row is skipped.
    try:
        img_gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    except (TypeError, cv2.error):
        img_gray = None
    if img_gray is None:
        print(f'Skipping unreadable image: {img_path}')
        continue

    # Sobel gradients along x and y, combined into a gradient magnitude map.
    sobel_x = cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)
    edge_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)

    mean_edge = edge_magnitude.mean()
    # Fraction of pixels whose gradient magnitude exceeds the threshold.
    edge_density = np.sum(edge_magnitude > threshold) / edge_magnitude.size
    entropy = shannon_entropy(edge_magnitude)

    results.append([actual_class, mean_edge, edge_density, entropy])

edge_results = pd.DataFrame(results, columns=['class_id', 'mean_edge', 'edge_density', 'entropy'])

In [None]:
# Per-class average of every edge statistic (shown as the cell output).
edge_stats_by_class = edge_results.groupby('class_id')
edge_stats_by_class.mean()

# Model Building

In [None]:
def _conv_bn(tensor, filters):
    """Apply a 3x3 same-padded, L2-regularised convolution, then batch norm."""
    tensor = layers.Conv2D(
        filters,
        (3, 3),
        padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(1e-4),
    )(tensor)
    tensor = layers.BatchNormalization()(tensor)
    return tensor


def _residual_stage(tensor, filters, project):
    """Two conv+BN layers with a skip connection, followed by 2x2 pooling.

    With ``project`` set, the skip path goes through a 1x1 convolution so its
    channel count matches ``filters``; otherwise the input is added as is.
    """
    # The projection is created before the main branch, so layers are
    # instantiated in the same order as in the hand-unrolled definition.
    if project:
        skip = layers.Conv2D(filters, (1, 1), padding='same')(tensor)
    else:
        skip = tensor

    branch = _conv_bn(tensor, filters)
    branch = layers.ReLU()(branch)
    branch = _conv_bn(branch, filters)

    merged = layers.Add()([branch, skip])
    merged = layers.ReLU()(merged)
    return layers.MaxPooling2D((2, 2))(merged)


inputs = Input(shape=(224, 224, 3))

# Stem: two conv-BN-ReLU layers, then 3x3 pooling and light dropout.
x = inputs
for _ in range(2):
    x = _conv_bn(x, 64)
    x = layers.ReLU()(x)
x = layers.MaxPooling2D((3, 3))(x)
x = layers.Dropout(0.1)(x)

# Residual stages: identity skip at 64 filters, projected skips at 128 and 256.
for stage_filters, needs_projection in ((64, False), (128, True), (256, True)):
    x = _residual_stage(x, stage_filters, needs_projection)

# Classification head: 5-way softmax.
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(
    512,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(1e-3),
)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(5, activation='softmax')(x)

model4 = Model(inputs, outputs)

In [None]:
# Metrics tracked on top of the loss; the names become the history columns.
training_metrics = [
    'accuracy',
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.AUC(name='auc'),
]
adam = tf.keras.optimizers.Adam(1e-3)

model4.compile(optimizer=adam, loss='categorical_crossentropy', metrics=training_metrics)

In [None]:
# 80/20 split, stratified on the class label, with a fixed seed.
class_column = data_set['class_id']
split_frames = train_test_split(data_set, test_size=0.2, stratify=class_column, random_state=42)
df_train, df_val = split_frames

In [None]:
def balanced_batch_generator(df, batch_size=20, apply_aug=False):
    """Build an endless generator of class-balanced ``(images, labels)`` batches.

    Every batch draws ``batch_size // 5`` rows per class (with replacement)
    from ``df``, shuffles them, loads each image as 224x224 BGR float32 scaled
    to [0, 1], and pairs it with a one-hot label over the five classes.
    Unreadable images are left out, so a batch can be smaller than requested.

    Args:
        df: DataFrame with ``class_id`` and ``img_path`` columns.
        batch_size: target batch size; must be at least the number of classes.
        apply_aug: accepted so callers can pass it explicitly; only ``False``
            is supported because no augmentation pipeline is defined here.

    Returns:
        A generator yielding ``(images, labels)`` NumPy arrays forever.

    Raises:
        ValueError: if ``batch_size`` is smaller than the number of classes
            or ``df`` has no rows for one of the classes.
        NotImplementedError: if ``apply_aug`` is true.
        RuntimeError: (while iterating) if no image of a drawn batch could
            be read.
    """
    class_ids = ('H1', 'H2', 'H3', 'H5', 'H6')
    class_to_idx = {cls: i for i, cls in enumerate(class_ids)}
    samples_per_class = batch_size // len(class_ids)

    # Validate eagerly so mistakes surface at creation time, not mid-training.
    if samples_per_class < 1:
        raise ValueError(
            f'batch_size must be at least {len(class_ids)} to draw one sample per class, got {batch_size}'
        )
    if apply_aug:
        raise NotImplementedError('apply_aug=True is not supported: no augmentation pipeline is defined.')

    # Split the frame per class once instead of re-filtering it for every batch.
    rows_by_class = {cls: df[df['class_id'] == cls] for cls in class_ids}
    missing = [cls for cls, rows in rows_by_class.items() if rows.empty]
    if missing:
        raise ValueError(f'No rows for class(es) {missing}; cannot build balanced batches.')

    def _batches():
        # Row i of this matrix is the one-hot label of class index i.
        one_hot = tf.keras.utils.to_categorical(
            list(range(len(class_ids))), num_classes=len(class_ids)
        )

        while True:
            batch_df = pd.concat(
                [rows.sample(samples_per_class, replace=True) for rows in rows_by_class.values()]
            ).sample(frac=1)

            images, labels = [], []
            for img_path, label_str in zip(batch_df['img_path'], batch_df['class_id']):
                # IMREAD_COLOR always yields 3 channels; None means unreadable.
                img = cv2.imread(img_path, cv2.IMREAD_COLOR)
                if img is None:
                    continue

                img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
                images.append(img.astype(np.float32) / 255.0)
                labels.append(one_hot[class_to_idx[label_str]])

            if not images:
                # Fail loudly rather than hand an empty batch to the model.
                raise RuntimeError('No readable image in the drawn batch; check the img_path values.')

            yield np.array(images), np.array(labels)

    return _batches()

In [None]:
# Endless class-balanced batch generators for training and validation.
# No augmentation flag is passed: it is off unless explicitly requested.
train_gen = balanced_batch_generator(df_train, batch_size=20)
val_gen = balanced_batch_generator(df_val, batch_size=20)

In [None]:
batchsize = 20

# The generators never end, so the epoch length is set explicitly:
# one pass' worth of batches per split, and never fewer than one step.
steps_per_epoch, validation_steps = (
    max(1, len(split) // batchsize) for split in (df_train, df_val)
)

# Learning-rate schedule driven by the validation loss.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=4,
    min_lr=1e-6,
)

In [None]:
# Train for 30 epochs and report the wall-clock time.
start = time()
history = model4.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    validation_data=val_gen,
    validation_steps=validation_steps,
    # batch_size is deliberately not passed: the generators already emit
    # whole batches, and Keras asks for it to be omitted for generator input.
    callbacks=[lr_scheduler],  # Keras expects a list of callbacks
    verbose=1,
)
end = time()
print(f'Time Taken: {(end-start)/60} min')

In [None]:
# One row per epoch, one column per logged quantity.
epoch_logs = history.history
history41_df = pd.DataFrame(data=epoch_logs)

## Model Testing

In [None]:
# Summarise and plot the training run.
model_performance(df=history41_df)

In [None]:
# Evaluation sample: 100 rows per class, drawn with a fixed seed.
rows_by_class = data_set.groupby('class_id')
data_sampled = rows_by_class.sample(n=100, random_state=40)

In [None]:
# Empty frame that collects one prediction record per evaluated image.
prediction_columns = ['actual_claass', 'predicted_class', 'confidence']
test_res_500 = pd.DataFrame(columns=prediction_columns)

In [None]:
# Predict every sampled image and record actual class, predicted class and
# top score. Unreadable images are reported by path and skipped.
class_ids = ['H1', 'H2', 'H3', 'H5', 'H6']
for i in range(data_sampled.shape[0]):
    row = data_sampled.iloc[i]
    img_path = row.img_path

    # cv2.imread returns None for a missing/corrupt file instead of raising;
    # a non-string path raises TypeError.
    try:
        img = cv2.imread(img_path)
    except (TypeError, cv2.error):
        img = None
    if img is None:
        print(img_path)
        continue

    # Same preprocessing as the training generator: 224x224, scaled to [0, 1].
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
    img = img.astype(np.float32) / 255.0
    img_input = np.expand_dims(img, axis=0)

    # Model errors are no longer swallowed: they indicate a real problem,
    # not a bad image. verbose=0 avoids one progress bar per image.
    result = model4.predict(img_input, verbose=0)
    test_res_500.loc[i] = [row.class_id, class_ids[result.argmax()], result.max()]

In [None]:
# Class labels present in the ground truth, in sorted order; they define the
# row/column order of the matrix and the tick labels of the plot.
class_labels = sorted(test_res_500['actual_claass'].unique())
cm = confusion_matrix(
    test_res_500['actual_claass'],
    test_res_500['predicted_class'],
    labels=class_labels,
)

# Annotated heatmap: rows are actual classes, columns are predictions.
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted Class')
plt.ylabel('Actual Class')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

## GradCAM

In [None]:
# Convolution layers to visualise, by layer name, in plotting order.
model4_layer_names = ['conv2d'] + [
    f'conv2d_{suffix}' for suffix in (1, 2, 3, 4, 5, 8, 9, 7)
]

In [None]:
# Pick a random validation image and show it.
i = np.random.randint(0, len(df_val))
test_img_path = df_val.iloc[i]['img_path']
img = cv2.imread(test_img_path)
if img is None:
    # cv2.imread returns None instead of raising on a missing/corrupt file.
    raise FileNotFoundError(f'Could not read image: {test_img_path}')
img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
cv2_imshow(img)

# Same scaling and dtype as the training generator.
img = img.astype(np.float32) / 255.0

# One Grad-CAM map per listed layer, computed for the model's predicted class.
for j in model4_layer_names:
    try:
        heatmap = grad_cam(model4, img, j)
    except ValueError as e:
        print(f"Error for layer {j}: {e}")
        continue

    if np.max(heatmap) > 0:  # only plot when the heatmap is not blank
        plt.imshow(heatmap, cmap="jet")
        plt.title(f"Grad-CAM for {j}")
        plt.axis("off")
        plt.show()
    else:
        print(f"Skipping layer {j} because the heatmap is blank.")