In [None]:
import os
import tensorflow as tf
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense, BatchNormalization, Conv2D, DepthwiseConv2D, Add, Activation, BatchNormalization, ReLU

# Ask TensorFlow to enable memory growth on every visible GPU.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
gpus = tf.config.list_physical_devices('GPU')
try:
    # Iterating an empty list is a no-op, so machines without a GPU skip this silently.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Reported instead of raised so the notebook can keep running.
    print("Could not set memory growth:", e)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

class GAM(tf.keras.layers.Layer):
    """Attention layer that rescales a feature map by a channel gate and a spatial gate.

    The channel gate is a two-layer Dense bottleneck applied to the spatially
    averaged input; the spatial gate is two 7x7 convolutions ending in a sigmoid.
    The output is ``inputs * channel_gate * spatial_gate``.

    NOTE(review): the channel branch has no final activation, so its gate is
    unbounded — confirm whether a sigmoid was intended there.

    Args:
        reduction_ratio: Divisor applied to the channel count to size the
            bottleneck of both branches.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, reduction_ratio=16, **kwargs):
        super().__init__(**kwargs)
        self.reduction_ratio = reduction_ratio

    def build(self, input_shape):
        channels = input_shape[-1]
        # Never let the bottleneck collapse to 0 units when channels < reduction_ratio.
        reduced = max(1, channels // self.reduction_ratio)
        self.channel_mlp = tf.keras.Sequential([
            layers.Dense(reduced, activation='relu'),
            layers.Dense(channels)
        ])
        self.spatial_conv = tf.keras.Sequential([
            layers.Conv2D(reduced, 7, padding='same', activation='relu'),
            layers.Conv2D(1, 7, padding='same', activation='sigmoid')
        ])

    def call(self, inputs):
        # Average over axes 1 and 2 with keepdims so the result broadcasts against `inputs`.
        ch_att = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
        ch_att = self.channel_mlp(ch_att)
        sp_att = self.spatial_conv(inputs)
        return inputs * ch_att * sp_att

    def get_config(self):
        # Needed so models containing this layer can be serialized and cloned.
        config = super().get_config()
        config.update({"reduction_ratio": self.reduction_ratio})
        return config

class ECA(tf.keras.layers.Layer):
    """Channel attention that gates each channel with a 1-D convolution over channel means.

    The kernel size is derived from the channel count as
    ``int(abs((log2(C) + b) / gamma))``, bumped to the next odd number when even.

    Args:
        gamma: Divisor in the kernel-size formula.
        b: Offset in the kernel-size formula.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, gamma=2, b=1, **kwargs):
        super().__init__(**kwargs)
        self.gamma = gamma
        self.b = b

    def build(self, input_shape):
        import math  # local import: keeps this cell self-contained

        channels = int(input_shape[-1])
        # Plain Python math instead of float32 TF ops: log2 of a power of two is
        # exact here, so float rounding at an integer boundary (e.g. 128
        # channels) cannot change the kernel size.
        k = int(abs((math.log2(channels) + self.b) / self.gamma))
        self.kernel_size = k if k % 2 else k + 1
        self.conv = layers.Conv1D(1, kernel_size=self.kernel_size, padding='same', use_bias=False)

    def call(self, inputs):
        # Mean over axes 1 and 2, then add a trailing axis so Conv1D slides along the channel axis.
        x = tf.reduce_mean(inputs, axis=[1, 2])
        x = tf.expand_dims(x, axis=-1)
        x = self.conv(x)
        x = tf.sigmoid(x)
        # Back to a shape that broadcasts against `inputs`.
        x = tf.reshape(x, [-1, 1, 1, inputs.shape[-1]])
        return inputs * x

    def get_config(self):
        # Needed so models containing this layer can be serialized and cloned.
        config = super().get_config()
        config.update({"gamma": self.gamma, "b": self.b})
        return config


In [None]:
from tensorflow.keras.layers import (Conv2D, DepthwiseConv2D, BatchNormalization, ReLU,
                                     Add, Input, GlobalAveragePooling2D, Dropout, Dense)

def fused_mbconv(x, out_filters, expansion, stride):
    """Fused block: 3x3 expansion conv, 1x1 projection, then GAM attention.

    Args:
        x: Input Keras tensor; its last dimension is the input channel count.
        out_filters: Number of output channels of the projection conv.
        expansion: Multiplier applied to the input channels for the 3x3 conv.
        stride: Stride of the 3x3 conv.

    Returns:
        The block output; the input is added back when ``stride == 1`` and the
        channel count is unchanged.
    """
    shortcut = x
    channels_in = shortcut.shape[-1]
    expanded_channels = channels_in * expansion

    y = Conv2D(expanded_channels, 3, strides=stride, padding='same', use_bias=False)(shortcut)
    y = BatchNormalization()(y)
    y = ReLU()(y)

    y = Conv2D(out_filters, 1, padding='same', use_bias=False)(y)
    y = BatchNormalization()(y)

    y = GAM()(y)

    # Residual only when the shapes of input and output match.
    if stride != 1 or channels_in != out_filters:
        return y
    return Add()([shortcut, y])

def mbconv(x, out_filters, expansion, stride):
    """Inverted-bottleneck block: 1x1 expand, 3x3 depthwise, 1x1 project, then ECA.

    Args:
        x: Input Keras tensor; its last dimension is the input channel count.
        out_filters: Number of output channels of the projection conv.
        expansion: Multiplier applied to the input channels for the 1x1 expansion.
        stride: Stride of the depthwise conv.

    Returns:
        The block output; the input is added back when ``stride == 1`` and the
        channel count is unchanged.
    """
    shortcut = x
    channels_in = shortcut.shape[-1]
    expanded_channels = channels_in * expansion

    # Expand
    y = Conv2D(expanded_channels, 1, padding='same', use_bias=False)(shortcut)
    y = BatchNormalization()(y)
    y = ReLU()(y)

    # Depthwise
    y = DepthwiseConv2D(3, strides=stride, padding='same', use_bias=False)(y)
    y = BatchNormalization()(y)
    y = ReLU()(y)

    # Project
    y = Conv2D(out_filters, 1, padding='same', use_bias=False)(y)
    y = BatchNormalization()(y)

    y = ECA()(y)

    # Residual only when the shapes of input and output match.
    if stride != 1 or channels_in != out_filters:
        return y
    return Add()([shortcut, y])


In [None]:
def build_paper_lowflops_model(input_shape=(224, 224, 3), num_classes=4,
                               fused_block=fused_mbconv, mbconv_block=mbconv):
    """Build the reduced-width classifier from fused and MBConv stages.

    The block builders are bound as default arguments on purpose: defaults are
    evaluated when this cell runs, so the model keeps using the
    ``fused_mbconv`` / ``mbconv`` defined just above even though a later cell
    redefines functions with the same names. Without this, calling the builder
    from the training cell would silently pick up the later definitions.
    Re-running this cell after that later cell rebinds the defaults to the later
    definitions.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of softmax outputs.
        fused_block: Callable ``(x, out_filters, expansion, stride) -> tensor``
            used for stages marked ``'fused'``.
        mbconv_block: Callable with the same signature used for all other stages.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(32, 3, strides=2, padding='same')(inputs)  # Smaller stem
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # Updated reduced-width blocks:
    # (expansion, out_filters, repeats, stride of the first repeat, block_type)
    blocks = [
        (1, 16, 2, 1, 'fused'),    # lower out_channels
        (2, 24, 3, 2, 'fused'),
        (2, 32, 3, 2, 'fused'),
        (3, 64, 4, 2, 'mbconv'),
        (3, 96, 6, 1, 'mbconv'),
        (4, 160, 12, 2, 'mbconv'),
        (4, 128, 2, 1, 'mbconv')
    ]

    for expansion, out_filters, repeats, stride, block_type in blocks:
        block_fn = fused_block if block_type == 'fused' else mbconv_block
        for i in range(repeats):
            # Only the first repeat of a stage uses the stage stride.
            s = stride if i == 0 else 1
            x = block_fn(x, out_filters, expansion, s)

    x = Conv2D(128, 1, padding='same')(x)  # Final bottleneck layer much smaller
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.2)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    return tf.keras.Model(inputs, outputs)


In [None]:
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

def get_flops(model, batch_size=1):
    """Count the float operations of one forward pass with the TF1 profiler.

    Args:
        model: Keras model to profile.
        batch_size: Batch dimension used when tracing the model.

    Returns:
        ``total_float_ops`` reported by the profiler, or 0 when the profiler
        returns ``None``.
    """
    # Build the model first if that has not happened yet.
    if not model.built:
        model.build(input_shape=(batch_size, *model.input_shape[1:]))

    # Trace the model into a concrete function for a fixed input signature.
    input_spec = tf.TensorSpec([batch_size, *model.input_shape[1:]], model.inputs[0].dtype)
    traced = tf.function(model).get_concrete_function(input_spec)

    # Freeze variables into constants before profiling the graph.
    frozen = convert_variables_to_constants_v2(traced)

    profile = tf.compat.v1.profiler.profile(
        graph=frozen.graph,
        run_meta=tf.compat.v1.RunMetadata(),
        cmd='op',
        options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation(),
    )

    if profile is None:
        return 0
    return profile.total_float_ops


In [None]:
# Build the low-FLOPs model and report its cost.
model = build_paper_lowflops_model()
flops = get_flops(model, batch_size=1)
params = model.count_params()

print("\n".join([
    "=============================================",
    f"🧮 FLOPs: {flops / 1e9:.03f} GFLOPs",
    f"🔢 Parameters: {params:,}",
    "=============================================",
]))


In [None]:
def cbam_block(inputs, reduction_ratio=8):
    """Apply channel attention followed by spatial attention to `inputs`.

    Args:
        inputs: Keras tensor whose last dimension is the channel count.
        reduction_ratio: Divisor for the hidden width of the shared channel MLP.

    Returns:
        `inputs` scaled by the channel gate and then by the spatial gate.
    """
    n_channels = inputs.shape[-1]

    # --- Channel attention: one shared two-layer MLP over avg- and max-pooled features.
    pooled_avg = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
    pooled_max = tf.reduce_max(inputs, axis=[1, 2], keepdims=True)

    squeeze = tf.keras.layers.Dense(n_channels // reduction_ratio, activation='relu', use_bias=False)
    excite = tf.keras.layers.Dense(n_channels, use_bias=False)

    avg_branch = excite(squeeze(pooled_avg))
    max_branch = excite(squeeze(pooled_max))
    channel_gate = tf.keras.activations.sigmoid(avg_branch + max_branch)
    refined = tf.multiply(inputs, channel_gate)

    # --- Spatial attention: 7x7 conv over the channel-wise mean and max maps.
    mean_map = tf.reduce_mean(refined, axis=-1, keepdims=True)
    max_map = tf.reduce_max(refined, axis=-1, keepdims=True)
    stacked = tf.concat([mean_map, max_map], axis=-1)
    spatial_gate = tf.keras.layers.Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')(stacked)
    return tf.multiply(refined, spatial_gate)


In [None]:
def fused_mbconv(x, out_filters, expansion, stride):
    """Fused block with CBAM: 3x3 expansion conv, 1x1 projection, then `cbam_block`.

    Args:
        x: Input Keras tensor; its last dimension is the input channel count.
        out_filters: Number of output channels of the projection conv.
        expansion: Multiplier applied to the input channels for the 3x3 conv.
        stride: Stride of the 3x3 conv.

    Returns:
        The block output; the input is added back when ``stride == 1`` and the
        channel count is unchanged, matching the residual rule of `mbconv`.
    """
    input_channels = x.shape[-1]
    shortcut = x
    x = tf.keras.layers.Conv2D(input_channels * expansion, 3, strides=stride, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
    x = tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = cbam_block(x)

    # Residual connection (previously missing here, unlike the sibling mbconv).
    if stride == 1 and input_channels == out_filters:
        return tf.keras.layers.Add()([shortcut, x])
    return x
def mbconv(x, out_filters, expansion, stride):
    """MBConv block with CBAM: 1x1 expand, 3x3 depthwise, 1x1 project, `cbam_block`.

    Args:
        x: Input Keras tensor; its last dimension is the input channel count.
        out_filters: Number of output channels of the projection conv.
        expansion: Multiplier applied to the input channels for the 1x1 expansion.
        stride: Stride of the depthwise conv.

    Returns:
        The block output; the input is added back when ``stride == 1`` and the
        channel count is unchanged.
    """
    keras_layers = tf.keras.layers
    input_channels = x.shape[-1]

    # 1x1 expansion
    y = keras_layers.Conv2D(input_channels * expansion, 1, padding='same')(x)
    y = keras_layers.BatchNormalization()(y)
    y = keras_layers.ReLU()(y)

    # 3x3 depthwise convolution (carries the stride)
    y = keras_layers.DepthwiseConv2D(3, strides=stride, padding='same')(y)
    y = keras_layers.BatchNormalization()(y)
    y = keras_layers.ReLU()(y)

    # 1x1 projection followed by attention
    y = keras_layers.Conv2D(out_filters, 1, padding='same')(y)
    y = keras_layers.BatchNormalization()(y)
    y = cbam_block(y)

    # Skip the residual when the shapes of input and output differ.
    if stride != 1 or input_channels != out_filters:
        return y
    return keras_layers.Add()([x, y])


In [None]:
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout

def build_cbam_efficientnetv2s(input_shape=(224, 224, 3), num_classes=4):
    """Build the classifier from whichever `fused_mbconv` / `mbconv` are currently defined.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of softmax outputs.

    Returns:
        An uncompiled Keras `Model`.
    """
    # (expansion, out_filters, repeats, stride of the first repeat, block_type)
    stage_specs = (
        (1, 16, 2, 1, 'fused'),
        (2, 24, 3, 2, 'fused'),
        (2, 32, 3, 2, 'fused'),
        (3, 64, 4, 2, 'mbconv'),
        (3, 96, 6, 1, 'mbconv'),
        (4, 160, 12, 2, 'mbconv'),
        (4, 128, 2, 1, 'mbconv'),
    )

    # Stem
    inputs = Input(shape=input_shape)
    features = Conv2D(32, 3, strides=2, padding='same')(inputs)
    features = BatchNormalization()(features)
    features = ReLU()(features)

    # Stages
    for expansion, out_filters, repeats, stride, block_type in stage_specs:
        for block_idx in range(repeats):
            # Only the first block of a stage uses the stage stride.
            block_stride = 1 if block_idx else stride
            if block_type == 'fused':
                features = fused_mbconv(features, out_filters, expansion, block_stride)
            else:
                features = mbconv(features, out_filters, expansion, block_stride)

    # Head
    features = Conv2D(128, 1, padding='same')(features)
    features = BatchNormalization()(features)
    features = ReLU()(features)
    features = GlobalAveragePooling2D()(features)
    features = Dropout(0.3)(features)
    features = Dense(256, activation='relu')(features)
    features = Dropout(0.2)(features)
    outputs = Dense(num_classes, activation='softmax')(features)

    return Model(inputs, outputs)

In [None]:
# Build the CBAM variant and report its cost.
model = build_cbam_efficientnetv2s()
flops = get_flops(model, batch_size=1)
params = model.count_params()

print("\n".join([
    "=============================================",
    f"🧮 FLOPs: {flops / 1e9:.03f} GFLOPs",
    f"🔢 Parameters: {params:,}",
    "=============================================",
]))


In [None]:
! pip install imutils
!pip install tensorflow

import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import imutils
import pathlib
import time
import PIL as pil
import shutil
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt
import cv2
import gc
from sklearn.metrics import confusion_matrix, classification_report
import itertools
import joblib



# Image size passed to the preprocessing calls and to image_dataset_from_directory.
IMAGE_SIZE = (224, 224)
# Learning rate handed to the Adam optimizer in the k-fold training cell.
BASE_LR = 2e-5
# NOTE(review): EPOCH is not referenced in the visible notebook (model.fit uses epochs=40) — confirm it is still needed.
EPOCH = 20
# Batch size used when creating train_ds and test_ds.
BATCH_SIZE = 8

In [None]:
!pip install tensorflow

In [None]:
def crop_img(img, image_size=(224, 224), add_pixels=0):
    """Crop an RGB image to the extent of its largest bright contour and resize it.

    The image is grayscaled, blurred, thresholded at 45 and cleaned with two
    erode/dilate passes; the largest external contour defines the crop box.

    Args:
        img: RGB image as a NumPy array.
        image_size: Target size handed to ``cv2.resize``.
        add_pixels: Margin added on every side of the crop box, clamped to the
            image bounds. The default of 0 reproduces the previous behavior.

    Returns:
        The cropped and resized image. When no contour is found, or the crop
        box is empty, the whole image is resized instead.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    if len(cnts) == 0:
        print("Warning: No contours found, returning resized original image.")
        return cv2.resize(img, image_size, interpolation=cv2.INTER_CUBIC)

    c = max(cnts, key=cv2.contourArea)

    # Extreme x / y coordinates of the largest contour.
    xs, ys = c[:, :, 0], c[:, :, 1]
    left, right = int(xs.min()), int(xs.max())
    top, bottom = int(ys.min()), int(ys.max())

    # Clamp so a positive margin can never produce negative (wrap-around) indices.
    height, width = img.shape[:2]
    top = max(top - add_pixels, 0)
    left = max(left - add_pixels, 0)
    bottom = min(bottom + add_pixels, height)
    right = min(right + add_pixels, width)

    new_img = img[top:bottom, left:right].copy()
    if new_img.size == 0:
        # A contour one pixel wide or tall gives an empty slice, which cv2.resize rejects.
        print("Warning: Empty crop region, returning resized original image.")
        return cv2.resize(img, image_size, interpolation=cv2.INTER_CUBIC)

    return cv2.resize(new_img, image_size, interpolation=cv2.INTER_CUBIC)


In [None]:
def image_preprocessing(source_dir, saved_root_dir, image_size=(224, 224), channels=3):
    """Crop and resize every image under `source_dir` into a mirrored folder tree.

    Each sub-directory of `source_dir` is processed; every file in it is decoded
    as JPEG, passed through `crop_img`, and saved under
    ``saved_root_dir/<sub-directory name>/<file name>``.

    Args:
        source_dir: Directory containing one sub-directory per class.
        saved_root_dir: Output root; created when missing.
        image_size: Target size forwarded to `crop_img`.
        channels: Channel count forwarded to ``tf.image.decode_jpeg``.

    Raises:
        Exception: If `source_dir` does not exist or is not a directory.
    """
    if not os.path.exists(source_dir):
        raise Exception(f"Source directory: {source_dir} does not exist")
    if not os.path.isdir(source_dir):
        raise Exception(f"Source path: {source_dir} is not a directory")

    os.makedirs(saved_root_dir, exist_ok=True)

    # Only directories are treated as class folders; stray files under the root
    # would otherwise crash on iterdir(). A list also gives tqdm a total.
    class_dirs = sorted(p for p in pathlib.Path(source_dir).iterdir() if p.is_dir())

    for class_dir in tqdm(class_dirs, desc="Processing folders"):
        # Path.name instead of splitting on "/" so this is separator-independent.
        saved_dist_dir = os.path.join(saved_root_dir, class_dir.name)

        for fp in class_dir.iterdir():
            if not fp.is_file():
                continue

            img = tf.io.read_file(str(fp))
            img = tf.image.decode_jpeg(img, channels=channels)
            img = crop_img(img.numpy(), image_size)
            # Note: a CLAHE contrast-enhancement step (cv2.createCLAHE on the
            # grayscale image) could be inserted before cropping; it is not applied.

            img = pil.Image.fromarray(img)

            # Created lazily so class folders without images produce no output folder.
            os.makedirs(saved_dist_dir, exist_ok=True)
            img.save(os.path.join(saved_dist_dir, fp.name))

    print(f"\n✅ All images processed and saved to: {saved_root_dir}")


In [None]:
# Preprocess both dataset splits into /kaggle/working/processed/<split>.
for split in ("Training", "Testing"):
    image_preprocessing(
        f"/kaggle/input/brain-tumor-mri-dataset/{split}",
        f"/kaggle/working/processed/{split}",
        image_size=IMAGE_SIZE,
    )


# Datasets

In [None]:
# NOTE(review): root_dir_path is not referenced anywhere else in the visible notebook — confirm it is still needed.
root_dir_path = "/kaggle/working/brain-tumor-mri-dataset"

In [None]:
# Options shared by both splits: one-hot labels, common batch and image size.
_loader_options = dict(
    label_mode="categorical",
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
)

# Training split: default shuffling with a fixed seed.
train_ds = tf.keras.utils.image_dataset_from_directory(
    "/kaggle/working/processed/Training",
    seed=42,
    **_loader_options,
)

# Test split: shuffling disabled so sample order is stable across iterations.
test_ds = tf.keras.utils.image_dataset_from_directory(
    "/kaggle/working/processed/Testing",
    shuffle=False,
    **_loader_options,
)


In [None]:
# Class names as reported by the training dataset, plus lookups in both directions.
class_names = train_ds.class_names
print("Class names:", class_names)

id_to_cls = dict(enumerate(class_names))
cls_to_id = {name: idx for idx, name in id_to_cls.items()}
print("cls_to_id:", cls_to_id)
print("id_to_cls:", id_to_cls)

In [None]:
# Random flip / rotation / zoom, applied to the training set only.
# NOTE(review): the k-fold cell later converts train_ds to NumPy arrays once, so
# each image ends up augmented a single time rather than freshly per epoch — confirm
# that this is intended.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])


def _augment_batch(x, y):
    """Augment the images of one batch and pass the labels through unchanged."""
    return data_augmentation(x), y


train_ds = train_ds.map(_augment_batch)


In [None]:
# Persist both lookups as tab-separated "key<TAB>value" lines.
for mapping_path, mapping in (("class_to_id.txt", cls_to_id), ("id_to_class.txt", id_to_cls)):
    with open(mapping_path, "w") as f:
        f.writelines(f"{k}\t{v}\n" for k, v in mapping.items())

In [None]:
def _tsv_rows(path):
    """Yield the tab-separated fields of every line in `path`."""
    with open(path, "r") as f:
        for line in f:
            yield line.replace("\n", "").split("\t")


# Read both mapping files back and echo them as a sanity check.
for cls, label in _tsv_rows("class_to_id.txt"):
    print(cls, int(label))
print("\n")
for label, cls in _tsv_rows("id_to_class.txt"):
    print(int(label), cls)

In [None]:
def class_weight_from_one_hot(ds):
    """Compute balanced class weights from a dataset of ``(x, one_hot_label)`` pairs.

    Works for both batched and unbatched datasets: the class id is always taken
    along the last axis of the label. The previous version only unbatched
    datasets whose class was literally named ``_BatchDataset``; after ``.map``
    that check failed and ``tf.argmax`` ran with its default axis 0 over whole
    batches, returning sample positions instead of class ids.

    Args:
        ds: Iterable yielding ``(features, one_hot_labels)``; labels may have
            shape ``(num_classes,)`` or ``(batch, num_classes)``.

    Returns:
        Dict mapping each class id present in `ds` to its balanced weight.

    Raises:
        ValueError: If `ds` yields no samples.
    """
    class_labels = []
    for _, onehot in ds:
        # axis=-1 is the class axis for a single label and for a batch alike.
        ids = np.argmax(np.asarray(onehot), axis=-1)
        class_labels.extend(np.atleast_1d(ids).tolist())

    if not class_labels:
        raise ValueError("Cannot compute class weights from an empty dataset")

    class_labels = np.asarray(class_labels)
    unique_classes = np.unique(class_labels)
    class_weights = compute_class_weight(class_weight="balanced",
                                         classes=unique_classes,
                                         y=class_labels)
    # Key by the real class id, not by position, so a missing class cannot shift the mapping.
    return {int(c): float(w) for c, w in zip(unique_classes, class_weights)}


    
# Class weights derived from the training labels; the k-fold cell passes them to model.fit(class_weight=...).
class_weights = class_weight_from_one_hot(train_ds)
print(class_weights)

In [None]:
# Show the first image of one training batch with its class name as the title.
for images, labels in train_ds.take(1):
    image, label = images[0], labels[0]
    fig, ax = plt.subplots()
    ax.imshow(tf.cast(image, tf.uint8))
    ax.set_title(class_names[tf.argmax(label).numpy()])
    ax.axis("off")
    plt.show()


# Training

In [None]:
class PrintLR(tf.keras.callbacks.Callback):
    """Callback that prints the optimizer's learning rate at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        optimizer = self.model.optimizer
        # Prefer `learning_rate`; fall back to the legacy `lr` alias for
        # optimizers that only expose that name.
        lr = getattr(optimizer, "learning_rate", None)
        if lr is None:
            lr = optimizer.lr

        # A schedule object is callable: evaluate it at the current step.
        if callable(lr):
            lr = lr(optimizer.iterations)

        if hasattr(lr, 'numpy'):
            value = lr.numpy()
        else:
            value = tf.keras.backend.get_value(lr)
        print(f"🔁 Epoch {epoch+1}: Learning rate = {float(value):.6f}")


In [None]:
def build_lrfn(
    lr_start=1e-4,
    lr_max=5e-4,
    lr_min=1e-6,
    lr_rampup_epochs=5,
    lr_sustain_epochs=5,
    lr_exp_decay=0.8
):
    """Create an epoch -> learning-rate function with ramp-up, plateau and decay.

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Learning rate reached after the ramp-up and held during the plateau.
        lr_min: Value the exponential decay approaches.
        lr_rampup_epochs: Number of epochs of linear ramp-up.
        lr_sustain_epochs: Number of epochs held at `lr_max`.
        lr_exp_decay: Per-epoch decay factor after the plateau.

    Returns:
        A function mapping an epoch index to a learning rate.
    """
    plateau_end = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Phase 1: linear ramp from lr_start towards lr_max.
        if epoch < lr_rampup_epochs:
            return (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Phase 2: hold at lr_max.
        if epoch < plateau_end:
            return lr_max
        # Phase 3: exponential decay towards lr_min.
        epochs_past_plateau = epoch - lr_rampup_epochs - lr_sustain_epochs
        return (lr_max - lr_min) * lr_exp_decay**epochs_past_plateau + lr_min

    return lrfn


In [None]:
# def build_lrfn(lr_start=2e-5, lr_max=1e-3,
#                lr_min=1e-6, lr_rampup_epochs=8,
#                lr_sustain_epochs=0, lr_exp_decay=0.8):

#     def lrfn(epoch):
#         if epoch < lr_rampup_epochs:
#             lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
#         elif epoch < lr_rampup_epochs + lr_sustain_epochs:
#             lr = lr_max
#         else:
#             lr = (lr_max - lr_min) * \
#                  lr_exp_decay ** (epoch - lr_rampup_epochs - lr_sustain_epochs) + lr_min
#         return lr

#     return lrfn

In [None]:
import os
import shutil

# Start from a clean slate: delete a best-model file left over from an earlier run.
_stale_checkpoint = "best_initial_model.keras"
if os.path.exists(_stale_checkpoint):
    os.remove(_stale_checkpoint)


In [None]:
import os
import shutil
import gc
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler, ModelCheckpoint
from tensorflow.keras import backend as K
import tensorflow as tf
from collections import defaultdict

# === Assumes the following are defined ===
# BASE_LR, build_lrfn(), build_paper_lowflops_model(), class_weights, train_ds, test_ds

# Per-epoch learning-rate schedule built from the default build_lrfn() settings.
lrfn = build_lrfn()
lr_schedule = LearningRateScheduler(lrfn, verbose=True)

# Callbacks shared by every fold: early stopping on val_loss (restoring the best
# weights), the schedule above, and the learning-rate printer.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
base_cbs = [early_stopping, lr_schedule, PrintLR()]

# Materialise a tf.data.Dataset as two NumPy arrays.
def dataset_to_numpy(ds):
    """Unbatch `ds` and stack its samples into ``(images, labels)`` arrays.

    Args:
        ds: Dataset whose elements are ``(image, label)`` pairs once unbatched.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one row per sample.
    """
    image_list, label_list = [], []
    for sample in ds.unbatch().as_numpy_iterator():
        image_list.append(sample[0])
        label_list.append(sample[1])
    return np.array(image_list), np.array(label_list)

# Pull the whole training set into memory; integer labels drive the stratified split.
images, labels = dataset_to_numpy(train_ds)
label_indices = labels.argmax(axis=1)

# Bookkeeping across folds.
best_model = dict(fold=None, train_accuracy=0, val_accuracy=0, test_accuracy=0, model=None)
accuracies = []
history_storage = defaultdict(list)  # metric name -> one history list per fold

# 5-fold stratified split with a fixed seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Train one model per fold, restore its best checkpoint and score it on test_ds.
# NOTE(review): the "best" fold is chosen by test accuracy, which makes the
# reported best test score optimistic — confirm whether validation accuracy
# should be the selection criterion instead.
for fold, (train_idx, val_idx) in enumerate(skf.split(images, label_indices)):
    print(f"\n📂 Fold {fold + 1}")

    # Split data
    X_train, X_val = images[train_idx], images[val_idx]
    y_train, y_val = labels[train_idx], labels[val_idx]

    # Reshuffle the training fold every epoch; without this every epoch sees the
    # exact same batches. The buffer is capped to bound the extra memory.
    shuffle_buffer = min(len(X_train), 1024)
    train_fold_ds = (
        tf.data.Dataset.from_tensor_slices((X_train, y_train))
        .shuffle(shuffle_buffer, seed=42, reshuffle_each_iteration=True)
        .batch(BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
    val_fold_ds = (
        tf.data.Dataset.from_tensor_slices((X_val, y_val))
        .batch(BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )

    # Unique checkpoint per fold
    checkpoint_path = f"best_lowflops_fold{fold+1}.weights.h5"
    if os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    model_checkpoint = ModelCheckpoint(
        checkpoint_path,
        monitor='val_categorical_accuracy',
        save_best_only=True,
        mode='max',
        save_weights_only=True  # ✅ Prevent internal name conflict
    )

    # Build and compile model
    model = build_paper_lowflops_model()
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
        optimizer=tf.keras.optimizers.Adam(BASE_LR),
        metrics=["categorical_accuracy"]
    )

    # Train
    history = model.fit(
        train_fold_ds,
        validation_data=val_fold_ds,
        epochs=40,
        callbacks=base_cbs + [model_checkpoint],
        class_weight=class_weights,
        verbose=1
    )

    # Load best weights for this fold; the file is missing only if no epoch
    # ever produced a checkpoint, in which case the current weights are kept.
    if os.path.exists(checkpoint_path):
        model.load_weights(checkpoint_path)
    else:
        print(f"Warning: no checkpoint written for fold {fold + 1}; using final weights.")

    # Store history
    for metric in history.history:
        history_storage[metric].append(history.history[metric])

    # Evaluate
    test_acc = model.evaluate(test_ds, verbose=0)[1]
    accuracies.append(test_acc)

    if test_acc > best_model["test_accuracy"]:
        best_model = {
            "fold": fold + 1,
            "train_accuracy": max(history.history['categorical_accuracy']),
            "val_accuracy": max(history.history['val_categorical_accuracy']),
            "test_accuracy": test_acc,
            "model": model
        }

    K.clear_session()
    gc.collect()

    print(f"\n✅ Fold {fold + 1} Results:")
    print(f"Test Accuracy: {test_acc:.4f}")

# Summarise the best fold and the spread of test accuracy across folds.
print("\n🏆 Best Fold:", best_model["fold"])
for split_title, key in (("Train", "train_accuracy"),
                         ("Validation", "val_accuracy"),
                         ("Test", "test_accuracy")):
    print(f"{split_title} Accuracy: {best_model[key]:.4f}")

mean_accuracy, std_accuracy = np.mean(accuracies), np.std(accuracies)
print(f"Mean Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")

# Persist the weights of the best-scoring fold.
best_model["model"].save_weights("best_lowflops_model.weights.h5")

# Plotting Functions
from tensorflow.keras.utils import pad_sequences

def plot_combined_histories(history_storage):
    """Plot each fold's loss/accuracy curves plus their per-epoch mean in a 2x2 grid.

    Folds may have different lengths (early stopping). Shorter runs are padded
    with NaN rather than 0 so the padding is neither drawn nor averaged into
    the mean curve.

    Args:
        history_storage: Mapping from metric name to a list of per-fold
            histories (each a sequence of per-epoch values).
    """
    plt.figure(figsize=(15, 10))
    metrics = ['loss', 'categorical_accuracy', 'val_loss', 'val_categorical_accuracy']
    titles = ['Training Loss', 'Training Accuracy', 'Validation Loss', 'Validation Accuracy']
    color = '#1f77b4'

    for idx, metric in enumerate(metrics):
        plt.subplot(2, 2, idx + 1)
        # `in` does not insert keys, so a defaultdict passed here stays untouched.
        if metric in history_storage and len(history_storage[metric]) > 0:
            runs = history_storage[metric]
            longest = max(len(run) for run in runs)
            padded = np.full((len(runs), longest), np.nan, dtype='float32')
            for row, run in enumerate(runs):
                padded[row, :len(run)] = run

            for h in padded:
                plt.plot(h, color=color, alpha=0.3)
            # nanmean averages only the folds that actually reached each epoch.
            plt.plot(np.nanmean(padded, axis=0), color=color, linestyle='--')
        plt.title(titles[idx])
        plt.xlabel('Epochs')
        plt.ylabel(metric.replace('_', ' ').title())
    plt.tight_layout()
    plt.suptitle('Training Curves Across Folds', y=1.02)
    plt.show()

In [None]:
def plot_model_metrics(model_key, label, history_storage):
    """Print and plot mean ± std of training vs validation metrics for one model.

    Args:
        model_key: Key under which this model's histories are stored.
        label: Human-readable model name used in titles and printed output.
        history_storage: Mapping ``model_key -> {metric name -> list of per-fold
            histories}``.

    When `history_storage` holds no dict under `model_key`, a message is printed
    and nothing is plotted.
    """

    def _stack_ragged(runs):
        """Stack per-fold histories into a 2-D array, NaN-padding shorter folds."""
        longest = max(len(run) for run in runs)
        stacked = np.full((len(runs), longest), np.nan)
        for row, run in enumerate(runs):
            stacked[row, :len(run)] = run
        return stacked

    # .get() instead of [] so a defaultdict does not grow an empty entry as a side effect.
    model_histories = history_storage.get(model_key)
    if not isinstance(model_histories, dict):
        print(f"⚠️ No per-model histories stored under '{model_key}'; skipping {label}.")
        return

    metrics_to_plot = [
        ('loss', 'val_loss'),
        ('categorical_accuracy', 'val_categorical_accuracy')
    ]

    for train_metric, val_metric in metrics_to_plot:
        train_histories = model_histories.get(train_metric)
        val_histories = model_histories.get(val_metric)
        if not train_histories or not val_histories:
            continue

        # Folds can stop at different epochs; NaN-aware statistics handle that.
        train_runs = _stack_ragged(train_histories)
        val_runs = _stack_ragged(val_histories)

        train_mean = np.nanmean(train_runs, axis=0)
        train_std = np.nanstd(train_runs, axis=0)

        val_mean = np.nanmean(val_runs, axis=0)
        val_std = np.nanstd(val_runs, axis=0)

        train_title = train_metric.replace('_', ' ').title()
        val_title = val_metric.replace('_', ' ').title()

        # Print values
        print(f"\n📊 {label} - {train_title} vs {val_title}:")
        for i, (tr_m, tr_s, vl_m, vl_s) in enumerate(zip(train_mean, train_std, val_mean, val_std)):
            print(f"Epoch {i+1}: Train = {tr_m:.4f} ± {tr_s:.4f}, Val = {vl_m:.4f} ± {vl_s:.4f}")

        # Plotting
        plt.figure(figsize=(8, 5))
        train_epochs = range(len(train_mean))
        plt.plot(train_epochs, train_mean, label=f"Train {train_title}")
        plt.fill_between(train_epochs, train_mean - train_std, train_mean + train_std, alpha=0.2)

        val_epochs = range(len(val_mean))
        plt.plot(val_epochs, val_mean, label=f"Val {val_title}")
        plt.fill_between(val_epochs, val_mean - val_std, val_mean + val_std, alpha=0.2)

        plt.title(f"{label} - {train_title} vs {val_title}")
        plt.xlabel("Epochs")
        plt.ylabel(train_title)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

# Mapping of model types: storage key -> display label.
model_name_map = {
    "attention": "Attention Model",
    "baseline": "Baseline Model"
}

# Plot per model
# NOTE(review): the k-fold cell fills history_storage with metric names as keys
# (history_storage[metric].append(...)), not with the "attention"/"baseline"
# keys used here, so no curves are drawn for either entry. Presumably this
# mapping comes from an earlier two-model experiment — confirm and align the keys.
for model_key, label in model_name_map.items():
    plot_model_metrics(model_key, label, history_storage)


In [None]:
from sklearn.metrics import classification_report

# True class ids: collect the one-hot labels of test_ds and take the argmax.
y_true = np.concatenate([y.numpy() for _, y in test_ds], axis=0).argmax(axis=1)

# Predicted class ids from the best fold's model.
y_pred_probs = best_model["model"].predict(test_ds, verbose=0)
y_pred = y_pred_probs.argmax(axis=1)

# Display names used in the report.
class_names = ['glioma', 'meningioma', 'no_tumor', 'pituitary']

# Per-class precision / recall / F1 with four decimals.
report = classification_report(y_true, y_pred, target_names=class_names, digits=4)

print("📊 Classification Report:\n")
print(report)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Step 1: walk test_ds batch by batch, collecting true and predicted class ids.
y_true = []
y_pred = []

for images, labels in test_ds:
    preds = best_model["model"].predict(images, verbose=0)
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Step 2: confusion matrix over the collected ids.
class_names = ['glioma', 'meningioma', 'no_tumor', 'pituitary']
cm = confusion_matrix(y_true, y_pred)

# Step 3: draw it as an annotated heatmap.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)

ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.tight_layout()
plt.show()


In [None]:
def get_best_model(history, metric='val_categorical_accuracy'):
    """Return the index of the fold whose history reaches the highest `metric`.

    Args:
        history: Mapping from metric name to a list of per-fold histories
            (each a sequence of per-epoch values).
        metric: Metric whose per-fold maximum is compared.

    Returns:
        Zero-based index (int) of the best fold.

    Raises:
        ValueError: If no histories are stored for `metric`.
    """
    val_histories = history[metric]
    if len(val_histories) == 0:
        raise ValueError(f"No histories stored for metric '{metric}'")
    best_scores = [np.max(val_acc) for val_acc in val_histories]
    return int(np.argmax(best_scores))


# plot_best_model looks this helper up under the name get_best_fold_index,
# which was never defined; the alias makes that call resolve.
get_best_fold_index = get_best_model
def plot_best_model(history, model_label, metric_key='val_categorical_accuracy'):
    """Plot accuracy and loss curves of the fold that scores best on `metric_key`.

    Args:
        history: Mapping from metric name to a list of per-fold histories; must
            contain 'categorical_accuracy', 'val_categorical_accuracy', 'loss'
            and 'val_loss'.
        model_label: Name used in the figure titles and the printed message.
        metric_key: Metric passed to `get_best_model` to pick the fold.
    """
    # The helper is defined as get_best_model; the previous call used the
    # undefined name get_best_fold_index and raised NameError.
    best_fold = get_best_model(history, metric=metric_key)

    train_acc = history['categorical_accuracy'][best_fold]
    val_acc = history['val_categorical_accuracy'][best_fold]
    train_loss = history['loss'][best_fold]
    val_loss = history['val_loss'][best_fold]

    print(f"📌 Best Fold Index for {model_label}: {best_fold}")

    plt.figure(figsize=(14, 5))

    # Accuracy plot
    plt.subplot(1, 2, 1)
    plt.plot(train_acc, label="Training Accuracy")
    plt.plot(val_acc, label="Validation Accuracy")
    plt.title(f"{model_label} - Accuracy")  # stray ")" removed from the title
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.grid(True)
    plt.legend()

    # Loss plot
    plt.subplot(1, 2, 2)
    plt.plot(train_loss, label="Training Loss")
    plt.plot(val_loss, label="Validation Loss")
    plt.title(f"{model_label} - Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.grid(True)
    plt.legend()

    plt.tight_layout()
    plt.show()


# The driver loop at the end of this cell calls plot_best_fold, which was never
# defined; the alias keeps that call working.
plot_best_fold = plot_best_model
# Plot the best fold. history_storage is normally filled by the k-fold cell with
# metric names as keys; per-model entries (a dict under a key of model_name_map)
# are used when present, otherwise the flat k-fold histories are plotted once.
_per_model_labels = {
    key: name for key, name in model_name_map.items()
    if isinstance(history_storage.get(key), dict)
}

if _per_model_labels:
    for model_key, label in _per_model_labels.items():
        print(f"\n🔍 Analyzing best fold for: {label}")
        plot_best_model(history_storage[model_key], label)
else:
    label = "Low-FLOPs Model"
    print(f"\n🔍 Analyzing best fold for: {label}")
    # plot_best_model is the function defined above; plot_best_fold did not exist.
    plot_best_model(history_storage, label)
