In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt

data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=30,
    shear_range=0.2,
    zoom_range=0.2,
    rescale=1/255.0,
    fill_mode='nearest',
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,  
    brightness_range=[0.8, 1.2],
    vertical_flip=False,
)

val_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.0,
)
train_gen = data_gen.flow_from_directory(
    directory="/kaggle/input/brain-tumor-mri-dataset/Training",
    target_size = (224,224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    color_mode='rgb',
)

test_gen = val_gen.flow_from_directory(
    directory = "/kaggle/input/brain-tumor-mri-dataset/Testing",
    target_size = (224,224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    color_mode='rgb',
)

2025-05-06 02:02:09.424301: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746496929.657648      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746496929.726964      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Reshape, Dropout, BatchNormalization, GlobalAveragePooling1D, LayerNormalization, Add
from itertools import product
import numpy as np

# Build Model Function
def build_model(learning_rate, dropout_rate, embed_dim, num_heads):
    input_layer = Input(shape=(224, 224, 3))
    base_model = DenseNet121(include_top=False, weights='imagenet', input_tensor=input_layer)
    base_model.trainable = True

    for layer in base_model.layers[:-40]:  
        layer.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x_1 = Reshape((1, -1))(x)
    x_1 = Dense(embed_dim)(x_1)
    x_residual = x_1
    x_1 = LayerNormalization(epsilon=1e-6)(x_1)

    mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim // num_heads)
    x_att = mha(query=x_1, value=x_1, key=x_1)
    x_att = LayerNormalization(epsilon=1e-6)(x_att)
    x_att = Add()([x_att, x_residual])

    x_ffn = Dense(embed_dim, activation='relu')(x_att)
    x_ffn = Dense(embed_dim)(x_ffn)
    x_ffn = LayerNormalization(epsilon=1e-6)(x_ffn)
    x = Add()([x_att, x_ffn])
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout_rate)(x)
    x = BatchNormalization()(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(dropout_rate)(x)
    x = BatchNormalization()(x)
    x = Dense(32, activation="relu")(x)
    output = Dense(4, activation='softmax')(x)
    
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=1.0),
        metrics=['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')]
    )
    return model

learning_rates = [1e-5, 5e-5, 1e-4]
dropout_rates = [0.3, 0.5]
embed_dims = [512, 1024]
num_heads_list = [4, 8]
best_val_loss = float('inf')
best_model = None
best_hparams = {}

for lr, dr, ed, nh in product(learning_rates, dropout_rates, embed_dims, num_heads_list):
    print(f"Training with lr={lr}, dropout={dr}, embed_dim={ed}, num_heads={nh}")
    model = build_model(learning_rate=lr, dropout_rate=dr, embed_dim=ed, num_heads=nh)
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=8, monitor='val_loss', mode='min', restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(f'model_lr{lr}_dr{dr}_ed{ed}_nh{nh}.keras', monitor='val_loss', save_best_only=True)
    
    history = model.fit(
        train_gen,
        epochs=5,
        batch_size=32,
        validation_data=test_gen,
        callbacks=[early_stopping, reduce_lr, checkpoint]
    )
    
    val_loss = min(history.history['val_loss'])
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model
        best_hparams = {'learning_rate': lr, 'dropout_rate': dr, 'embed_dim': ed, 'num_heads': nh}

print(f"Best hyperparameters: {best_hparams}")
best_model.save('final_best_model.h5')

Training with lr=1e-05, dropout=0.3, embed_dim=512, num_heads=4


I0000 00:00:1746496948.081853      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/5


  self._warn_if_super_not_called()
I0000 00:00:1746496992.024429      74 service.cc:148] XLA service 0x7d87102295b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746496992.025433      74 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746496996.605951      74 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  2/179[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 62ms/step - accuracy: 0.2266 - loss: 1.6405 - precision: 0.1951 - recall: 0.0703   

I0000 00:00:1746497016.048905      74 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 834ms/step - accuracy: 0.4240 - loss: 1.2997 - precision: 0.4898 - recall: 0.2674 - val_accuracy: 0.7582 - val_loss: 0.6895 - val_precision: 0.8408 - val_recall: 0.6407 - learning_rate: 1.0000e-05
Epoch 2/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 425ms/step - accuracy: 0.6781 - loss: 0.7643 - precision: 0.7706 - recall: 0.5607 - val_accuracy: 0.8101 - val_loss: 0.5675 - val_precision: 0.8590 - val_recall: 0.7529 - learning_rate: 1.0000e-05
Epoch 3/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 426ms/step - accuracy: 0.7709 - loss: 0.6081 - precision: 0.8321 - recall: 0.6727 - val_accuracy: 0.8200 - val_loss: 0.5157 - val_precision: 0.8639 - val_recall: 0.7796 - learning_rate: 1.0000e-05
Epoch 4/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 426ms/step - accuracy: 0.8056 - loss: 0.5393 - precision: 0.8568 - recall: 0.7288 - val_accuracy: 0.8276 - val



[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 648ms/step - accuracy: 0.4248 - loss: 1.3205 - precision: 0.4997 - recall: 0.2443 - val_accuracy: 0.7376 - val_loss: 0.6503 - val_precision: 0.8087 - val_recall: 0.6545 - learning_rate: 1.0000e-05
Epoch 2/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 438ms/step - accuracy: 0.7087 - loss: 0.7346 - precision: 0.8087 - recall: 0.5821 - val_accuracy: 0.7902 - val_loss: 0.5488 - val_precision: 0.8323 - val_recall: 0.7384 - learning_rate: 1.0000e-05
Epoch 3/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 435ms/step - accuracy: 0.8083 - loss: 0.5481 - precision: 0.8752 - recall: 0.7268 - val_accuracy: 0.8207 - val_loss: 0.4891 - val_precision: 0.8488 - val_recall: 0.7750 - learning_rate: 1.0000e-05
Epoch 4/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 433ms/step - accuracy: 0.8375 - loss: 0.4839 - precision: 0.8858 - recall: 0.7747 - val_accuracy: 0.8337 - val