# Fine-Tuning MobileNetV2 for Malaria Detection
In this notebook, we fine-tune the top layers of a pretrained MobileNetV2 model previously trained on the malaria dataset. The goal is to improve validation accuracy beyond the frozen transfer learning baseline.

In [5]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import shutil
import os

## Download Malaria Kaggle Dataset

In [6]:
RAW_DATA_DIR = "data/raw"
BASE_DIR = os.path.join(RAW_DATA_DIR, "cell_images") # Where Kaggle dataset is unzipped
CLEAN_DIR = os.path.join(RAW_DATA_DIR, "clean_cell_images") # Clean folder for training

# Make sure raw folder exists
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set Kaggle config directory to current folder
os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()

# Check for kaggle.json (you need to upload it in Colab first)
if not os.path.exists('kaggle.json'):
    raise FileNotFoundError("Kaggle API token file 'kaggle.json' not found. Please upload it to the current directory.")

# Download & unzip dataset **only if it doesn't exist**
if not os.path.exists(BASE_DIR):
    !kaggle datasets download -d 'iarunava/cell-images-for-detecting-malaria' -p {RAW_DATA_DIR} --unzip
else:
    print("Dataset already downloaded and unzipped.")

# Detect whether there is a nested cell_images folder
if os.path.exists(os.path.join(BASE_DIR, "cell_images")):
  # Nested case: cell_images/cell_images/Parasitized
  DATA_SOURCE_DIR = os.path.join(BASE_DIR, "cell_images")
else:
  # Normal case: cell_images/Parasitized
  DATA_SOURCE_DIR = BASE_DIR

print("Using data source dictionary:", DATA_SOURCE_DIR)

# Remove old clean folder if it exists
if os.path.exists(CLEAN_DIR):
  shutil.rmtree(CLEAN_DIR)

# Copy clean structure
shutil.copytree(
    os.path.join(DATA_SOURCE_DIR, "Parasitized"),
    os.path.join(CLEAN_DIR, "Parasitized")
)

shutil.copytree(
    os.path.join(DATA_SOURCE_DIR, "Uninfected"),
    os.path.join(CLEAN_DIR, "Uninfected")
)

print("Clean dataset ready at:", CLEAN_DIR)

Dataset already downloaded and unzipped.
Using data source dictionary: data/raw/cell_images/cell_images
Clean dataset ready at: data/raw/clean_cell_images


## Load Data

In [2]:
DATA_SET_PATH = 'data/raw/clean_cell_images'
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [7]:
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = datagen.flow_from_directory(
    DATA_SET_PATH,
    target_size=IMG_SIZE,
    class_mode="binary",
    subset="training",
    shuffle=True
)

val_generator = datagen.flow_from_directory(
    DATA_SET_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    subset="validation",
    shuffle=False
)

Found 22048 images belonging to 2 classes.
Found 5510 images belonging to 2 classes.


## Load Frozen MobileNetV2 Model

In [9]:
model = load_model("/content/models/malaria_mobilenetv2_frozen.keras")
model.summary()

## Unfreeze Top Layers for Fine-Tuning

In [10]:
# Freeze most layers, unfreeze top layers
for layer in model.layers:
  layer.trainable = False

# Unfreeze last 30 layers
for layer in model.layers[-30:]:
  layer.trainable = True

print("Trainable layers after unfreezing:")
for layer in model.layers[-40:]:
  print(layer.name, layer.trainable)

Trainable layers after unfreezing:
block_13_expand_relu False
block_13_pad False
block_13_depthwise False
block_13_depthwise_BN False
block_13_depthwise_relu False
block_13_project False
block_13_project_BN False
block_14_expand False
block_14_expand_BN False
block_14_expand_relu False
block_14_depthwise True
block_14_depthwise_BN True
block_14_depthwise_relu True
block_14_project True
block_14_project_BN True
block_14_add True
block_15_expand True
block_15_expand_BN True
block_15_expand_relu True
block_15_depthwise True
block_15_depthwise_BN True
block_15_depthwise_relu True
block_15_project True
block_15_project_BN True
block_15_add True
block_16_expand True
block_16_expand_BN True
block_16_expand_relu True
block_16_depthwise True
block_16_depthwise_BN True
block_16_depthwise_relu True
block_16_project True
block_16_project_BN True
Conv_1 True
Conv_1_bn True
out_relu True
global_average_pooling2d True
dense_2 True
dropout_1 True
dense_3 True


## Recompile with Small Learning Rate

In [12]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss="binary_crossentropy",
    metrics=['accuracy']
)

## Callbacks

In [13]:
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=3,
    min_lr=1e-6
)

## Final Evaluation

In [14]:
loss, acc = model.evaluate(val_generator)
print(f"Fine-tuned validation accuracy: {acc:.4f}")

  self._warn_if_super_not_called()


[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 411ms/step - accuracy: 0.8793 - loss: 0.3400
Fine-tuned validation accuracy: 0.9132


## Save Fine-Tuned Model

In [17]:
model.save("/content/models/malaria_mobilenetv2_finetuned.keras")
print("Fine-tuned model saved!")

Fine-tuned model saved!
