# 1. Imports

In [None]:
import os
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split


# 2. Data Handling

In [None]:
# Notebook-wide configuration: every tunable path and hyperparameter lives here.

# Source dataset root; the sampling cell reads the 'Parasitized' and
# 'Uninfected' sub-folders from it.
FULL_DATA_DIR = '/kaggle/input/cell-images-for-detecting-malaria/cell_images'
# Working copy holding the per-class sample (deleted and rebuilt by the sampling cell).
SAMPLED_DATA_DIR = '/kaggle/working/malaria_subset'
# Root of the train/ and val/ directory trees (deleted and rebuilt by the split cell).
SPLIT_DATA_DIR = '/kaggle/working/malaria_split'
# Used as target_size for both generators and, with 3 appended, as the
# default input shape of build_model().
IMG_SIZE = (128, 128)
# Batch size for both the training and validation generators.
BATCH_SIZE = 64
# Passed to model.fit as `epochs`; EarlyStopping may end training sooner.
EPOCHS = 10
# Written by ModelCheckpoint during training and again by model.save() afterwards.
OUTPUT_MODEL_PATH = '/kaggle/working/malaria_cnn.h5'
# Maximum number of images copied per class into SAMPLED_DATA_DIR.
SAMPLE_LIMIT = 5000

In [None]:
# Rebuild the sampled subset from scratch so files left over from a previous
# run never leak into this one.
if os.path.exists(SAMPLED_DATA_DIR):
    shutil.rmtree(SAMPLED_DATA_DIR)
os.makedirs(SAMPLED_DATA_DIR, exist_ok=True)

# Dedicated, seeded RNG: the sample is identical after every
# "Restart Kernel -> Run All" and neither depends on nor disturbs the global
# `random` state. The seed mirrors the random_state used by the train/val split.
sampling_rng = random.Random(42)

for cls in ['Parasitized', 'Uninfected']:
    src_dir = os.path.join(FULL_DATA_DIR, cls)
    dst_dir = os.path.join(SAMPLED_DATA_DIR, cls)
    os.makedirs(dst_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order, so sort before sampling;
    # otherwise the same seed could still select different files.
    files = sorted(
        f for f in os.listdir(src_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Draw at most SAMPLE_LIMIT images; classes with fewer images are taken whole.
    subset = sampling_rng.sample(files, min(SAMPLE_LIMIT, len(files)))

    for f in subset:
        shutil.copy(os.path.join(src_dir, f), os.path.join(dst_dir, f))

print("Subset created:", {c: len(os.listdir(os.path.join(SAMPLED_DATA_DIR, c))) for c in ['Parasitized','Uninfected']})


In [None]:
# Rebuild the train/val directory tree from scratch so a re-run never mixes
# files from an earlier split into the new one.
if os.path.exists(SPLIT_DATA_DIR):
    shutil.rmtree(SPLIT_DATA_DIR)
train_dir = os.path.join(SPLIT_DATA_DIR, 'train')
val_dir = os.path.join(SPLIT_DATA_DIR, 'val')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

for cls in ['Parasitized', 'Uninfected']:
    src_cls_dir = os.path.join(SAMPLED_DATA_DIR, cls)
    # os.listdir() returns entries in arbitrary order; sorting gives
    # train_test_split a stable input so random_state=42 really does yield
    # the same 80/20 partition on every run.
    files = sorted(os.listdir(src_cls_dir))
    train_files, val_files = train_test_split(files, test_size=0.2, random_state=42)

    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)

    # Copy (not move) so SAMPLED_DATA_DIR stays intact.
    for f in train_files:
        shutil.copy(os.path.join(src_cls_dir, f), os.path.join(train_dir, cls, f))
    for f in val_files:
        shutil.copy(os.path.join(src_cls_dir, f), os.path.join(val_dir, cls, f))

print("Data split done:")
print("Train:", {c: len(os.listdir(os.path.join(train_dir, c))) for c in ['Parasitized','Uninfected']})
print("Val:", {c: len(os.listdir(os.path.join(val_dir, c))) for c in ['Parasitized','Uninfected']})


# 4. Data generators and augmentation

In [None]:
# Augmentation applied to training images only; validation images are just rescaled.
augmentation_options = {
    'rescale': 1./255,
    'rotation_range': 20,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_options)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings common to both directory iterators.
flow_options = {
    'target_size': IMG_SIZE,
    'batch_size': BATCH_SIZE,
    'class_mode': 'binary',
}

# Training batches are shuffled; validation batches keep a fixed order.
train_generator = train_datagen.flow_from_directory(train_dir, shuffle=True, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, shuffle=False, **flow_options)


# 5. Model Building 

In [None]:
def build_model(input_shape=(*IMG_SIZE, 3)):
    """Assemble the uncompiled CNN used for binary classification.

    The network stacks three Conv2D -> BatchNormalization -> MaxPooling2D
    stages (32, 64 and 128 filters), then Flatten, Dense(256, relu),
    Dropout(0.5) and a single sigmoid output unit.

    Args:
        input_shape: Forwarded as ``input_shape`` to the first Conv2D layer.
            Defaults to IMG_SIZE with a trailing 3 (presumably the RGB
            channel axis).

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    stack = []
    for stage_index, n_filters in enumerate((32, 64, 128)):
        conv_options = {'activation': 'relu', 'padding': 'same'}
        if stage_index == 0:
            # Only the first layer declares the input shape.
            conv_options['input_shape'] = input_shape
        stack.append(Conv2D(n_filters, (3,3), **conv_options))
        stack.append(BatchNormalization())
        stack.append(MaxPooling2D((2,2)))

    # Classifier head.
    stack.append(Flatten())
    stack.append(Dense(256, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(1, activation='sigmoid'))

    return Sequential(stack)

# Instantiate the network with its default input shape and prepare it for
# binary classification (sigmoid output + binary cross-entropy).
model = build_model()
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


# 6. Training

In [7]:
# Writes the model to OUTPUT_MODEL_PATH whenever validation accuracy improves.
checkpoint_cb = ModelCheckpoint(
    OUTPUT_MODEL_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
# Halves the learning rate after 2 epochs without val_loss improvement.
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    verbose=1,
)
# Stops after 4 stagnant epochs and restores the best val_loss weights.
# NOTE(review): the checkpoint tracks val_accuracy while this tracks val_loss,
# so "best" may refer to different epochs - confirm this is intended.
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=4,
    restore_best_weights=True,
    verbose=1,
)
callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb]

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)



[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step - accuracy: 0.7245 - loss: 0.5681
Epoch 3: val_accuracy improved from 0.50100 to 0.50150, saving model to /kaggle/working/malaria_cnn.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 274ms/step - accuracy: 0.7246 - loss: 0.5679 - val_accuracy: 0.5015 - val_loss: 3.5708 - learning_rate: 0.0010
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step - accuracy: 0.9192 - loss: 0.2470
Epoch 6: val_accuracy did not improve from 0.65650
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 268ms/step - accuracy: 0.9192 - loss: 0.2471 - val_accuracy: 0.5000 - val_loss: 1.5495 - learning_rate: 0.0010
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step - accuracy: 0.9193 - loss: 0.2353
Epoch 7: val_accuracy improved from 0.65650 to 0.87050, saving model to /kaggle/working/malaria_cnn.h5
[1m125/125[0m [32m━━━━━━━━━━━

In [8]:

# Persist the current in-memory model.
# NOTE(review): ModelCheckpoint in the training cell writes its best
# val_accuracy model to this same path; this call replaces that file with
# whatever weights `model` holds after fit() - confirm that is intended.
model.save(OUTPUT_MODEL_PATH)
print(f"Model saved to: {OUTPUT_MODEL_PATH}")



Model saved to: /kaggle/working/malaria_cnn.h5


# 7. Evaluation


In [9]:

# Score the in-memory model on the validation generator.
val_loss, val_acc = model.evaluate(val_generator)
print(f"Validation loss: {val_loss:.4f}, Validation acc: {val_acc:.4f}")

# Best-effort download link. `display` is imported explicitly rather than
# relying on the IPython builtin, and any failure is reported (with the file
# location) instead of being silently swallowed.
try:
    from IPython.display import FileLink, display
    print('Download link:')
    display(FileLink(OUTPUT_MODEL_PATH))
except Exception as link_error:
    print(f"Could not render a download link ({link_error!r}); "
          f"the model file is at {OUTPUT_MODEL_PATH}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - accuracy: 0.9220 - loss: 0.2174
Validation loss: 0.1875, Validation acc: 0.9390
Download link:
