In [21]:
import os
import shutil

base = "../data/SIPaKMeD"

mapping = {
    "im_Dyskeratotic": "abnormal",
    "im_Koilocytotic": "abnormal",
    "im_Metaplastic": "normal",
    "im_Parabasal": "normal",
    "im_Superficial-Intermediate": "normal"
}

for src, dest in mapping.items():
    src_path = os.path.join(base, src)
    dest_path = os.path.join(base, dest)
    os.makedirs(dest_path, exist_ok=True)

    copied = 0

    for root, dirs, files in os.walk(src_path):
        for file in files:
            if file.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
                shutil.copy(
                    os.path.join(root, file),
                    os.path.join(dest_path, file)
                )
                copied += 1

    print(f"âœ… {src} â†’ {dest}: {copied} images copied")

print("ðŸŽ¯ SIPaKMeD restructuring DONE")




âœ… im_Dyskeratotic â†’ abnormal: 1036 images copied
âœ… im_Koilocytotic â†’ abnormal: 1063 images copied
âœ… im_Metaplastic â†’ normal: 1064 images copied
âœ… im_Parabasal â†’ normal: 895 images copied
âœ… im_Superficial-Intermediate â†’ normal: 957 images copied
ðŸŽ¯ SIPaKMeD restructuring DONE


In [22]:
print("Normal:", len(os.listdir("../data/SIPaKMeD/normal")))
print("Abnormal:", len(os.listdir("../data/SIPaKMeD/abnormal")))



Normal: 1808
Abnormal: 1434


In [24]:
for root, dirs, files in os.walk("../data/SIPaKMeD/im_Dyskeratotic"):
    print(root)
    print("files:", files[:5])
    break



../data/SIPaKMeD/im_Dyskeratotic
files: []


In [25]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam


In [36]:
IMG_SIZE = 128

data_dir = "../data/SIPaKMeD"
categories = ["normal", "abnormal"]

images = []
labels = []

for category in categories:
    folder_path = os.path.join(data_dir, category)
    for file in os.listdir(folder_path):
        if file.lower().endswith(".bmp"):
            img_path = os.path.join(folder_path, file)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            images.append(img)
            labels.append(category)

images = np.array(images)
labels = np.array(labels)

print("Images shape:", images.shape)
print("Labels count:", len(labels))


Images shape: (3242, 128, 128, 3)
Labels count: 3242


In [37]:
IMG_SIZE = 128


data_dir = "../data/SIPaKMeD"
categories = ["normal", "abnormal"]

images = []
labels = []

for category in categories:
    folder_path = os.path.join(data_dir, category)
    for file in os.listdir(folder_path):
        if file.lower().endswith(".bmp"):
            img_path = os.path.join(folder_path, file)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            images.append(img)
            labels.append(category)

images = np.array(images)
labels = np.array(labels)

print("Images shape:", images.shape)
print("Labels count:", len(labels))


Images shape: (3242, 128, 128, 3)
Labels count: 3242


In [38]:
# Normalize
images = images / 255.0

# Encode labels
le = LabelEncoder()
labels = le.fit_transform(labels)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42
)

print("Train:", X_train.shape)
print("Test:", X_test.shape)


Train: (2593, 128, 128, 3)
Test: (649, 128, 128, 3)


In [39]:
base_model = MobileNetV2(
    input_shape=(128, 128, 3),
    include_top=False,
    weights="imagenet"
)

base_model.trainable = False  # VERY IMPORTANT


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5


In [40]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.5)(x)
output = Dense(1, activation="sigmoid")(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 64, 64, 32)   864         ['input_2[0][0]']                
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 64, 64, 32)   128         ['Conv1[0][0]']                  
                                                                                                  
 Conv1_relu (ReLU)              (None, 64, 64, 32)   0           ['bn_Conv1[0][0]']         

In [41]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=3,
    batch_size=16
)


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [44]:
loss, accuracy = model.evaluate(
    X_test, y_test,
    batch_size=16
)

print("Test Accuracy:", accuracy)



Test Accuracy: 0.8890600800514221


In [45]:
model.save("../models/cervical_cancer_cnn.h5")
print("âœ… Cervical cancer model saved successfully")


âœ… Cervical cancer model saved successfully
