In [2]:
# Step 0: Imports
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

In [3]:

# Step 1: Load Data
def load_images_and_masks(base_dir, classes=['benign','malignant']):
    images = []
    labels = []
    masks = []
    for cls in classes:
        cls_dir = os.path.join(base_dir, 'training_set', cls)
        for fname in os.listdir(cls_dir):
            if fname.endswith('.png') and not fname.endswith('_mask.png'):
                img_path = os.path.join(cls_dir, fname)
                mask_name = fname.split('.')[0] + '_mask.png'
                mask_path = os.path.join(cls_dir, mask_name)

                # load image
                img = Image.open(img_path).convert('L')  # grayscale
                img = img.resize((224, 224))
                img_arr = img_to_array(img) / 255.0

                # load mask
                if os.path.exists(mask_path):
                    mask = Image.open(mask_path).convert('L')
                    mask = mask.resize((224, 224))
                    mask_arr = img_to_array(mask) / 255.0
                else:
                    # if no mask, use zeros
                    mask_arr = np.zeros((224, 224, 1))

                images.append(img_arr)
                masks.append(mask_arr)

                # label
                labels.append(1 if cls == 'malignant' else 0)
    return np.array(images), np.array(masks), np.array(labels)

In [4]:
base_dir = '../data/complete_set'  # path to complete_set
X, mask_X, y = load_images_and_masks(base_dir)

In [5]:
# Step 2: Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
mask_train, mask_test, _, _ = train_test_split(mask_X, y, test_size=0.2, random_state=42)

In [13]:
# Step 3: Build a Simple CNN Classifier
from keras.layers import Input
num_classes = 2  # benign, malignant

model = Sequential([
    Input(shape=(224,224,1)),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])


In [7]:
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [8]:
# Step 4: Train Model
model.fit(X_train, y_train, epochs=20, batch_size=16, validation_split=0.2)

Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 681ms/step - accuracy: 0.6759 - loss: 1.1937 - val_accuracy: 0.6923 - val_loss: 0.5840
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 668ms/step - accuracy: 0.7091 - loss: 0.5248 - val_accuracy: 0.6813 - val_loss: 0.5527
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 641ms/step - accuracy: 0.8033 - loss: 0.4265 - val_accuracy: 0.8242 - val_loss: 0.5222
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 641ms/step - accuracy: 0.8532 - loss: 0.3683 - val_accuracy: 0.7802 - val_loss: 0.4820
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 650ms/step - accuracy: 0.9003 - loss: 0.2605 - val_accuracy: 0.7802 - val_loss: 0.5659
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 670ms/step - accuracy: 0.9363 - loss: 0.1945 - val_accuracy: 0.8571 - val_loss: 0.4023
Epoch 7/20
[1m23/23[

<keras.src.callbacks.history.History at 0x22ff9a0ef90>

In [9]:
# Step 5: Evaluate
y_pred = np.argmax(model.predict(X_test), axis=1)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 200ms/step


In [10]:
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0])

In [11]:
print(classification_report(y_test, y_pred, target_names=['Benign','Malignant']))

              precision    recall  f1-score   support

      Benign       0.87      0.88      0.88        85
   Malignant       0.63      0.61      0.62        28

    accuracy                           0.81       113
   macro avg       0.75      0.74      0.75       113
weighted avg       0.81      0.81      0.81       113



In [12]:
print(confusion_matrix(y_test, y_pred))

[[75 10]
 [11 17]]
