In [3]:
import os
INPUT_DATASET="BreastCancerData"
BASE_PATH=os.path.join(INPUT_DATASET, "Datasets", "idc")
TRAIN_PATH=os.path.sep.join([BASE_PATH,"training"])
VAL_PATH=os.path.sep.join([BASE_PATH,"validation"])
TEST_PATH=os.path.sep.join([BASE_PATH,"testing"])
TRAIN_SPLIT=0.8
VAL_SPLIT=0.1

In [None]:
from imutils import paths 
import random, shutil, os

originalPaths = list(paths.list_images(INPUT_DATASET))
random.seed(7)
random.shuffle(originalPaths)

# Split into train, val, test
index = int(len(originalPaths) * TRAIN_SPLIT)
trainPaths = originalPaths[:index]
testPaths = originalPaths[index:]

index = int(len(trainPaths) * VAL_SPLIT)
valPaths = trainPaths[:index]
trainPaths = trainPaths[index:]  # fix this line

# Set up destination mappings
datasets = [
    ("train", trainPaths, TRAIN_PATH),
    ("val", valPaths, VAL_PATH),
    ("test", testPaths, TEST_PATH)
]

#Loop through each set and organize
for (setType, imagePaths, basePath) in datasets:
    print(f'Building {setType} set...')

    for path in imagePaths:
        # Get filename and label from it
        file = os.path.basename(path)
        label = file.split('_')[-1].replace('class', '').replace('.png', '')  # label is '0' or '1' from filename like '0_12_8863.png'

        # Create label directory if not exists
        labelPath = os.path.join(basePath, label)
        if not os.path.exists(labelPath):
            os.makedirs(labelPath)

        newPath = os.path.join(labelPath, file)

        #  Prevent same file copy error
        if os.path.abspath(path) == os.path.abspath(newPath):
            continue
        if os.path.exists(newPath):
            continue

        shutil.copy2(path, newPath)

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras import backend as K

class CancerNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1

        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1

        model.add(Conv2D(32, (3, 3), activation="relu", input_shape=inputShape))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Conv2D(64, (3, 3), activation="relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3, 3), activation="relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(256, activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(classes, activation="softmax"))

        return model


In [9]:
import matplotlib
matplotlib.use("Agg")  # Used if you want to save plots without displaying them

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Path should match where your data is stored
training_set = train_datagen.flow_from_directory(
    'BreastCancerData/Datasets/idc/training',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)


Found 277524 images belonging to 2 classes.


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt 
import numpy as np 
import os 

# Configuration
NUM_EPOCHS = 4
INIT_LR = 1e-2
BS = 32

# Paths and labels
trainPaths = list(paths.list_images(TRAIN_PATH))
valPaths = list(paths.list_images(VAL_PATH))
testPaths = list(paths.list_images(TEST_PATH))

TrainLabels = [int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels = to_categorical(TrainLabels)

# Class weighting to handle imbalance
classTotals = trainLabels.sum(axis=0)
classWeight = classTotals.max() / classTotals

# Data augmentation
trainAug = ImageDataGenerator(
    rescale=1/255.0,
    rotation_range=20,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest"
)
valAug = ImageDataGenerator(rescale=1/255.0)
testAug = ImageDataGenerator(rescale=1/255.0)

# Generators
trainGen = trainAug.flow_from_directory(
    TRAIN_PATH,
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=True,
    batch_size=BS
)
valGen = valAug.flow_from_directory(
    VAL_PATH,
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=True,
    batch_size=BS
)
testGen = testAug.flow_from_directory(
    TEST_PATH,
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="rgb",
    shuffle=False,
    batch_size=BS
)


Found 277524 images belonging to 2 classes.
Found 251114 images belonging to 2 classes.
Found 276200 images belonging to 2 classes.


In [13]:
model = CancerNet.build(width=48, height=48, depth=3, classes=2)

model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"]
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
M=model.fit(x=trainGen,validation_data=valGen,epochs=10)
model.save("breast_cancer.h5")

  self._warn_if_super_not_called()


Epoch 1/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3763s[0m 433ms/step - accuracy: 0.8304 - loss: 0.4113 - val_accuracy: 0.7165 - val_loss: 3.1404
Epoch 2/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3062s[0m 353ms/step - accuracy: 0.8569 - loss: 0.3375 - val_accuracy: 0.8627 - val_loss: 0.3266
Epoch 3/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2711s[0m 312ms/step - accuracy: 0.8631 - loss: 0.3249 - val_accuracy: 0.7820 - val_loss: 1.4216
Epoch 4/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2639s[0m 304ms/step - accuracy: 0.8667 - loss: 0.3172 - val_accuracy: 0.8593 - val_loss: 0.3456
Epoch 5/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4173s[0m 481ms/step - accuracy: 0.8688 - loss: 0.3117 - val_accuracy: 0.7595 - val_loss: 1.3200
Epoch 6/10
[1m8673/8673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29532s[0m 3s/step - accuracy: 0.8711 - loss: 0.3069 - val_accuracy: 0.8737 - val_l



In [13]:
from tensorflow.keras.models import load_model

model = load_model("cancer_model.h5")



In [15]:
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from imutils import paths

# Make sure TEST_PATH is already defined earlier
testPaths = list(paths.list_images(TEST_PATH))
lenTest = len(testPaths)

print(" Now evaluating the model...")
testGen.reset()
import math
pred_indices = model.predict(testGen, steps=math.ceil(lenTest / BS), verbose=1)

# Use correct method name and remove +1
pred_indices = np.argmax(pred_indices, axis=1)

#  Classification report
print(classification_report(testGen.classes, pred_indices, target_names=testGen.class_indices.keys()))

# Confusion matrix and metrics
cm = confusion_matrix(testGen.classes, pred_indices)
total = np.sum(cm)
accuracy = (cm[0, 0] + cm[1, 1]) / total
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])

print("Confusion Matrix:\n", cm)
print(f"Accuracy     : {accuracy:.4f}")
print(f"Specificity  : {specificity:.4f}")
print(f"Sensitivity  : {sensitivity:.4f}")

#  Plot training loss and accuracy
N = NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), M.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), M.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), M.history["accuracy"], label="train_acc")  # fix spelling
plt.plot(np.arange(0, N), M.history["val_accuracy"], label="val_acc")

plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")  # fix argument
plt.savefig("plt.png")        #  fix method name
plt.close()


 Now evaluating the model...


  self._warn_if_super_not_called()


[1m8632/8632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1629s[0m 189ms/step
              precision    recall  f1-score   support

           0       0.85      0.96      0.90    197777
           1       0.84      0.58      0.69     78423

    accuracy                           0.85    276200
   macro avg       0.85      0.77      0.79    276200
weighted avg       0.85      0.85      0.84    276200

Confusion Matrix:
 [[189095   8682]
 [ 32980  45443]]
Accuracy     : 0.8492
Specificity  : 0.5795
Sensitivity  : 0.9561


NameError: name 'M' is not defined

<Figure size 640x480 with 0 Axes>