In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

Set Parameters

In [2]:
train_dir = 'breastcancerdataset/train'
test_dir = 'breastcancerdataset/test'

img_width, img_height = 50, 50

num_train_benign = len(os.listdir(os.path.join(train_dir, 'benign')))
num_train_malignant = len(os.listdir(os.path.join(train_dir, 'malignant')))
num_test_benign = len(os.listdir(os.path.join(test_dir, 'benign')))
num_test_malignant = len(os.listdir(os.path.join(test_dir, 'malignant')))

print(f"Training data: {num_train_benign} benign, {num_train_malignant} malignant")
print(f"Testing data: {num_test_benign} benign, {num_test_malignant} malignant")

# Calculate class weights to address imbalance
total_train = num_train_benign + num_train_malignant
weight_for_0 = (1 / num_train_benign) * (total_train / 2.0)
weight_for_1 = (1 / num_train_malignant) * (total_train / 2.0)
class_weight = {0: weight_for_0, 1: weight_for_1}

print(f"Class weights: {class_weight}")

Training data: 158990 benign, 63028 malignant
Testing data: 39748 benign, 15758 malignant
Class weights: {0: 0.6982137241335933, 1: 1.7612648346766517}


Image Preprocessing

In [4]:
batch_size = 32
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary'
)

Found 222018 images belonging to 2 classes.
Found 55506 images belonging to 2 classes.


Creating the CNN Model

In [7]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),

    Dense(512, activation='relu'),
    Dropout(0.5),  # Dropout to prevent overfitting
    Dense(1, activation='sigmoid')  # Output layer (sigmoid for binary classification)
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()

Train the model

In [8]:
steps_per_epoch = train_generator.samples // batch_size
validation_steps = test_generator.samples // batch_size

history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=15, 
    validation_data=test_generator,
    validation_steps=validation_steps,
    class_weight=class_weight
)

Epoch 1/15


  self._warn_if_super_not_called()


[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 49ms/step - accuracy: 0.7792 - loss: 0.4855 - val_accuracy: 0.8244 - val_loss: 0.4057
Epoch 2/15
[1m   1/6938[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:24[0m 12ms/step - accuracy: 0.8438 - loss: 0.2744



[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.8438 - loss: 0.2744 - val_accuracy: 0.8410 - val_loss: 0.3771
Epoch 3/15
[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 32ms/step - accuracy: 0.8292 - loss: 0.3994 - val_accuracy: 0.8025 - val_loss: 0.4512
Epoch 4/15
[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.6875 - loss: 0.5490 - val_accuracy: 0.8163 - val_loss: 0.4290
Epoch 5/15
[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 32ms/step - accuracy: 0.8340 - loss: 0.3883 - val_accuracy: 0.8500 - val_loss: 0.3573
Epoch 6/15
[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.7500 - loss: 0.6200 - val_accuracy: 0.8421 - val_loss: 0.3720
Epoch 7/15
[1m6938/6938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 32ms/step - accuracy: 0.8373 - loss: 0.3790 - val_accuracy: 0.8458 - val_loss: 0.3661
Epoch 8/15
[1m

In [10]:
model.save("breastcancer.h5")



In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

test_datagen = ImageDataGenerator(rescale=1./255)
gen = test_datagen.flow_from_directory('breastcancerdataset/test', target_size=(50, 50))

print(gen.class_indices)


Found 55506 images belonging to 2 classes.
{'benign': 0, 'malignant': 1}
