<a href="https://colab.research.google.com/github/Arunesh2004/ML_Projects_/blob/main/Breast_Cancer_Detection_Model(CNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upload your ZIP file containing the 'benign' and 'malignant' image folders
from google.colab import files
uploaded = files.upload()

Saving breast-cancer.zip to breast-cancer.zip


In [2]:
import zipfile
import os

# Unzip the uploaded dataset
with zipfile.ZipFile("breast-cancer.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/original")

# Check if extraction was successful
print("✅ Extracted to /content/original")
print("Contents:", os.listdir("/content/original"))


✅ Extracted to /content/original
Contents: ['breast-cancer']


In [3]:
import shutil
import random

# This function splits the images in a folder into training and testing sets
def split_data(source, train_dir, test_dir, split_ratio=0.8):
    # Filter out empty files
    files = [f for f in os.listdir(source) if os.path.getsize(os.path.join(source, f)) > 0]
    random.shuffle(files)  # Shuffle files randomly
    split_point = int(len(files) * split_ratio)
    train_files = files[:split_point]
    test_files = files[split_point:]

    # Copy files to train and test directories
    for file in train_files:
        shutil.copy(os.path.join(source, file), os.path.join(train_dir, file))
    for file in test_files:
        shutil.copy(os.path.join(source, file), os.path.join(test_dir, file))

# Create folders for the split data
base_path = '/content/breast_cancer_dataset'
classes = ['benign', 'malignant']

for category in classes:
    os.makedirs(f'{base_path}/train/{category}', exist_ok=True)
    os.makedirs(f'{base_path}/test/{category}', exist_ok=True)

    split_data(
        source=f'/content/original/breast-cancer/{category}',
        train_dir=f'{base_path}/train/{category}',
        test_dir=f'{base_path}/test/{category}'
    )


In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create training data generator with augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

# Test generator with only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# Load training images from directory
train_generator = train_datagen.flow_from_directory(
    base_path + '/train',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary'
)

# Load testing images from directory
test_generator = test_datagen.flow_from_directory(
    base_path + '/test',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary'
)


Found 13695 images belonging to 2 classes.
Found 3424 images belonging to 2 classes.


In [5]:
import tensorflow as tf

# Define a Convolutional Neural Network
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),  # Dropout to prevent overfitting
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary output (benign vs malignant)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show model architecture
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training early if validation loss doesn't improve
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=test_generator,
    callbacks=[early_stop]
)

  self._warn_if_super_not_called()


Epoch 1/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m508s[0m 1s/step - accuracy: 0.7072 - loss: 0.5880 - val_accuracy: 0.8388 - val_loss: 0.3786
Epoch 2/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 1s/step - accuracy: 0.8359 - loss: 0.3824 - val_accuracy: 0.8429 - val_loss: 0.3701
Epoch 3/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m495s[0m 1s/step - accuracy: 0.8601 - loss: 0.3464 - val_accuracy: 0.8689 - val_loss: 0.3142
Epoch 4/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 1s/step - accuracy: 0.8700 - loss: 0.3224 - val_accuracy: 0.8843 - val_loss: 0.2846
Epoch 5/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m479s[0m 1s/step - accuracy: 0.8799 - loss: 0.3028 - val_accuracy: 0.8981 - val_loss: 0.2670
Epoch 6/20
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m484s[0m 1s/step - accuracy: 0.8852 - loss: 0.2939 - val_accuracy: 0.8969 - val_loss: 0.2549
Epoch 7/20
[1m428/428

In [7]:
# Reinitialize generators without shuffling for accurate evaluation
train_generator = train_datagen.flow_from_directory(
    base_path + '/train',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    base_path + '/test',
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Evaluate model on train and test data
train_loss, train_acc = model.evaluate(train_generator)
test_loss, test_acc = model.evaluate(test_generator)

print(f"✅ Train Accuracy: {train_acc * 100:.2f}%")
print(f"✅ Test Accuracy: {test_acc * 100:.2f}%")


Found 13695 images belonging to 2 classes.
Found 3424 images belonging to 2 classes.
[1m428/428[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 386ms/step - accuracy: 0.8946 - loss: 0.2617
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 274ms/step - accuracy: 0.8892 - loss: 0.2671
✅ Train Accuracy: 91.01%
✅ Test Accuracy: 91.00%


In [8]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Load and prepare an image for prediction
img_path = '/content/original/breast-cancer/benign/9322_idx5_x1851_y1201_class0.png'  # Replace with your own image
img = image.load_img(img_path, target_size=(128, 128))
img_array = image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) / 255.0  # Normalize

# Predict class
prediction = model.predict(img_array)
print("🧠 Prediction:", "Malignant" if prediction[0][0] > 0.5 else "Benign")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
🧠 Prediction: Benign
