In [13]:
# Breast Cancer Classification using VGG19

## 1. Import Libraries

import numpy as np
import os
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import resample
from PIL import Image
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau



In [14]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

Load and Prepare Data
    Define Helper Functions

In [15]:
def load_image(image_path):
    image = Image.open(image_path)
    image = image.resize((224, 224))  # Resize to match VGG19 input
    return np.array(image) / 255.0  # Normalize pixel values to [0, 1]

def create_labels_and_images(non_cancer_imgs, cancer_imgs, image_directory):
    images = []
    labels = []

    for img in non_cancer_imgs:
        image_path = os.path.join(image_directory, img)
        image = load_image(image_path)
        images.append(image)
        labels.append(0)  # Label for non-cancer

    for img in cancer_imgs:
        image_path = os.path.join(image_directory, img)
        image = load_image(image_path)
        images.append(image)
        labels.append(1)  # Label for cancer

    return np.array(images), np.array(labels)

def undersample_majority_class(X, y, majority_class=0, minority_class=1):
    X_majority = X[y == majority_class]
    y_majority = y[y == majority_class]
    X_minority = X[y == minority_class]
    y_minority = y[y == minority_class]

    minority_size = len(X_minority)
    X_majority_undersampled, y_majority_undersampled = resample(
        X_majority, y_majority,
        replace=False,
        n_samples=minority_size,
        random_state=42
    )

    X_balanced = np.concatenate([X_majority_undersampled, X_minority])
    y_balanced = np.concatenate([y_majority_undersampled, y_minority])

    return X_balanced, y_balanced

def categorize_images(image_filenames):
    non_cancer_imgs = []
    cancer_imgs = []

    for img in image_filenames:
        if img[-5] == '0':
            non_cancer_imgs.append(img)
        elif img[-5] == '1':
            cancer_imgs.append(img)

    return non_cancer_imgs, cancer_imgs


Load Data

In [16]:
import glob

# Define the directory containing your images
image_directory = 'C:/Users/DELL/josiah_project/breast-histopathology-images/**/*.png'

# Use glob to get all image filenames in the directory
image_filenames = glob.glob(image_directory, recursive=True)


# Categorize images based on filenames
non_cancer_imgs, cancer_imgs = categorize_images(image_filenames)

# Define your image directory
image_directory = 'C:/Users/DELL/josiah_project/breast-histopathology-images/**/*.png'

# Create labels and images
X, y = create_labels_and_images(non_cancer_imgs, cancer_imgs, image_directory)
X, y = undersample_majority_class(X, y)


MemoryError: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64

Customize and Train VGG19 Model
     Define the Model

In [None]:
def create_model():
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = create_model()
model.summary()


Data Generators

In [None]:
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
    'data_directory',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training'
)
validation_generator = train_datagen.flow_from_directory(
    'data_directory',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)


Train the Model

In [None]:
# Define an EarlyStopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',          # Monitor the validation loss
    patience=5,                  # Number of epochs with no improvement after which training will be stopped
    min_delta=1e-7,              # Minimum change in the monitored quantity to be considered an improvement
    restore_best_weights=True,   # Restore model weights from the epoch with the best value of monitored quantity
)

# Define a ReduceLROnPlateau callback
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',   # Monitor the validation loss
    factor=0.2,           # Factor by which the learning rate will be reduced (new_lr = lr * factor)
    patience=5,           # Number of epochs with no improvement after which learning rate will be reduced
    min_delta=1e-7,       # Minimum change in the monitored quantity to trigger a learning rate reduction
    cooldown=0,           # Number of epochs to wait before resuming normal operation after learning rate reduction
    verbose=1             # Verbosity mode (1: update messages, 0: no messages)
)

checkpoint_cb = ModelCheckpoint("E:/Model_output/best_model_vgg.keras", save_best_only=True, monitor="val_loss", mode="min")

lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))

In [None]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    class_weight={0: 1.0, 1: 2.5}  # Adjust weights if needed
    callbacks=[early_stopping, plateau, checkpoint_cb]
)

# Evaluate the model
loss, accuracy = model.evaluate(validation_generator)
print(f'Validation Loss: {loss}')
print(f'Validation Accuracy: {accuracy}')


 Make Predictions

In [None]:
def predict_image(image_path):
    image = load_image(image_path)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    prediction = model.predict(image)
    return 'Cancer' if prediction[0] > 0.5 else 'Non-cancer'

# Example usage
image_path = r'E:\JOSIAH CANCER DATASET_DONT F_TOUCH IT\MINI-DDSM-Complete-JPEG-8\Benign\0029\C_0029_1.LEFT_CC.jpg'
result = predict_image(image_path)
print(f'Prediction for the image: {result}')
