In [33]:
from tensorflow.keras import backend as K
K.clear_session()


In [34]:
# Breast Cancer Classification using Resnet

## 1. Import Libraries

import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import resample
from PIL import Image
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import load_model



In [35]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

Load and Prepare Data
    Define Helper Functions

In [36]:
class CustomImageDataGenerator(Sequence):
    def __init__(self, image_filenames, image_directory, batch_size=32, shuffle=True, undersample=False, validation_split=0.25):
        self.image_filenames = np.array(image_filenames)  # Ensure it's a numpy array
        self.image_directory = image_directory
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.undersample = undersample
        self.validation_split = validation_split
        
        # Split data into training and validation
        self.train_filenames, self.val_filenames = self.split_data()
        self.train_labels = self.get_labels(self.train_filenames)
        self.val_labels = self.get_labels(self.val_filenames)
        
        # Initialize shuffling
        self.on_epoch_end()

    def split_data(self):
        num_val_samples = int(len(self.image_filenames) * self.validation_split)
        return (self.image_filenames[num_val_samples:], self.image_filenames[:num_val_samples])
    
    def get_labels(self, filenames):
        labels = []
        for img in filenames:
            if img[-5] == '0':
                labels.append(0)  # Label for non-cancer
            elif img[-5] == '1':
                labels.append(1)  # Label for cancer
        return np.array(labels)
    
    def __len__(self):
        return int(np.floor(len(self.train_filenames) / self.batch_size))
    
    def __getitem__(self, index):
        batch_filenames = self.train_filenames[index * self.batch_size:(index + 1) * self.batch_size]
        batch_labels = self.train_labels[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self.__data_generation(batch_filenames, batch_labels)
        return X, y

    def __data_generation(self, batch_filenames, batch_labels):
        X = np.empty((self.batch_size, 224, 224, 3))
        y = np.empty((self.batch_size), dtype=int)
        
        for i, (filename, label) in enumerate(zip(batch_filenames, batch_labels)):
            image = Image.open(os.path.join(self.image_directory, filename))
            image = image.resize((224, 224))
            X[i,] = np.array(image) / 255.0
            y[i] = label
        
        if self.undersample:
            X, y = self.undersample_data(X, y)
        
        return X, y

    def undersample_data(self, X, y):
        minority_class = 1
        majority_class = 0
        
        X_minority = X[y == minority_class]
        y_minority = y[y == minority_class]
        X_majority = X[y == majority_class]
        y_majority = y[y == majority_class]

        minority_size = len(X_minority)
        if len(X_majority) > minority_size:
            X_majority_undersampled, y_majority_undersampled = resample(
                X_majority, y_majority,
                replace=False,
                n_samples=minority_size,
                random_state=42
            )
            X_balanced = np.concatenate([X_majority_undersampled, X_minority])
            y_balanced = np.concatenate([y_majority_undersampled, y_minority])
        else:
            # If majority class size is less than or equal to minority size, do not undersample
            X_balanced = np.concatenate([X_majority, X_minority])
            y_balanced = np.concatenate([y_majority, y_minority])
        
        return X_balanced, y_balanced
    
    def on_epoch_end(self):
        if self.shuffle:
            indices = np.arange(len(self.train_filenames))
            np.random.shuffle(indices)
            self.train_filenames = np.array(self.train_filenames)[indices]
            self.train_labels = np.array(self.train_labels)[indices]

Load Data

In [37]:
import glob


# Define the image directory
image_directory = 'C:/Users/DELL/josiah_project/breast-histopathology-images'

# Use glob to get all image filenames in the directory
image_filenames = glob.glob(image_directory + '/**/*.png', recursive=True)



Customize and Train Resnet50 Model
     Define the Model

In [38]:
def create_model():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = create_model()
model.summary()


In [39]:
# Define an EarlyStopping callback
# Define callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-7,
    restore_best_weights=True
)

plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_delta=1e-7,
    cooldown=0,
    verbose=1
)

checkpoint_cb = ModelCheckpoint("E:/Model_output/best_model_resnet.keras", save_best_only=True, monitor="val_loss", mode="min")


lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 10))

Data Generators

In [40]:
# Initialize the data generators
train_datagen = CustomImageDataGenerator(
    image_filenames=image_filenames,
    image_directory=image_directory,
    batch_size=32,
    shuffle=True,
    undersample=True,
    validation_split=0.25
)

# Training data generator
train_generator = train_datagen

# Validation data generator
val_generator = CustomImageDataGenerator(
    image_filenames=train_datagen.val_filenames,
    image_directory=image_directory,
    batch_size=32,
    shuffle=False,
    undersample=False,
    validation_split=0  # Not necessary here
)


Train the Model

In [41]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,  # Specify the validation data generator
    class_weight={0: 1.0, 1: 2.5},  # Adjust class weights if needed
    callbacks=[early_stopping, plateau, checkpoint_cb]  # List of callbacks
)


Epoch 1/10
[1m  239/13008[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:05:55[0m 874ms/step - accuracy: 0.5022 - loss: 1.2541

In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(val_generator)
print(f'Validation Loss: {loss}')
print(f'Validation Accuracy: {accuracy}')


In [None]:
model.save(r'C:\Users\DELL\josiah_project\DATAPIPE-LINE\output')

 Make Predictions

In [None]:
def predict_image(image_path):
    image = load_image(image_path)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    prediction = model.predict(image)
    return 'Cancer' if prediction[0] > 0.5 else 'Non-cancer'

# Example usage
image_path = r'E:\JOSIAH CANCER DATASET_DONT F_TOUCH IT\MINI-DDSM-Complete-JPEG-8\Benign\0029\C_0029_1.LEFT_CC.jpg'
result = predict_image(image_path)
print(f'Prediction for the image: {result}')
