In [3]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.utils import Sequence

class BreastHistopathologyDataGenerator(Sequence):
    """Keras ``Sequence`` that streams image patches from a directory tree.

    Every ``.png`` file found under ``dataset_dir`` becomes one sample. Its
    label is the integer name of the directory that directly contains it
    (e.g. ``.../0/patch.png`` -> ``0``, ``.../1/patch.png`` -> ``1``).

    Args:
        dataset_dir: Root directory that is searched recursively.
        batch_size: Number of samples per batch; must be a positive integer.
        target_size: Size passed to ``PIL.Image.resize``, i.e. ``(width, height)``.
        shuffle: If True, the sample order is reshuffled at construction time
            and again after every epoch.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, dataset_dir, batch_size=32, target_size=(50, 50), shuffle=True):
        # Let the Keras base class initialise its own state; recent Keras
        # versions warn when a subclass skips this call.
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.image_paths, self.labels = self._load_dataset()
        # Apply the initial shuffle so the first epoch is not in directory order.
        self.on_epoch_end()

    def _load_dataset(self):
        """Collect image paths and integer labels from ``dataset_dir``.

        Returns:
            Tuple ``(image_paths, labels)`` of two NumPy arrays of equal length.
        """
        image_paths = []
        labels = []
        for subdir, dirs, files in os.walk(self.dataset_dir):
            # Sorting in place makes the traversal order deterministic.
            dirs.sort()
            try:
                # The class label is the name of the containing directory.
                label = int(os.path.basename(subdir))
            except ValueError:
                # Not a label directory (dataset root, hidden folders, ...):
                # ignore its files but keep descending into its children.
                continue
            for file in sorted(files):
                if file.lower().endswith(".png"):
                    image_paths.append(os.path.join(subdir, file))
                    labels.append(label)
        return np.array(image_paths), np.array(labels)

    def __len__(self):
        """Return the number of batches per epoch.

        Rounds up so that the final, possibly smaller, batch is included and
        no sample is silently dropped from training or evaluation.
        """
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Build batch number ``index``.

        Returns:
            Tuple ``(images, labels)``; ``images`` stacks the arrays returned
            by ``_load_image`` for each path in the batch.
        """
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_image_paths = self.image_paths[start:stop]
        batch_labels = self.labels[start:stop]

        # Load and preprocess images
        images = np.array([self._load_image(img_path) for img_path in batch_image_paths])

        return images, np.array(batch_labels)

    def _load_image(self, img_path):
        """Load one image as a float32 RGB array scaled to ``[0, 1]``.

        The image is converted to RGB so that grayscale or RGBA files still
        yield three channels, then resized to ``self.target_size``.
        """
        # The context manager closes the underlying file handle promptly.
        with Image.open(img_path) as raw:
            resized = raw.convert("RGB").resize(self.target_size)
            img_array = np.asarray(resized, dtype=np.float32) / 255.0
        return img_array

    def on_epoch_end(self):
        """Reshuffle paths and labels together when ``shuffle`` is enabled."""
        if self.shuffle:
            # One shared permutation keeps every path aligned with its label.
            indices = np.random.permutation(len(self.image_paths))
            self.image_paths = self.image_paths[indices]
            self.labels = self.labels[indices]

# Define the path to the dataset directory.
# The BREAST_DATASET_DIR environment variable overrides the location so the
# notebook can run on other machines; the default is the original path.
dataset_dir = os.environ.get("BREAST_DATASET_DIR", "/home/jovyan/breast/")

# Create an instance of the data generator
data_generator = BreastHistopathologyDataGenerator(dataset_dir, batch_size=32, target_size=(50, 50))

# # Example usage: Iterate over the generator to get batches of images
# for images, labels in data_generator:
#     print(f"Batch images shape: {images.shape}")
#     print(f"Batch labels: {labels}")


In [5]:
# Example model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Small CNN: two convolution + max-pooling stages followed by a dense head.
# The input shape matches the 50x50 three-channel patches fed by the generator.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification output
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model using the custom data generator.
# `batch_size` is deliberately not passed: the generator already yields
# complete batches, and Keras documents that `batch_size` must not be
# specified when the input is a generator or `Sequence`.
model.fit(data_generator, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x3ff1adc61a0>

In [None]:
# Build a shuffled training generator over `dataset_dir`.
# NOTE(review): nothing is split here - this generator reads the same
# `dataset_dir` as every other generator in this notebook, so no samples are
# held out for validation.
train_data_generator = BreastHistopathologyDataGenerator(
    dataset_dir=dataset_dir,
    batch_size=32,
    target_size=(50, 50),
    shuffle=True
)

# Training the model
# NOTE(review): `model` is the object that an earlier cell already fitted, so
# this call continues training it rather than starting from fresh weights.
model.fit(
    train_data_generator,
    epochs=10,
    steps_per_epoch=len(train_data_generator)  # The number of batches per epoch
)


Epoch 1/10
Epoch 2/10
Epoch 3/10

In [7]:
# Create a validation data generator (same class as before)
# NOTE(review): this generator reads the same `dataset_dir` that the training
# generators use, so the metrics printed below are measured on data the model
# was trained on, not on a held-out set. Point it at separate data to obtain a
# genuine validation score.
val_data_generator = BreastHistopathologyDataGenerator(
    dataset_dir=dataset_dir,
    batch_size=32,
    target_size=(50, 50),
    shuffle=False  # Shuffle can be False for validation/test data
)

# Evaluate the model
# With the metrics configured at compile time, `evaluate` is unpacked here
# into the loss value followed by the accuracy.
loss, accuracy = model.evaluate(val_data_generator)
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy * 100:.2f}%")


Validation Loss: 0.1598
Validation Accuracy: 93.66%


In [8]:
# Save the trained model to a file
# The relative path means the file is written to the current working directory.
# NOTE(review): the `.h5` suffix presumably selects the legacy HDF5 format -
# confirm against the installed Keras version.
model.save('breast_histopathology_cnn_model.h5')
# Reached only if `save` returned without raising.
print("Model saved successfully!")


Model saved successfully!


In [9]:
from tensorflow.keras.models import load_model

# Load the saved model
# This rebinds `model`, so the cells below use the copy restored from disk
# rather than the in-memory object that was trained above.
model = load_model('breast_histopathology_cnn_model.h5')


In [13]:
import numpy as np
from PIL import Image

def predict_image(model, img_path, target_size=(50, 50)):
    """Run ``model`` on a single image file and return its first output value.

    Args:
        model: Object exposing ``predict(batch)``, e.g. the loaded Keras model.
        img_path: Path of the image file to classify.
        target_size: Size passed to ``PIL.Image.resize``, i.e. ``(width, height)``;
            it must match the input size the model expects.

    Returns:
        ``prediction[0][0]``: the first output value for the single image.
    """
    # Convert to RGB so grayscale or RGBA files still produce three channels,
    # and use a context manager so the file handle is closed promptly.
    with Image.open(img_path) as raw:
        resized = raw.convert("RGB").resize(target_size)
        img_array = np.asarray(resized, dtype=np.float32) / 255.0  # Scale to [0, 1]
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

    prediction = model.predict(img_array)
    return prediction[0][0]

# Predict on a sample image
img_path = 'Test_image1.png'
prediction = predict_image(model, img_path)

# Scores above 0.5 are reported as malignant, everything else as benign; the
# benign branch prints `1 - prediction` as its confidence.
# NOTE(review): assumes label 1 denotes the malignant class - confirm against
# the dataset's folder convention.
if prediction > 0.5:
    print(f"The model predicts this image as malignant with confidence {prediction:.4f}")
else:
    print(f"The model predicts this image as benign with confidence {1 - prediction:.4f}")


The model predicts this image as benign with confidence 0.9989


In [14]:
import numpy as np
from PIL import Image

# `predict_image` is already defined in the earlier prediction cell; it is
# reused here instead of being redefined, so a second copy cannot silently
# shadow the first and the two cannot drift apart.

# Predict on a second sample image
img_path = 'Test_image2.png'
prediction = predict_image(model, img_path)

# Scores above 0.5 are reported as malignant, everything else as benign.
if prediction > 0.5:
    print(f"The model predicts this image as malignant with confidence {prediction:.4f}")
else:
    print(f"The model predicts this image as benign with confidence {1 - prediction:.4f}")


The model predicts this image as malignant with confidence 0.7227
