# Setting up the environment

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from google.colab import files

# Upload your Kaggle API token JSON file (kaggle.json) through the browser;
# the result of the upload call is kept in `uploaded`.
uploaded = files.upload()

In [None]:
# Install the uploaded token in ~/.kaggle and restrict it to owner read/write
# (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the breast histopathology dataset archive with the Kaggle CLI.
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

In [None]:
# List the working directory (e.g. to check that the archive is present).
!ls

In [None]:
# Quietly extract the archive into ./dataset.
!unzip -q breast-histopathology-images.zip -d dataset

## Importing modules

In [None]:
# Basic libraries
import numpy as np
import random
from os import listdir
from PIL import Image

# Preprocessing/Visualization
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.utils import to_categorical

# Model creation
import tensorflow as tf
import keras
from keras import layers

# Evaluation
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
# Defining directories

base_path = "dataset/IDC_regular_ps50_idx5/"
# listdir() returns entries in arbitrary order; sorting makes the patient order
# (and therefore any later slice of the data) reproducible between runs.
# NOTE: this rebinds `files`, which previously named the google.colab.files module.
files = sorted(listdir(base_path))

# Each entry under base_path is expected to be one patient folder, so the
# number of entries is usually the number of patients.

print(f"Total no of patients: {len(files)}")

In [None]:
# Saving the data source into an array
# Every record is [image_path, label]; the label is the class sub-folder (0 or 1)
# the image was found in.

dataset = []

for patient_id in files:
  # The patient folder does not depend on the class, so build it once.
  patientid_path = base_path + patient_id
  for c in [0, 1]:
    class_path = patientid_path + "/" + str(c) + "/"
    # Sort so the record order does not depend on listdir()'s arbitrary ordering.
    for picture in sorted(listdir(class_path)):
      dataset.append([class_path + picture, c])

In [None]:
# Report how many [image_path, label] records were collected.
print(f"No. of images: {len(dataset)}")

In [None]:
# Peek at the first record of the dataset list: an [image_path, label] pair

dataset[0]

In [None]:
# Limit the dataset due to performance concerns:
# only the first eighth of the records is kept.

limit = len(dataset) / 8
n_kept = int(limit)
dataset = dataset[:n_kept]

len(dataset)

In [None]:
# Data Visualization

# Open the image of the first record

image_path, label = dataset[0]
image = Image.open(image_path)

# Report the image dimensions
print(f"Width and height respectively: {image.size} Pixels")

In [None]:
# Display the first image of the dataset in a small 3 x 3 inch figure

plt.figure(figsize=(3, 3))

plt.title("First Image Sample")
plt.imshow(image)

plt.show()

In [None]:
# Separate the records by class: label 0 -> BENIGN, label 1 -> MALIGNANT

# Image paths per class
BENIGNdata = [path for path, cls in dataset if cls == 0]
MALIGNANTdata = [path for path, cls in dataset if cls == 1]

# Matching label lists per class
BENIGNlabels = [cls for _path, cls in dataset if cls == 0]
MALIGNANTlabels = [cls for _path, cls in dataset if cls == 1]

In [None]:
# Get a sample of up to 50 image paths from each class.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the class size; the plotting cells already cope with
# samples shorter than 50.

BENIGNsample = random.sample(BENIGNdata, min(50, len(BENIGNdata)))
MALIGNANTsample = random.sample(MALIGNANTdata, min(50, len(MALIGNANTdata)))

In [None]:
# Lay out a 5 x 10 grid of axes for the benign sample
fig, ax = plt.subplots(5, 10, figsize=(20, 10))

# ax.flat walks the grid row by row, so position idx is row idx // 10, column idx % 10
for idx, cell in enumerate(ax.flat):
    if idx >= len(BENIGNsample):
        # No image left for this cell: remove the unused subplot
        fig.delaxes(cell)
        continue

    # Open the patch with PIL and draw it without grid lines
    image = Image.open(BENIGNsample[idx])
    cell.imshow(image)
    cell.grid(False)

# Tighten the spacing between subplots
plt.tight_layout()

# Show the grid of healthy patches
plt.show()

In [None]:
# Lay out a 5 x 10 grid of axes for the malignant sample
fig, ax = plt.subplots(5, 10, figsize=(20, 10))

# ax.flat walks the grid row by row, so position idx is row idx // 10, column idx % 10
for idx, cell in enumerate(ax.flat):
    if idx >= len(MALIGNANTsample):
        # No image left for this cell: remove the unused subplot
        fig.delaxes(cell)
        continue

    # Open the patch with PIL and draw it without grid lines
    image = Image.open(MALIGNANTsample[idx])
    cell.imshow(image)
    cell.grid(False)

# Tighten the spacing between subplots
plt.tight_layout()

# Show the grid of malignant patches
plt.show()

In [None]:
# Class distribution: per-class counts and their share of the total

labels = ["BENIGN", "MALIGNANT"]
colors = ["green", "orange"]
counts = [len(class_paths) for class_paths in (BENIGNdata, MALIGNANTdata)]

total_samples = sum(counts)
percentages = [(class_count / total_samples) * 100 for class_count in counts]

In [None]:
# Bar chart of the number of images per class
plt.figure(figsize=(8, 6))
plt.bar(labels, counts, color=colors)
plt.title("Class Distribution")
plt.xlabel("Class")
plt.ylabel("Count")
plt.show()

# Data Preprocessing

In [None]:
# Resizing the samples

target_size = (50, 50)


def _load_resized(image_paths, size):
    """Open every path in *image_paths* and return the images resized to *size*.

    Resizing uses the LANCZOS filter (anti-aliasing for better quality).
    """
    resized = []
    for path in image_paths:
        # The context manager closes the source file as soon as the resized
        # copy exists, instead of leaving that to garbage collection.
        with Image.open(path) as source_image:
            resized.append(source_image.resize(size, Image.LANCZOS))
    return resized


resizedBENIGN = _load_resized(BENIGNdata, target_size)
resizedMALIGNANT = _load_resized(MALIGNANTdata, target_size)

In [None]:
# Normalize the dataset pixel values to the range [0, 1].
# The pixels are converted to float32 before scaling: dividing an integer array
# by 255.0 would produce float64 and double the memory needed for the stacks.

BENIGNdataset = np.array([np.asarray(image, dtype=np.float32) / 255.0 for image in resizedBENIGN])
MALIGNANTdataset = np.array([np.asarray(image, dtype=np.float32) / 255.0 for image in resizedMALIGNANT])

In [None]:
# Shuffle each class array with a fixed seed so the order is repeatable

BENIGNdataset, MALIGNANTdataset = (
    shuffle(class_array, random_state=42)
    for class_array in (BENIGNdataset, MALIGNANTdataset)
)

In [None]:
# Report the array shape of each class

print(f"BENIGNdataset shape: {BENIGNdataset.shape}")
print(f"MALIGNANTdataset shape: {MALIGNANTdataset.shape}")

In [None]:
# Split the data


def _split_80_10_10(samples, targets):
    """Split one class into train (80%), validation (10%) and test (10%) parts.

    Returns ((train, train_labels), (val, val_labels), (test, test_labels)).
    """
    # First hold back 20% of the samples ...
    train, holdout, train_targets, holdout_targets = train_test_split(
        samples, targets, test_size=0.2, stratify=targets, random_state=42
    )
    # ... then halve the held-back part into validation and test sets.
    val, test, val_targets, test_targets = train_test_split(
        holdout, holdout_targets, test_size=0.5, stratify=holdout_targets, random_state=42
    )
    return (train, train_targets), (val, val_targets), (test, test_targets)


(
    (BENIGNtrain, BENIGNtrain_labels),
    (BENIGNval, BENIGNval_labels),
    (BENIGNtest, BENIGNtest_labels),
) = _split_80_10_10(BENIGNdataset, BENIGNlabels)

(
    (MALIGNANTtrain, MALIGNANTtrain_labels),
    (MALIGNANTval, MALIGNANTval_labels),
    (MALIGNANTtest, MALIGNANTtest_labels),
) = _split_80_10_10(MALIGNANTdataset, MALIGNANTlabels)


# Stack the per-class parts (benign first, then malignant) into single arrays

train_data = np.concatenate((BENIGNtrain, MALIGNANTtrain), axis=0)
val_data = np.concatenate((BENIGNval, MALIGNANTval), axis=0)
test_data = np.concatenate((BENIGNtest, MALIGNANTtest), axis=0)

train_labels = np.concatenate((BENIGNtrain_labels, MALIGNANTtrain_labels), axis=0)
val_labels = np.concatenate((BENIGNval_labels, MALIGNANTval_labels), axis=0)
test_labels = np.concatenate((BENIGNtest_labels, MALIGNANTtest_labels), axis=0)

In [None]:
# One-hot encode the integer labels of every split (two classes)

train_labels = to_categorical(train_labels, num_classes=2)
val_labels = to_categorical(val_labels, num_classes=2)
test_labels = to_categorical(test_labels, num_classes=2)

In [None]:
# Report the shapes of the data and label arrays of every split
print(f'train_data shape : {train_data.shape}')
print(f'train_labels shape : {train_labels.shape}')
print(f'val_data shape : {val_data.shape}')
print(f'val_labels shape : {val_labels.shape}')
print(f'test_data shape : {test_data.shape}')
print(f'test_labels shape : {test_labels.shape}')

# Model Architecture

In [None]:
# Settings shared by every convolution: "same" padding and ReLU activation
conv_kwargs = {"padding": "same", "activation": "relu"}
keras_layers = tf.keras.layers

model = tf.keras.Sequential([
    # Four convolution + max-pooling stages with 32, 64, 128 and 256 filters
    keras_layers.Conv2D(32, (3, 3), input_shape=(50, 50, 3), **conv_kwargs),
    keras_layers.MaxPooling2D((2, 2)),
    keras_layers.Conv2D(64, (3, 3), **conv_kwargs),
    keras_layers.MaxPooling2D((3, 3), strides=2),
    keras_layers.Conv2D(128, (3, 3), **conv_kwargs),
    keras_layers.MaxPooling2D((3, 3), strides=2),
    keras_layers.Conv2D(256, (3, 3), **conv_kwargs),
    keras_layers.MaxPooling2D((3, 3), strides=2),

    # Flatten the feature maps into a vector
    keras_layers.Flatten(),

    # Dense head: one hidden layer, then a 2-way softmax output
    keras_layers.Dense(128, activation="relu"),
    keras_layers.Dense(2, activation="softmax"),
])

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

# The network ends in a 2-unit softmax and the labels are one-hot encoded, so
# categorical cross-entropy is the matching loss; with it the "accuracy" metric
# is computed as categorical accuracy.
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=['accuracy'])

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Metric to monitor for early stopping
    patience=5,           # Number of epochs with no improvement after which training will stop
    restore_best_weights=True  # Restore the model weights from the epoch with the best validation loss
)

# Model training

In [None]:
# Train for at most 25 epochs, validating on the validation split after each
# epoch; the early-stopping callback may end training sooner.
history = model.fit(
    train_data,
    train_labels,
    validation_data=(val_data, val_labels),
    epochs=25,
    batch_size=148,
    callbacks=[early_stopping],
)

# Model Evaluation

In [None]:
# Score the trained model on the held-out test split.
model.evaluate(x=test_data, y=test_labels)

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve is val_accuracy (validation data), not test data.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is val_loss (validation data), not test data.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Predicted class scores for the test split, reduced to the index of the
# highest-scoring class per sample.
Ydata = model.predict(test_data)
Ylabels = Ydata.argmax(axis=1)

In [None]:
# Turn the one-hot test labels back into class indices (the true labels).
Xlabels = test_labels.argmax(axis=1)

In [None]:
# Evaluation Metrics
# Xlabels holds the true class indices, Ylabels the predicted ones.

accuracy = accuracy_score(Xlabels, Ylabels)
print(f'Accuracy: {accuracy*100:.2f}%')

precision = precision_score(Xlabels, Ylabels)
print(f'Precision: {precision:.2f}')

recall = recall_score(Xlabels, Ylabels)
print(f'Recall: {recall:.2f}')

f1 = f1_score(Xlabels, Ylabels)
print(f'F1: {f1:.2f}')

# Confusion matrix

confusion_mat = confusion_matrix(Xlabels, Ylabels)
f,ax = plt.subplots(figsize=(8, 8))
# The cells are integer sample counts, so annotate them as integers ('d')
# rather than as one-decimal floats.
sns.heatmap(confusion_mat, annot=True, linewidths=0.01,cmap="BuPu",linecolor="gray", fmt= 'd',ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Save the trained model to Google Drive. Drive is mounted at /content/drive,
# and the target must be a model file (".keras" format), not a bare directory.
model.save("/content/drive/MyDrive/breast_histopathology_cnn.keras")