<a href="https://colab.research.google.com/github/Pragnya08/Capsule-Network-and-CNN-for-Leukaemia-Detection/blob/main/cnn_leukaemia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
import cv2
import numpy as np
import tensorflow as tf
from google.colab.patches import cv2_imshow
from tqdm.auto import tqdm
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import optimizers
from tensorflow.keras.utils import image_dataset_from_directory
import random as rn
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import custom_object_scope

In [None]:
# Install the Kaggle command-line client used below to fetch the dataset.
!pip install kaggle
# Create the ~/.kaggle directory.
# NOTE(review): presumably the Kaggle API token (kaggle.json) has to be placed here
# before the download can authenticate; this notebook does not copy it — confirm.
!mkdir -p ~/.kaggle
# Download the dataset archive into the current working directory.
!kaggle datasets download -d andrewmvd/leukemia-classification
# Extract the archive in place.
!unzip leukemia-classification.zip

In [None]:
# Set the base directory of your dataset
base_path = 'C-NMC_Leukemia'  # Adjust this to the path where your dataset is located

# Folds and categories
folds = ['fold_0', 'fold_1', 'fold_2']
categories = ['all', 'hem']

# Path to store combined training data
combined_train_path = os.path.join(base_path, 'combined_training_data')
# Create every category directory unconditionally. makedirs(exist_ok=True) is
# idempotent, so re-running this cell after a partial failure (combined directory
# present, category directory missing) still leaves all copy targets in place.
for category in categories:
    os.makedirs(os.path.join(combined_train_path, category), exist_ok=True)

# Combine images from all folds into one directory for each category
for fold in folds:
    for category in categories:
        source_path = os.path.join(base_path, 'training_data', fold, category)
        target_path = os.path.join(combined_train_path, category)
        if not os.path.isdir(source_path):
            # Report instead of skipping silently: a wrong base_path would
            # otherwise only show up later as "0 images".
            print(f"Skipping missing fold directory: {source_path}")
            continue
        for file in os.listdir(source_path):
            if file.endswith('.bmp'):
                shutil.copy(os.path.join(source_path, file), target_path)

# Function to count images in a directory
def count_images_in_directory(directory_path):
    """Return how many entries directly inside ``directory_path`` end in '.bmp'.

    Only the top level of the directory is inspected; subdirectories are not
    descended into.
    """
    bmp_total = 0
    for entry_name in os.listdir(directory_path):
        if entry_name.endswith('.bmp'):
            bmp_total += 1
    return bmp_total

# Tally the merged training images per category, then report each class and the total
image_counts = {}
for category in categories:
    category_dir = os.path.join(combined_train_path, category)
    image_counts[category] = count_images_in_directory(category_dir)

print("Total number of cancer cell images (all):", image_counts['all'])
print("Total number of normal cell images (hem):", image_counts['hem'])
print("Total number of images in training:", sum(image_counts.values()))


In [None]:
# Path to the combined training data.
# Recomputed from `base_path`; it is the same directory the fold images were merged into.
combined_train_path = os.path.join(base_path, 'combined_training_data')

# Categories (sub-folder names, one per class)
categories = ['all', 'hem']

# Split data
def split_data(category, train_split=0.70, val_split=0.20, test_split=0.10):
    """Split one category's .bmp file names into train / validation / test lists.

    :param category: sub-folder of ``combined_train_path`` whose files are split
    :param train_split: fraction of all files intended for training
    :param val_split: fraction of all files intended for validation
    :param test_split: fraction of all files held out for testing
    :return: tuple ``(train, val, test)`` of file-name lists
    """
    # os.listdir returns names in arbitrary order, so the fixed random_state only
    # gives a repeatable split if the input order is fixed first. Without sorting,
    # re-running the notebook can assign a file to a different subset while the
    # copy from the previous run is still on disk in another subset.
    files = sorted(
        file for file in os.listdir(os.path.join(combined_train_path, category))
        if file.endswith('.bmp')
    )
    # First carve off the test share, then divide the remainder so that the
    # validation share is val_split of the original total.
    train_val, test = train_test_split(files, test_size=test_split, random_state=42)
    train, val = train_test_split(train_val, test_size=val_split/(train_split+val_split), random_state=42)
    return train, val, test

# Prepare directories for split datasets
# makedirs(exist_ok=True) creates the subset and category levels in one call and
# is a no-op when they already exist, replacing the check-then-mkdir pairs.
for subset in ['train', 'validation', 'test']:
    for category in categories:
        os.makedirs(os.path.join(base_path, subset, category), exist_ok=True)

# Copy each category's files into the train / validation / test folders
for category in categories:
    train, val, test = split_data(category)
    category_source = os.path.join(combined_train_path, category)
    for subset, file_names in (('train', train), ('validation', val), ('test', test)):
        destination = os.path.join(base_path, subset, category)
        for img in file_names:
            shutil.copy(os.path.join(category_source, img), destination)

# Report how many directory entries each subset/category folder holds
for subset in ('train', 'validation', 'test'):
    print(f"{subset.capitalize()} Set:")
    for category in categories:
        entries_in_split = os.listdir(os.path.join(base_path, subset, category))
        print(f"  {category} count: {len(entries_in_split)}")

In [None]:
def count_images_in_directory(directory_path):
    """
    Recursively count the files whose name ends in '.bmp'.

    The walk covers ``directory_path`` itself and every subdirectory below it.
    """
    return sum(
        1
        for _root, _dirs, file_names in os.walk(directory_path)
        for name in file_names
        if name.endswith('.bmp')
    )

In [None]:

# Paths
# Derive every location from `base_path` (the directory the split was written
# under) instead of hard-coding '/content/...'. When the working directory is
# /content these resolve to exactly the same absolute paths as before; anywhere
# else they still point at the data that was actually created.
dataset_root = os.path.abspath(base_path)
training_path = os.path.join(dataset_root, 'train')
train_all_path = os.path.join(training_path, 'all')
train_hem_path = os.path.join(training_path, 'hem')
validation_path = os.path.join(dataset_root, 'validation')
validation_all_path = os.path.join(validation_path, 'all')
validation_hem_path = os.path.join(validation_path, 'hem')
test_path = os.path.join(dataset_root, 'test')
# Recursive .bmp count over both class folders of the training split
training_count = count_images_in_directory(training_path)
print(f"Number of images in training path: {training_count}")



In [None]:
# Per-class image totals for the training split
train_all_count = count_images_in_directory(train_all_path)
train_hem_count = count_images_in_directory(train_hem_path)

# Report both totals
print(f"Training 'all' count: {train_all_count}")
print(f"Training 'hem' count: {train_hem_count}")

# Bar chart of the class balance in the training data
labels = ['all', 'hem']
counts = [train_all_count, train_hem_count]

_, ax = plt.subplots(figsize=(8, 6))
ax.bar(labels, counts, color=['blue', 'orange'])
ax.set_xlabel('Class')
ax.set_ylabel('Number of Images')
ax.set_title('Class Distribution in Training Data')
plt.show()



In [None]:
def get_image_paths(directory_path):
    """
    Collect the paths of the .bmp files located directly in a directory.

    Each returned path is ``directory_path`` joined with the file name; the
    order follows ``os.listdir``.
    """
    bmp_paths = []
    for name in os.listdir(directory_path):
        if name.endswith('.bmp'):
            bmp_paths.append(os.path.join(directory_path, name))
    return bmp_paths


# Load image paths
train_all_images = get_image_paths(train_all_path)
train_hem_images = get_image_paths(train_hem_path)

# Calculate total and expected number of images
num_training_samples = len(train_all_images) + len(train_hem_images)

# Target size of each class after balancing: half of the training images
expected_samples_per_class = training_count // 2

# Output expected counts
print("Number of expected images per class:", expected_samples_per_class)

# Handle sub-sampling for the 'all' class
sampled_all = random.sample(train_all_images, min(len(train_all_images), expected_samples_per_class))
print("Number of sampled cancer cell images:", len(sampled_all))

# Calculate need and perform augmentation if necessary for 'hem' class.
# sampled_hem is bound up front so the later cells that iterate over it do not
# hit a NameError when no augmentation is required.
sampled_hem = []
hem_samples_needed = max(0, expected_samples_per_class - len(train_hem_images))
if hem_samples_needed > 0 and len(train_hem_images) > 0:
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at the number of available images.
    sampled_hem = random.sample(train_hem_images, min(hem_samples_needed, len(train_hem_images)))
    print("Number of sampled normal cell images to perform augmentation:", len(sampled_hem))
else:
    print("No augmentation needed for 'hem' class.")

In [None]:
def compute_gradient(image):
    '''
    Compute the Sobel gradient magnitude of the first channel of an image
    :param image: the image for which to compute the gradient (HxWxC)
    :return: gradient magnitude image (HxW numpy array)
    '''
    image_tensor = tf.convert_to_tensor(image, dtype=tf.float32)
    # Expanding image dimensions from HxWxC to 1xHxWxC for batch processing
    image_tensor = tf.expand_dims(image_tensor, axis=0)
    # Run the Sobel operator once; the result has shape 1xHxWxCx2 and already
    # contains both derivatives, so a second call is unnecessary.
    edges = tf.image.sobel_edges(image_tensor)
    # Index 0 on the channel axis: only the first channel contributes.
    # The last axis holds (dy, dx) in that order.
    dy = edges[:, :, :, 0, 0]
    dx = edges[:, :, :, 0, 1]
    # Compute gradient magnitude
    grad_mag = tf.sqrt(tf.square(dy) + tf.square(dx))
    return np.squeeze(grad_mag.numpy(), axis=0)  # Remove batch dimension

def plot_heatmap(gradient, title="Gradient Magnitude Heatmap"):
    '''
    Display a gradient image as a 'hot' colour-mapped heatmap with a colour bar
    :param gradient: gradient magnitude image
    :param title: title of the plot
    '''
    fig, ax = plt.subplots(figsize=(6, 6))
    heat = ax.imshow(gradient, cmap='hot', interpolation='nearest')
    fig.colorbar(heat, ax=ax)
    ax.axis('off')
    ax.set_title(title)
    plt.show()

# Example image path (adjust the path as needed)
example_image_path = '/content/C-NMC_Leukemia/train/hem/UID_H10_100_1_hem.bmp'
original_image = cv2.imread(example_image_path)
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable. This particular file is only under train/hem if the split put it
# there, so fail with a clear message instead of an opaque cvtColor error.
if original_image is None:
    raise FileNotFoundError(f"Could not read example image: {example_image_path}")
# OpenCV loads BGR; convert to RGB before further processing
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

# Compute gradient and plot heatmap
gradient_image = compute_gradient(original_image)
plot_heatmap(gradient_image)

def save_heatmap(gradient, file_path):
    '''
    Render a gradient image as a 'hot' heatmap and write it to disk as PNG
    :param gradient: gradient magnitude image
    :param file_path: path to save the heatmap
    '''
    heat = plt.imshow(gradient, cmap='hot')
    plt.colorbar(heat)
    plt.axis('off')
    # The output format is forced to PNG regardless of the file extension
    plt.savefig(file_path, format='png')
    # Close the current figure once it has been written
    plt.close()

In [None]:
def random_flip_or_rotation(original_image):
    '''
    Apply one randomly chosen transformation: a flip (horizontal or vertical)
    or a rotation by one, two or three quarter turns
    :param original_image: the image on which to perform the transformation
    :return: the transformed image as a numpy array
    '''
    # First draw: flip (1) versus rotate (0)
    if not random.randint(0, 1):
        quarter_turns = random.randint(1, 3)
        return np.asarray(tf.image.rot90(original_image, quarter_turns))
    # Second draw: left-right (1) versus up-down (0)
    flip = tf.image.flip_left_right if random.randint(0, 1) else tf.image.flip_up_down
    return np.asarray(flip(original_image))


In [None]:
# Example image path (adjust the path as needed)
example_image_path = '/content/C-NMC_Leukemia/train/hem/UID_H10_100_1_hem.bmp'
original_image = cv2.imread(example_image_path)
# cv2.imread returns None when the file cannot be read; stop with a clear error.
if original_image is None:
    raise FileNotFoundError(f"Could not read example image: {example_image_path}")
# OpenCV loads images as BGR while matplotlib displays RGB; without this
# conversion every panel below is shown with red and blue swapped.
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

vertical_flip = tf.image.flip_up_down(original_image)
horizontal_flip = tf.image.flip_left_right(original_image)
rotation_90 = tf.image.rot90(original_image, k=1)
rotation_180 = tf.image.rot90(original_image, k=2)
rotation_270 = tf.image.rot90(original_image, k=3)

# One (image, title) pair per panel, laid out on a 2x3 grid in this order
panels = [
    (original_image, "Original"),
    (vertical_flip, "Flipped Vertically"),
    (horizontal_flip, "Flipped Horizontally"),
    (rotation_90, "Rotated by 90°"),
    (rotation_180, "Rotated by 180°"),
    (rotation_270, "Rotated by 270°"),
]

fig = plt.figure(figsize=(10, 10))
for position, (panel_image, panel_title) in enumerate(panels, start=1):
    fig.add_subplot(2, 3, position)
    plt.imshow(panel_image)
    plt.axis('off')
    plt.title(panel_title)
plt.show()


In [None]:
def crop_and_resize_image(image):
    '''
    Crops the image to its non-black content using a threshold, then pads the
    crop with black pixels so the result is square. No rescaling is performed.
    :param image: BGR image to crop
    :return: cropped image padded to a square
    '''
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)  # Pixels brighter than 1 count as content
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # Use the largest contour: the first one returned is not guaranteed to
        # be the main object and may be a stray speck.
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        cropped_image = image[y:y+h, x:x+w]
    else:
        # Entirely black image: nothing to crop, keep it as is.
        cropped_image = image

    # Enlarge the image to get a square shape. Only height and width are
    # considered; the channel count must not take part in the maximum.
    height, width = cropped_image.shape[:2]
    max_dimension = max(height, width)
    # Split each gap so that the two sides add up to exactly the gap; an odd
    # gap puts the extra pixel on the bottom/right, keeping the output square.
    vertical_gap = max_dimension - height
    top = vertical_gap // 2
    bottom = vertical_gap - top
    horizontal_gap = max_dimension - width
    left = horizontal_gap // 2
    right = horizontal_gap - left

    return cv2.copyMakeBorder(cropped_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)  # Padding with black


In [None]:
# Example image path (adjust the path as needed)
example_image_path = '/content/C-NMC_Leukemia/train/hem/UID_H10_100_1_hem.bmp'
img = cv2.imread(example_image_path)
# cv2.imread returns None when the file cannot be read; stop with a clear error
# rather than failing inside the cropping function.
if img is None:
    raise FileNotFoundError(f"Could not read example image: {example_image_path}")
cropped_image = crop_and_resize_image(img)

# Show the original and the cropped version side by side (converted from
# OpenCV's BGR to RGB for display)
fig = plt.figure(figsize=(10, 5))
for position, (panel_image, panel_title) in enumerate(
        [(img, "Original Image"), (cropped_image, "Cropped Image")], start=1):
    fig.add_subplot(1, 2, position)
    plt.imshow(cv2.cvtColor(panel_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(panel_title)
plt.show()


In [None]:
# Root of the processed dataset and one folder per split
new_dataset_path = "/content/SplittedDataset"
training_path_new = new_dataset_path + "/training_set"
validation_path_new = new_dataset_path + "/validation_set"
test_path_new = new_dataset_path + "/test_set"

# Start from a clean slate: drop any output left by a previous run
shutil.rmtree(new_dataset_path, ignore_errors=True)

# Create an 'all' and a 'hem' folder inside every split
for split_root in (training_path_new, validation_path_new, test_path_new):
    for class_dir in ("/all", "/hem"):
        os.makedirs(split_root + class_dir)

In [None]:
# Reduce the path lists to bare file names; the processing loops that follow
# rebuild each source path from `training_path` plus the class folder.
sampled_all = [os.path.basename(path) for path in sampled_all]  # sub-sampled 'all' training images
hem_samples = [os.path.basename(path) for path in train_hem_images]  # every 'hem' training image
sampled_hem = [os.path.basename(path) for path in sampled_hem]  # 'hem' images selected for augmentation
validation_set = []  # empty here; reassigned later from populate_validation_set(...)
test_set = []  # empty here; not referenced again in the visible cells


In [None]:
# Crop every sampled cancer cell image and store it in the new training set
for sample in sampled_all:
    source_file = training_path + '/all/' + sample
    destination_file = training_path_new + '/all/' + sample
    cv2.imwrite(destination_file, crop_and_resize_image(cv2.imread(source_file)))

In [None]:

# Crop every normal cell image and store it in the new training set
for sample in hem_samples:
    source_file = training_path + '/hem/' + sample
    destination_file = training_path_new + '/hem/' + sample
    cv2.imwrite(destination_file, crop_and_resize_image(cv2.imread(source_file)))

In [None]:
# Loop for augmented normal cell images
for sample in sampled_hem:
    img = cv2.imread(training_path + "/hem/" + sample)
    augmented_img = random_flip_or_rotation(img)
    cropped_image = crop_and_resize_image(augmented_img)
    # Write into the existing 'hem' class folder under an 'aug_' prefix:
    #  - a separate 'hem_augmented' folder is never created, so cv2.imwrite
    #    would fail there and the augmented images would be lost;
    #  - the training loader only reads the 'hem' and 'all' class folders;
    #  - the prefix keeps the augmented copy from overwriting the original.
    save_path = training_path_new + '/hem/aug_' + sample
    # cv2.imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(save_path, cropped_image):
        print(f"Failed to write augmented image: {save_path}")



In [None]:
validation_set = []
# Build the list of validation samples from the class folders on disk
def populate_validation_set(directory):
    """Return 'label/filename' entries for every .bmp under the class folders.

    The 'all' and 'hem' sub-folders of ``directory`` are scanned in that order.
    A missing sub-folder is reported with a message and skipped.
    """
    collected = []
    for label in ('all', 'hem'):
        dir_path = os.path.join(directory, label)
        if os.path.exists(dir_path):
            collected.extend(
                os.path.join(label, filename)
                for filename in os.listdir(dir_path)
                if filename.endswith(".bmp")
            )
        else:
            print(f"Directory does not exist: {dir_path}")
    return collected

# Collect the validation samples found on disk
validation_set = populate_validation_set(validation_path)

# Debugging: show everything that was collected
print("Validation set samples:", validation_set)

# Make sure both class folders exist in the new validation directory
for label in ("all", "hem"):
    os.makedirs(os.path.join(validation_path_new, label), exist_ok=True)


In [None]:
count_all = 0
count_hem = 0

# Processing validation_set images
for sample in validation_set:  # Store cropped validation set images
    # Entries were built with os.path.join(label, filename), so take them apart
    # with os.path.split rather than assuming a '/' separator.
    label, filename = os.path.split(sample)
    img_path = os.path.join(validation_path, label, filename)
    print(f"Processing image: {img_path}")
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error reading image: {img_path}")
        continue
    cropped_image = crop_and_resize_image(img)
    save_path = os.path.join(validation_path_new, label, filename)
    print(f"Saving cropped image to: {save_path}")
    # cv2.imwrite reports failure through its return value
    if not cv2.imwrite(save_path, cropped_image):
        print(f"Error writing image: {save_path}")
        continue
    # Keep the per-class counters up to date, as the test-set cell does
    if label == 'all':
        count_all += 1
    else:
        count_hem += 1

print(f"Total 'all' images processed: {count_all}")
print(f"Total 'hem' images processed: {count_hem}")

# Debugging: Verify saved images
print("Contents of the new validation directory (all):")
all_files = os.listdir(os.path.join(validation_path_new, "all"))
print(all_files)

print("Contents of the new validation directory (hem):")
hem_files = os.listdir(os.path.join(validation_path_new, "hem"))
print(hem_files)



In [None]:
count_all = 0
count_hem = 0

for root, dirs, files in os.walk(test_path):
    # Take the class from the folder's own name. A substring test on the whole
    # path would label everything 'all' as soon as any parent directory happens
    # to contain the letters "all".
    label = os.path.basename(root)
    if label not in ('all', 'hem'):
        continue  # e.g. the test root itself, which holds no class images
    target_dir = os.path.join(test_path_new, label)
    os.makedirs(target_dir, exist_ok=True)
    for file in files:
        if not file.endswith('.bmp'):  # Ensures only BMP images are processed
            continue
        img_path = os.path.join(root, file)
        print(f"Processing image: {img_path}")
        img = cv2.imread(img_path)
        if img is None:
            print(f"Image {img_path} not found or could not be read.")
            continue

        cropped_image = crop_and_resize_image(img)
        if label == 'all':
            count_all += 1
        else:
            count_hem += 1
        save_path = os.path.join(target_dir, file)
        cv2.imwrite(save_path, cropped_image)
        print(f"Cropped image saved to: {save_path}")

print(f"Total 'all' images processed: {count_all}")
print(f"Total 'hem' images processed: {count_hem}")

### CNN

In [None]:
# Root of the processed dataset and the folders derived from it
DATASET_PATH = "/content/SplittedDataset"
TRAINING_PATH = f"{DATASET_PATH}/training_set"
TRAINING_ALL_PATH = f"{TRAINING_PATH}/all"
TRAINING_HEM_PATH = f"{TRAINING_PATH}/hem"
VALIDATION_PATH = f"{DATASET_PATH}/validation_set"
TEST_PATH = f"{DATASET_PATH}/test_set"

In [None]:
# Number of images per batch handed to the dataset loaders
BATCH_SIZE = 32
# Images are resized to a square of this side length when loaded
IMAGE_HEIGHT = IMAGE_WIDTH = 224

In [None]:
def set_seed(seed=46):
  '''
  set_seed is used to obtain reproducible results using keras during the development phase
  :param seed: value used to seed the NumPy, Python and TensorFlow generators (default 46)
  '''
  # Intended to make hash-based operations reproducible.
  # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
  # it here presumably only affects processes launched afterwards — confirm.
  os.environ["PYTHONHASHSEED"]="0"
  # The below is necessary for starting Numpy generated random numbers in a well-defined initial state.
  np.random.seed(seed)
  # The below is necessary for starting core Python generated random numbers in a well-defined state.
  rn.seed(seed)
  # The below tf.random.set_seed will make random number generation in TensorFlow have a well-defined initial state.
  tf.random.set_seed(seed)


def show_training_and_validation_performance(history):
  '''
  show_training_and_validation_performance plots the accuracy and the loss recorded
  during training, each against its validation counterpart
  :param history: object in which are recorded all the events
  '''
  recorded = history.history
  # (metric name, training values, validation values) for each of the two plots
  curves = [
    ('accuracy', recorded['accuracy'], recorded['val_accuracy']),
    ('loss', recorded['loss'], recorded['val_loss']),
  ]
  epochs = range(len(curves[0][1]))

  for position, (metric, train_values, val_values) in enumerate(curves):
    if position > 0:
      # The second plot goes on a new figure; the first uses the current one
      plt.figure()
    plt.plot(epochs, train_values, 'bo', label=f'Training {metric}')
    plt.plot(epochs, val_values, 'b', label=f'Validation {metric}')
    plt.title(f'Training and validation {metric}')
    plt.legend()

  plt.show()

def load_training_set (image_height, image_width, batch_size):
  '''
  load_training_set loads the training set from the "training_set" folder. The images are resized to height x width
  :param image_height: standard height of the images
  :param image_width: standard width of the images
  :param batch_size: size of each batch
  :return: the training set
  '''
  # label_mode='binary' matches the test loader and the single sigmoid output
  # trained with binary cross-entropy. class_names fixes 'hem' -> 0, 'all' -> 1.
  return image_dataset_from_directory(
    TRAINING_PATH,
    label_mode = 'binary',
    image_size = (image_height, image_width),
    batch_size = batch_size,
    class_names = ['hem', 'all'])
def load_validation_set (image_height, image_width, batch_size):
  '''
  load_validation_set loads the validation set from the "validation_set" folder. The images are resized to height x width
  :param image_height: standard height of the images
  :param image_width: standard width of the images
  :param batch_size: size of each batch
  :return: the validation set
  '''
  # label_mode='binary' matches the test loader and the single sigmoid output
  # trained with binary cross-entropy. class_names fixes 'hem' -> 0, 'all' -> 1.
  return image_dataset_from_directory(
    VALIDATION_PATH,
    label_mode = 'binary',
    image_size = (image_height, image_width),
    batch_size = batch_size,
    class_names = ['hem', 'all'])

def load_test_set(image_height, image_width, batch_size):
    '''
    load_test_set loads the test set from the "test_set" folder with binary labels
    and without shuffling. The images are resized to height x width
    :param image_height: standard height of the images
    :param image_width: standard width of the images
    :param batch_size: size of each batch
    :return: the test set
    '''
    loader_options = dict(
        label_mode='binary',
        image_size=(image_height, image_width),
        batch_size=batch_size,
        shuffle=False,  # keep a fixed sample order across iterations
        class_names=['hem', 'all'],
    )
    return image_dataset_from_directory(TEST_PATH, **loader_options)




def compile_model(model, optimizer='adamax', learning_rate=0.001):
  '''
  compile_model is used to compile the current model with binary cross-entropy
  loss and the accuracy metric, then print its summary
  :param model: model to compile
  :param optimizer: optimizer to be used, either 'adamax' or 'rmsprop'
  :param learning_rate: learning rate parameter for the optimizer
  :raises ValueError: if optimizer is not one of the supported names
  '''
  if optimizer == 'adamax':
    chosen_optimizer = optimizers.Adamax(learning_rate=learning_rate)
  elif optimizer == 'rmsprop':
    chosen_optimizer = optimizers.RMSprop(learning_rate=learning_rate)
  else:
    # Previously an unknown name fell through and the model was left
    # uncompiled without any message; fail loudly instead.
    raise ValueError(
      f"Unsupported optimizer {optimizer!r}; expected 'adamax' or 'rmsprop'")
  model.compile(loss="binary_crossentropy",
                optimizer=chosen_optimizer,
                metrics=["accuracy"])
  model.summary()

def run_model(model, epochs=20, patience=5, monitor='val_loss'):
    '''
    run_model trains the current model on the module-level train_dataset,
    validating on validation_dataset, and plots the training curves. The model
    is not saved.
    :param model: model to run
    :param epochs: how many epochs to do
    :param patience: patience value for Early Stopping
    :param monitor: what to monitor for Early Stopping
    '''
    callbacks_list = [
        # restore_best_weights: when early stopping triggers, the model would
        # otherwise keep the weights of the last epoch, which is `patience`
        # epochs past the best monitored value.
        tf.keras.callbacks.EarlyStopping(monitor=monitor,
                                         patience=patience,
                                         restore_best_weights=True)
    ]
    history = model.fit(train_dataset,
                        epochs=epochs,
                        validation_data=validation_dataset,
                        callbacks=callbacks_list)
    show_training_and_validation_performance(history)


def evaluate_model (model):
  '''
  evaluate_model prints a classification report and shows the confusion matrix
  and ROC curve for the module-level test_dataset
  :param model: model to consider
  '''
  # Flatten scores and labels to 1-D arrays: the model output and the binary
  # labels come as column vectors, which sklearn only accepts via implicit
  # conversion.
  y_score = np.asarray(model.predict(test_dataset)).ravel()
  y_pred = np.rint(y_score).astype(int)
  y_true = np.concatenate(
    [np.asarray(labels_batch).ravel() for data_batch, labels_batch in test_dataset]
  ).astype(int)
  print("Classification report: ")
  print(metrics.classification_report(y_true,y_pred,digits = 4))
  metrics.ConfusionMatrixDisplay.from_predictions(y_true, y_pred)

  # ROC curve
  fpr,tpr,th = metrics.roc_curve(y_true,y_score)
  roc_auc = metrics.roc_auc_score(y_true,y_score)

  plt.figure()
  plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
  # Diagonal reference line
  plt.plot([0, 1], [0, 1], linestyle='--', color='navy')
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('ROC curve')
  plt.legend(loc="lower right")
  plt.show()



In [None]:

# shapes[(h, w)] = number of training images that fit inside an h x w frame
shapes = {}

total_entries = []

# Candidate square input sizes each image is tested against
size_limits = [(300, 300), (299, 299), (224, 224)]

for class_path in (TRAINING_ALL_PATH, TRAINING_HEM_PATH):
  entries = os.listdir(class_path)
  total_entries += entries
  for entry in tqdm(entries):
    img = cv2.imread(class_path + '/' + entry)
    if img is None:
      # Not a readable image; nothing to measure
      continue
    # Compare height and width separately. Comparing img.shape (h, w, c) with a
    # 2-tuple is a lexicographic tuple comparison: it ignores the width unless
    # the heights are equal and is always False for an exact-size image.
    height, width = img.shape[:2]
    for limit in size_limits:
      if height <= limit[0] and width <= limit[1]:
        shapes[limit] = shapes.get(limit, 0) + 1

In [None]:
# Total number of directory entries found in the two training class folders
print("Number of training images: ", len(total_entries))
# Per-size counters collected by the shape survey cell, keyed by (height, width)
print("Common shapes: ", shapes)

In [None]:

# Seed the random generators before the datasets are created
set_seed()

# All three splits use the image size and batch size configured above
train_dataset = load_training_set(IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE) # training split
validation_dataset = load_validation_set(IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE) # validation split
test_dataset = load_test_set(IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE) # test split

### One dense layer with 256 neurons and one dropout layer

In [None]:

np.random.seed(24)

# Input is rescaled to [0, 1], then passed through four Conv2D + MaxPooling2D
# stages described as (number of filters, pooling window side)
inputs = keras.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))
x = layers.Rescaling(1./255)(inputs)
for n_filters, pool in ((32, 2), (64, 2), (128, 3), (256, 5)):
  x = layers.Conv2D(filters=n_filters, padding='same', kernel_size=(3,3), activation="relu")(x)
  x = layers.MaxPooling2D(pool_size=(pool, pool))(x)

# Classifier head: one 256-unit dense layer, dropout, single sigmoid output
x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

compile_model(model)
plot_model(model, to_file='cnn_architecture.png', show_shapes=True, show_layer_names=True, dpi=90)

In [None]:
# Train for at most 50 epochs; early stopping inside run_model may end it sooner
run_model(model, epochs=50)

Note: for the capsule network, change the image size to 128 when running this.


In [None]:
# Report test-set metrics (classification report, confusion matrix, ROC curve)
evaluate_model(model)

In [None]:
# Persist the trained model as 'simplecnn.h5' in the current working directory
model.save('simplecnn.h5')

In [None]:
# Reload the test set at the configured image size
test_dataset = load_test_set(IMAGE_HEIGHT, IMAGE_WIDTH, BATCH_SIZE)
# Second copy of the test set resized to 300x300
# NOTE(review): not referenced again in the visible cells — confirm it is still needed
scratch_test_dataset = load_test_set(300, 300, BATCH_SIZE)

In [None]:
# Number of directory entries in each class folder of the processed test set
num_all_test, num_hem_test = (
    len(os.listdir(TEST_PATH + class_dir)) for class_dir in ("/all", "/hem")
)

total_test = num_all_test + num_hem_test

In [None]:

# Per-class and overall size of the processed test set
print('total test cancer cell images:', num_all_test)
print('total test normal cell images:', num_hem_test)
print("--")
print("Total test images:", total_test)


In [None]:

# Load the model from the same relative path it was saved to, so this works
# from whichever working directory the save happened in (not only /content).
cnn_model = load_model('simplecnn.h5')


In [None]:
def evaluate_classifier(model, dataset):
    """Split the sample positions of a batched dataset into hits and misses.

    A prediction above 0.5 counts as class 1. Positions are counted across
    batches in iteration order, starting at 0.

    :param model: object whose ``predict(images)`` returns one score per sample
    :param dataset: iterable of ``(images, labels)`` batches
    :return: tuple ``(correctly_classified_indices, misclassified_indices)``
    """
    misclassified_indices = []
    correctly_classified_indices = []
    index = 0
    for images, labels in dataset:  # Assuming dataset is batched
        # verbose=0: no progress bar for every single batch
        preds = model.predict(images, verbose=0)
        pred_labels = (np.asarray(preds).ravel() > 0.5).astype(int)
        # Flatten so that labels shaped (batch,) and (batch, 1) are both
        # compared element by element as scalars
        true_labels = np.asarray(labels.numpy()).ravel().astype(int)
        for predicted, actual in zip(pred_labels, true_labels):
            if predicted == actual:
                correctly_classified_indices.append(index)
            else:
                misclassified_indices.append(index)
            index += 1
    return correctly_classified_indices, misclassified_indices


def plot_images_by_indices(dataset, indices, title, labels_dict, max_images_per_figure=50):
    """Plot the dataset samples found at the given positions, 5 per row.

    :param dataset: batched dataset; it is unbatched and enumerated from 0
    :param indices: positions of the samples to show
    :param title: text placed at the top of every image title
    :param labels_dict: maps the integer label to the name shown in the title
    :param max_images_per_figure: maximum number of images drawn on one figure
    """
    # A set gives constant-time membership tests and ignores duplicate indices
    wanted = set(indices)
    if not wanted:
        # Nothing to show: avoid scanning the whole dataset
        return

    shown_indices = []
    images_collected = []
    labels_collected = []
    for idx, (images, labels) in enumerate(dataset.unbatch()):
        if idx not in wanted:
            continue
        # Remember the position next to the image so each title shows the right
        # index even when `indices` is not sorted
        shown_indices.append(idx)
        images_collected.append(images.numpy())
        # ravel() handles labels stored as a scalar as well as a 1-element vector
        labels_collected.append(int(np.ravel(labels.numpy())[0]))
        if len(images_collected) == len(wanted):
            break

    total_images = len(images_collected)
    cols = 5
    for i in range(0, total_images, max_images_per_figure):
        end_index = min(i + max_images_per_figure, total_images)
        num_images = end_index - i
        rows = (num_images + cols - 1) // cols  # ceiling division
        plt.figure(figsize=(cols * 4, rows * 4))
        for j in range(num_images):
            ax = plt.subplot(rows, cols, j + 1)
            image_index = i + j
            ax.imshow(images_collected[image_index].astype('uint8'))
            ax.set_title(f"{title}\nIndex: {shown_indices[image_index]}\nLabel: {labels_dict[labels_collected[image_index]]}", fontsize=10)
            ax.axis('off')
        plt.tight_layout()
        plt.show()



# Run the reloaded CNN over the test set and split sample positions into hits and misses
correct_cnn, incorrect_cnn = evaluate_classifier(cnn_model, test_dataset)

# Dictionary for label conversion
# (same order as class_names=['hem', 'all'] used when loading the test set)
labels_dict = {0: 'hem', 1: 'all'}





In [None]:
# Show every test image the CNN got wrong, titled with its true label
plot_images_by_indices(test_dataset, incorrect_cnn, "Incorrectly Classified by CNN", labels_dict)


In [None]:
# Show every test image the CNN got right, titled with its true label
plot_images_by_indices(test_dataset, correct_cnn, "Correctly Classified by CNN", labels_dict)