<a href="https://colab.research.google.com/github/Hsayed1/Medical-Mnist-Image-Classifier/blob/main/Medical_Image_CNN_ReducedDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
%matplotlib inline
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from google.colab import drive
import shutil
from sklearn.metrics import classification_report
import random
from google.colab import drive
from tensorflow.keras.layers import Dense, Flatten
"""This code loads, preprocesses, and trains on the medical mnist dataset. In this file two experiments is done on a dataset of 9k images. Results have been reported."""

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is later read
# from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import the dataset archive from the mounted Google Drive.
# The context manager guarantees the archive handle is closed even when
# extraction fails part-way; a manual close() call is skipped if extractall()
# raises.
with zipfile.ZipFile('/content/drive/MyDrive/medicalimages.zip', 'r') as zip_content:
    # Extract into ./data, relative to the current working directory.
    zip_content.extractall('data')

In [None]:
# Root of the extracted dataset; it holds one sub-directory per class.
data = '/content/data/medicalimages'

# Number of images per batch produced by the dataset loaders.
batch_size = 32

# Images are resized to a square of this many pixels per side.
img_height = img_width = 150

In [None]:
# Randomly sample up to 1.5k images from each class, because the full dataset
# was too large to train for a high number of epochs.

# Class sub-directories expected inside the extracted dataset
class_names = ['AbdomenCT', 'BreastMRI', 'CXR', 'ChestCT', 'Hand', 'HeadCT']

# Target number of images per class
images_per_class = 1500

# Directory that receives the reduced dataset
reduced_data_path = '/content/data/reduced_medicalimages'
os.makedirs(reduced_data_path, exist_ok=True)

for class_name in class_names:
    class_path = os.path.join(data, class_name)
    target_class_path = os.path.join(reduced_data_path, class_name)

    # Start from an empty class directory: re-running this cell would
    # otherwise copy a second random sample on top of the previous one and
    # silently exceed images_per_class.
    shutil.rmtree(target_class_path, ignore_errors=True)
    os.makedirs(target_class_path, exist_ok=True)

    # Only regular files are candidates; stray sub-directories would make
    # shutil.copyfile fail.
    class_files = [
        entry for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    ]

    # random.sample draws without replacement. Capping the sample size at the
    # number of available files avoids an IndexError on small classes.
    sample_size = min(images_per_class, len(class_files))
    for file_name in random.sample(class_files, sample_size):
        shutil.copyfile(
            os.path.join(class_path, file_name),
            os.path.join(target_class_path, file_name),
        )

In [None]:
# Training subset: with validation_split=0.3 and subset="training" the loader
# returns the 70% of the reduced dataset that is not held out.
train = tf.keras.utils.image_dataset_from_directory(
    directory=reduced_data_path,
    image_size=(img_height, img_width),  # resize to 150x150
    batch_size=batch_size,
    seed=123,
    validation_split=0.3,
    subset="training",
)


In [None]:
# Split the held-out 30% into validation (~20% of all data) and testing (~10%).
#
# For a given seed, image_dataset_from_directory assigns the tail of the
# shuffled file list to subset="validation". Requesting 0.2 and then 0.1 with
# the same seed therefore made the test set a subset of the validation set and
# left 10% of the images unused. Loading the exact complement of the training
# subset once and partitioning it keeps train, val and test disjoint.
holdout = tf.keras.utils.image_dataset_from_directory(
    reduced_data_path,
    validation_split=0.3,  # must equal the value used for the training subset
    subset="validation",
    seed=123,  # must equal the seed used for the training subset
    image_size=(img_height, img_width),  # resize to 150x150
    batch_size=batch_size
)

# The loader shuffles by default, and tf.data reshuffles on every iteration,
# so take()/skip() alone would hand out different images on each pass.
# Caching and reading the dataset once in full freezes the batch order.
# NOTE: the cache keeps the held-out images in memory.
holdout = holdout.cache()
holdout_batches = sum(1 for _ in holdout)

# Two thirds of the held-out batches go to validation, the rest to testing.
val_batches = (2 * holdout_batches) // 3
val = holdout.take(val_batches)
test = holdout.skip(val_batches)

In [None]:
# Size of the classifier's output layer: one unit per entry in class_names.
num_classes = len(class_names)

In [None]:
# Baseline CNN for image classification, assembled as a flat list of layers
# and then wrapped in a Sequential model.

# Input stage: scale pixel values to the range [0,1]
cnn_layers = [
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
]

# Feature extraction: three conv + max-pool stages with growing filter counts
for n_filters in (32, 64, 128):
    cnn_layers.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    cnn_layers.append(layers.MaxPooling2D())

# Collapse the feature maps into a vector for the dense head
cnn_layers.append(layers.Flatten())

# Dense head: each relu layer is followed by dropout for regularization
for n_units in (256, 128):
    cnn_layers.append(layers.Dense(n_units, activation='relu'))
    cnn_layers.append(layers.Dropout(0.5))

# Softmax output, one unit per class
cnn_layers.append(layers.Dense(num_classes, activation='softmax'))

model = Sequential(cnn_layers)

In [None]:
# Compile the model with the Adam optimizer, Sparse Categorical Crossentropy
# loss, and accuracy as the evaluation metric.
# The output layer already applies softmax, so the loss receives
# probabilities: from_logits must be False, otherwise softmax is applied a
# second time inside the loss and the reported loss values are distorted.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture of the baseline CNN.
model.summary()

In [None]:
# The same CNN was tried in the final project file, but the full dataset was
# too large to train for many epochs. As recommended by the professor, this
# experiment trains on the reduced dataset with a high epoch count instead.
# Results have been reported.
epochs = 20
history = model.fit(train, validation_data=val, epochs=epochs)

In [None]:
# Extract the per-epoch accuracy and loss recorded during training, print
# them, and plot training vs. validation curves side by side.
acc = history.history['accuracy']
print("accuracy: ", acc)
val_acc = history.history['val_accuracy']
print("val accuracy: ", val_acc)

loss = history.history['loss']
print("loss: ", loss)

val_loss = history.history['val_loss']
print("val loss: ", val_loss)

# Take the x-axis from the number of recorded epochs, not the global `epochs`
# variable: the two can differ (e.g. after `epochs` is reassigned in another
# cell), and a length mismatch makes plt.plot raise.
epochs_range = range(len(acc))

# Plotting accuracy
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Plotting loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()


In [None]:
# Score the trained CNN on the test set. With the compiled loss and the single
# 'accuracy' metric, evaluate() returns [loss, accuracy].
test_results = model.evaluate(test)
test_loss, test_accuracy = test_results[0], test_results[1]

# Report both numbers
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

In [None]:
# Transfer-learning classifier: a frozen ImageNet ResNet50 backbone followed
# by a small dense head.
resnet = Sequential()

# Declare the input explicitly so the preprocessing layer knows its shape.
resnet.add(tf.keras.Input(shape=(img_height, img_width, 3)))

# The ImageNet weights expect inputs that went through ResNet50's own
# preprocess_input. The datasets yield raw 0-255 RGB images (the baseline CNN
# rescales them itself), so the preprocessing has to be part of this model.
resnet.add(layers.Lambda(tf.keras.applications.resnet50.preprocess_input))

# Load ResNet50 pre-trained on ImageNet without its classification head.
# `classes` is not passed: it is only used when include_top=True.
# pooling='avg' makes the backbone output a flat feature vector per image, so
# no Flatten layer is needed afterwards.
pretrained = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(img_height, img_width, 3),
    pooling='avg',
    weights='imagenet',
)

# Freeze the whole backbone so only the new head is trained
pretrained.trainable = False

# Add the pre-trained ResNet50 model to the Sequential model
resnet.add(pretrained)

# Fully connected layer with 128 units and ReLU activation
resnet.add(Dense(128, activation='relu'))

# Output layer: one softmax unit per class
resnet.add(Dense(num_classes, activation='softmax'))


In [None]:
# Print the layer-by-layer architecture of the ResNet50-based model.
resnet.summary()

In [None]:
# Compile with Adam, Sparse Categorical Crossentropy and accuracy.
# The output layer already applies softmax, so the loss receives
# probabilities: from_logits must be False, otherwise softmax is applied a
# second time inside the loss and the reported loss values are distorted.
resnet.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Train the ResNet50-based model for the same number of epochs as the CNN,
# validating after every epoch. `history` is overwritten with this run.
epochs = 20
history = resnet.fit(train, validation_data=val, epochs=epochs)

In [None]:
# Extract the per-epoch accuracy and loss recorded during training, print
# them, and plot training vs. validation curves side by side.
acc = history.history['accuracy']
print("accuracy: ", acc)
val_acc = history.history['val_accuracy']
print("val accuracy: ", val_acc)

loss = history.history['loss']
print("loss: ", loss)

val_loss = history.history['val_loss']
print("val loss: ", val_loss)

# Take the x-axis from the number of recorded epochs, not the global `epochs`
# variable: the two can differ (e.g. after `epochs` is reassigned in another
# cell), and a length mismatch makes plt.plot raise.
epochs_range = range(len(acc))

# Plotting accuracy
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Plotting loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()


In [None]:
# Score the trained ResNet50-based model on the test set. With the compiled
# loss and the single 'accuracy' metric, evaluate() returns [loss, accuracy].
test_results = resnet.evaluate(test)
test_loss, test_accuracy = test_results[0], test_results[1]

# Report both numbers
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)