# Medical MNIST Image Classification

Hi all, this is an example attempt at classifying the Medical MNIST dataset. Access to a GPU is highly recommended, as training is quite time-consuming on a CPU.

## Imports

In [2]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from skimage import io

2024-03-25 09:03:55.266476: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-25 09:03:55.266619: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-25 09:03:55.434256: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Load the dataset

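Here we create a dictionary that maps each category folder (key) to an integer index (value). We then build a DataFrame in which every row holds an image's path relative to the dataset root (category folder + file name) and the index of the folder it came from, which serves as its label.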

In [3]:
# Build a two-column DataFrame of (relative image path, integer label).
# Every sub-directory of `dir_location` is treated as one category and its
# label is the position of that directory in the listing.
# NOTE: os.listdir() returns entries in arbitrary order, so the
# category -> label mapping stored in `cat_set` can differ between machines.
dir_location = "../input/medical-mnist"

# Only directories are categories; a stray file in the dataset root is skipped
# instead of crashing the inner os.listdir() call.
categories = [
    entry for entry in os.listdir(dir_location)
    if os.path.isdir(os.path.join(dir_location, entry))
]

cat_set = {}  # category folder name -> integer label
df_lst = []   # rows of [relative image path, label]
for idx, category in enumerate(categories):
    cat_set[category] = idx
    # Reuse dir_location so the root path is defined in exactly one place.
    for image in os.listdir(os.path.join(dir_location, category)):
        df_lst.append([category + "/" + image, idx])
df = pd.DataFrame(df_lst)

## Visualize the df

As we can see, we have a pandas DataFrame containing close to 60,000 records and 2 columns: column 0 holds each image's path relative to the dataset root (its parent directory plus file name), and column 1 holds the label of that image.

In [4]:
# Preview the first rows: column 0 is the image path relative to the dataset
# root, column 1 is the integer label assigned to its category folder.
df.head()

Unnamed: 0,0,1
0,AbdomenCT/003646.jpeg,0
1,AbdomenCT/003998.jpeg,0
2,AbdomenCT/001273.jpeg,0
3,AbdomenCT/001609.jpeg,0
4,AbdomenCT/007646.jpeg,0


## Visualising the dataset

Here we grab a few images from each category. By filtering the pandas DataFrame on the label column we collect the first 5 image paths per category, join each of them with the main dataset directory, and display the resulting images to get a feel for the data we are working with.

In [30]:
# Take the first five image paths (column 0) for each label (column 1).
# NOTE(review): the label numbers below are hard-coded and assume one specific
# os.listdir() ordering of the category folders; confirm them against `cat_set`.
abdominal_ct_images = df[df[1] == 0][0][:5]
breast_mri_images = df[df[1] == 1][0][:5]
cxr_images = df[df[1] == 3][0][:5]
chest_ct_images = df[df[1] == 5][0][:5]
hand_ct_images = df[df[1] == 2][0][:5]
head_ct_images = df[df[1] == 4][0][:5]


dfs = [abdominal_ct_images, breast_mri_images, cxr_images, chest_ct_images, hand_ct_images, head_ct_images]
names = ['abdominal_ct_images', 'breast_mri_images', 'cxr_images', 'chest_ct_images', 'hand_ct_images', 'head_ct_images']

# Draw one figure per category with its sample images side by side.
# The loop variables are deliberately NOT called `df`: rebinding that name
# would replace the main DataFrame that later cells still need.
for name, image_paths in zip(names, dfs):
    print(name)
    plt.figure(figsize=(12, 4))
    for count, value in enumerate(image_paths):
        image_path = os.path.join(dir_location, value)
        # The context manager closes the file handle once the image is drawn.
        with Image.open(image_path) as image:
            plt.subplot(1, len(image_paths), count + 1)
            # Greyscale colormap so the scans are not rendered in false colour.
            plt.imshow(image, cmap="gray")
        plt.title(f"Image {count}")
        plt.axis("off")
    plt.show()


## Setting up our Device and Hyperparameters:

In [5]:
# Train on the GPU when one is available (strongly recommended for this
# dataset) and fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of input channels of each image: a colour image would have 3 (RGB),
# whereas these greyscale images have a single intensity channel.
size_of_input = 1

# Step size used by the optimizer when updating the weights.
learning_rate = .01

# With close to 60,000 images the data is processed in mini-batches; the same
# batch size is used for the training and the testing loaders.
batch_size = 50

# Number of full passes over the training data (the outer loop around the
# per-batch inner loop).
epochs = 10

# Number of output scores produced by the network, one per category.
final_output_layer = 6

## Defining our CNN

Starting from a 64x64 single-channel image, the first convolution applies a 3x3 kernel (no padding) and produces 32 feature maps, each of size 62x62.
We then perform max pooling with a 2x2 kernel and a stride of 2, which leaves us with 32 feature maps of size 31x31.

We then feed these 32 feature maps into a second 3x3 convolution that returns 16 feature maps of size 29x29; a second 2x2 max pooling reduces them to 14x14.

We then flatten the output of the convolutional layers so it can be used by the fully connected layers, and apply dropout with a 20% likelihood that any given element is zeroed out.

The first fully connected layer therefore takes 16*14*14 = 3136 inputs (16 feature maps of size 14x14) and outputs 64 nodes; a final fully connected layer maps these 64 nodes to one score per class.
        
        

In [20]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Layers, in order: Conv(3x3, 32 maps) -> ReLU -> MaxPool(2x2) ->
    Conv(3x3, 16 maps) -> ReLU -> MaxPool(2x2) -> Flatten -> Dropout(0.2) ->
    Linear(64) -> ReLU -> Linear(num_classes).

    The forward pass returns the raw output of the last linear layer (one
    score per class); no softmax is applied.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of output scores.
        image_size: Side length of the square input images. The default of 64
            gives 16 * 14 * 14 = 3136 flattened features, i.e. the same layer
            size as the previously hard-coded ``64*7*7``.

    Raises:
        ValueError: If ``image_size`` is too small to survive both
            convolution + pooling stages.
    """

    def __init__(self, in_channels, num_classes, image_size=64):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.2)

        # Each stage is an unpadded 3x3 convolution (side - 2) followed by a
        # 2x2 / stride-2 max pool (floor division by 2): 64 -> 62 -> 31 -> 29 -> 14.
        side = image_size
        for _ in range(2):
            side = (side - 2) // 2
        if side < 1:
            raise ValueError(f"image_size={image_size} is too small for this network")

        # conv2 emits 16 feature maps of side x side, which are flattened.
        self.lin1 = nn.Linear(16 * side * side, 64)
        self.lin2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.dropout(x)
        x = F.relu(self.lin1(x))
        x = self.lin2(x)

        return x

## Create a formal class where we can quickly utilize the information in the df

In [21]:
# class Dataset(Dataset):
    
#     def __init__(self, df, root_directory, transform=None):
#         self.annotations = df
#         self.root_directory = root_directory
#         self.transform = transform
        
#     def __len__(self):
#         return len(self.annotations)
    
#     def __getitem__(self, index):
#         img_path = os.path.join(self.root_directory, self.annotations.iloc[index, 0])
#         image = io.imread(img_path)
#         y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        
#         if self.transform:
#             image = self.transform(image)
        
#         return (image, y_label)

class Dataset_Class(Dataset):
    """Dataset backed by a DataFrame of image paths and labels.

    Column 0 of the frame is read as the image path relative to
    ``root_directory`` and column 1 as the integer label. An optional
    ``transform`` is applied to every loaded image.
    """

    def __init__(self, df, root_directory, transform=None):
        self.annotations, self.root_directory, self.transform = df, root_directory, transform

    def __len__(self):
        """Return the number of rows in the annotation frame."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the image of row ``index`` and return ``(image, label)``."""
        image = io.imread(
            os.path.join(self.root_directory, self.annotations.iloc[index, 0])
        )
        label = torch.tensor(int(self.annotations.iloc[index, 1]))

        # Without a transform the image is handed back exactly as read.
        if not self.transform:
            return (image, label)
        return (self.transform(image), label)

In [22]:
# Wrap the full annotation frame in a dataset; ToTensor() is applied to every
# image as it is loaded.
new_dataset = Dataset_Class(df=df, root_directory=dir_location,transform=transforms.ToTensor())

In [23]:
# Sanity check: the dataset length equals the number of rows in the DataFrame.
len(new_dataset)

58954

## Creating the partitions of the dataset so it can be 80% training, 20% testing


Here we create plain training and testing datasets: the training set takes 80% of the initial dataset and the test set takes the remaining 20%. Note that we will not be using a separate validation set in this example.

In [24]:
# Nominal 80/20 split sizes. Multiplying by .8 yields floats, so these values
# are only indicative; the prints confirm that the two parts add back up to
# the full dataset length.
dataset_len = len(new_dataset)
len_train = dataset_len * .8
len_test = dataset_len - len_train
parts_sum = len_train + len_test
print(len_train, len_test)
print(parts_sum)
print(parts_sum == dataset_len)



47163.200000000004 11790.799999999996
58954.0
True


In [25]:
# Derive integer split sizes from the dataset itself instead of hard-coding
# them: 80% (rounded down) for training and whatever remains for testing, so
# the two sizes always sum to len(new_dataset) as random_split requires.
len_train_int = int(len(new_dataset) * .8)
len_test_int = len(new_dataset) - len_train_int
train_set, test_set = torch.utils.data.random_split(new_dataset, [len_train_int, len_test_int])

# Both loaders serve mini-batches of `batch_size` samples in shuffled order.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

## Training the model

We first initialise the network with the number of input channels and the number of output classes, and move it to the configured device.

In [None]:
model = CNN(size_of_input, final_output_layer).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    # Make sure dropout is active: the model may have been left in eval mode
    # by an earlier evaluation run if this cell is re-executed.
    model.train()

    running_loss = 0.0
    num_batches = 0
    for data, targets in train_loader:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward: clear stale gradients before computing new ones
        optimizer.zero_grad()
        loss.backward()

        # Gradient descent
        optimizer.step()

        # .item() extracts a plain float so no autograd graph is kept alive.
        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than the last batch only;
    # an empty loader yields NaN instead of a NameError.
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    print(epoch, "Current Loss:", mean_loss)


## Evaluating the model

In [None]:
def evaluate(loader, model, device=None):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    The model is switched to evaluation mode (and left in it), and gradients
    are disabled while the batches are scored. The predicted class of a
    sample is the index of its highest score.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to evaluate.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if it has none).

    Returns:
        Accuracy as a percentage (float); ``0.0`` when ``loader`` yields no
        samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, pred = scores.max(1)
            # .item() keeps the counter a plain int rather than a tensor.
            correct += (pred == y).sum().item()
            total += pred.size(0)

    # Guard against an empty loader, which would otherwise divide by zero.
    accuracy = correct / total * 100 if total else 0.0
    print("Accuracy:", accuracy, "%")
    return accuracy
    
# Report accuracy on the training split first, then on the held-out test split.
evaluate(train_loader, model)
evaluate(test_loader, model)