<h1>Explained computer-aided Diagnosis system with deep learning framework</h1>
<p>The aim of this project is to create a Python implementation of a computer-aided diagnosis system that, alongside each result, explains the reasoning behind it, promoting transparency and making the result easier for medical staff to understand.</p>

Before we begin, we need to set up our working environment by installing and importing SHAP, LIME, PyTorch and the other required libraries; the dataset itself is loaded in the following step.

In [None]:
%pip install shap
%pip install lime
%pip install torch 
%pip install matplotlib
%pip install pandas
%pip install numpy
%pip install scikit-learn

import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch import nn
from lime import lime_tabular 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import shap
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
import random

The libraries needed for data processing, data visualisation and numerical computation have now been installed and imported. However, we still need to load the data and clean it before we can use it.

In [None]:
# Load dataset
data = pd.read_csv('multiple sclerosis dataset.csv')

# Select only numerical columns for analysis as our data is provided in numerical format.
# This is done BEFORE computing column means: on pandas >= 2.0 `DataFrame.mean()`
# raises a TypeError if any non-numeric column is still present.
data = data.select_dtypes(include=[np.number])

# Drop the non-informative index column written by a previous `to_csv`;
# `errors='ignore'` keeps the cell working when the CSV has no such column.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

# Fill missing values with the average of their column.
# (The alternative is to remove incomplete rows instead: data = data.dropna())
# NOTE(review): this imputes every numeric column, including categorical codes
# and the label column used later - confirm that is acceptable for this dataset.
data = data.fillna(data.mean())

# Display basic information about the dataset
print(data.columns.tolist())
print(data.shape)
display(data.head())

<p>The data classifications are:</p>
<ul>
<li>Age: Age of the patient (in years)</li>
<li>Schooling: time the patient spent in school (in years)</li>
<li>Gender: 1=male, 2=female</li>
<li>Breastfeeding: 1=yes, 2=no, 3=unknown</li>
<li>Varicella: 1=positive, 2=negative, 3=unknown</li>
<li>Initial_Symptoms: 1=visual, 2=sensory, 3=motor, 4=other, 5=visual and sensory, 6=visual and motor, 7=visual and other, 8=sensory and motor, 9=sensory and other,
10=motor and other, 11=visual, sensory and motor, 12=visual, sensory and other, 13=visual, motor and other, 14=sensory, motor and other, 15=visual, sensory, motor and other</li>
<li>Mono_or_Polysymptomatic: 1=monosymptomatic, 2=polysymptomatic, 3=unknown</li>
<li>Oligoclonal_Bands: 0=negative, 1=positive, 2=unknown</li>
<li>LLSSEP: 0=negative, 1=positive</li>
<li>ULSSEP:0=negative, 1=positive</li>
<li>VEP:0=negative, 1=positive</li>
<li>BAEP: 0=negative, 1=positive</li>
<li>Periventricular_MRI:0=negative, 1=positive</li>
<li>Cortical_MRI: 0=negative, 1=positive</li>
<li>Infratentorial_MRI:0=negative, 1=positive</li>
<li>Spinal_Cord_MRI: 0=negative, 1=positive</li>
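<li>initial_EDSS: Expanded Disability Status Scale (EDSS) score at the initial assessment (value coding not documented with the dataset)</li>
<li>final_EDSS: Expanded Disability Status Scale (EDSS) score at the final assessment (value coding not documented with the dataset)</li>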
<li>Group: 1=CDMS (definitive MS), 2=non-CDMS </li>
</ul>

In [None]:
# Pull the diagnosis label out of the frame; everything else is a feature
y = data['group']
X = data.drop(columns=['group'])

# Encode the raw group codes as class indices and remember the column order
le = LabelEncoder()
y = le.fit_transform(y)
feature_names = list(X.columns)

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21
)

# Standardise features using statistics learned from the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors: float32 inputs, int64 class labels
X_train, X_test = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(split, dtype=torch.long) for split in (y_train, y_test)
)

# Simple feedforward neural network
class SimpleNN(nn.Module):
    """Feedforward network with a single hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output values (raw logits) per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the un-normalised output scores for the batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    
# Initialize the neural network, loss function and optimizer.
# The layer sizes are derived from the data instead of being hard-coded:
# one input per feature column and one output logit per class known to the
# label encoder, so the network cannot emit a class index that has no label.
model = SimpleNN(
    input_size=X_train.shape[1],
    hidden_size=60,
    output_size=len(le.classes_),
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training the neural network (each epoch is one step over the whole training set)
epochs = 343
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# Switch to inference mode before measuring accuracy on the held-out split
model.cpu()
model.eval()
with torch.no_grad():
    preds = torch.argmax(model(X_test), dim=1)
    acc = (preds == y_test).sum().item() / len(y_test)
    print(f"Accuracy: {acc*100:.2f}%")



The model can now be used to predict whether or not a person in our data has MS. However, a prediction isn't reliable without an explanation, so SHAP and LIME explanations will be implemented: SHAP to explain the overall set of predictions (a global view of the data) and LIME to explain a randomly chosen individual prediction (a local view).


In [None]:
# Small subsets used by the explainers: background rows and rows to explain
X_train_sample = X_train[:50]
X_test_sample = X_test[:10]


def classifier_fn(x):
    """Return the model's softmax class probabilities for a NumPy batch ``x``."""
    batch = torch.tensor(x, dtype=torch.float32)
    probabilities = torch.softmax(model(batch), dim=1)
    return probabilities.detach().numpy()


# SHAP: attribute the predictions for the test sample against the training background
exp_shap = shap.DeepExplainer(model, X_train_sample)
shap_values = exp_shap.shap_values(X_test_sample)

# LIME: pick one random row of the test sample and explain that single prediction
lime_instance = random.randrange(len(X_test_sample))
instance_row = X_test_sample[lime_instance].numpy()

lime_explainer = lime_tabular.LimeTabularExplainer(
    X_train_sample.numpy(),
    feature_names=feature_names,
    class_names=le.classes_,
    discretize_continuous=True,
)
exp_lime = lime_explainer.explain_instance(
    instance_row, predict_fn=classifier_fn, num_features=6
)
lime_output = exp_lime.as_html(show_table=True)

# Original label (as stored in the dataset) of the most probable class for that row
instance_probabilities = classifier_fn(instance_row.reshape(1, -1))
lime_prediction = le.classes_[np.argmax(instance_probabilities)]

# SHAP summary plot display
display(HTML(
    f"""<h2>SHAP Summary Plot</h2>"""))
shap.summary_plot(shap_values, X_test_sample.numpy(), feature_names=feature_names)

# LIME explanation display, wrapped in a white container because the raw
# output otherwise renders as black text on a dark background
display(HTML(
    f"""
    <h2>LIME Explanation for Instance {lime_instance}</h2>
    <div style="background-color: white;padding: 10px; ">
    {lime_output}
    <h2 style="color:{'lightgreen' if lime_prediction == 2 else 'red'};"> The model predicted that the patient: {'has definitive Multiple Sclerosis' if lime_prediction == 1 else 'does not have Multiple Sclerosis'} </h2>
</div>"""))