<a href="https://colab.research.google.com/github/Sanarazaaa/Deep-Learning-Classification-of-Parkinson-s-Dataset-with-SHAP-Interpretability/blob/main/Deep_Learning_Classification_of_Parkinson%E2%80%99s_Dataset_with_SHAP_Interpretability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow if needed
# !pip install tensorflow

from google.colab import files
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import matplotlib.pyplot as plt

# -----------------------------
# Step 1: Upload CSV
# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded (for example the dialog was dismissed): fail with
    # a clear message instead of an opaque IndexError on the lookup below.
    raise ValueError("No file was uploaded; please select a CSV file.")
file_name = next(iter(uploaded))  # use the first uploaded file

# Step 2: Load CSV
data = pd.read_csv(file_name)

# Step 3: Drop columns that contain no data at all
data = data.dropna(axis=1, how='all')

# Step 4: Select target column (multi-class)
target_col = 'Replication'  # replace with your target column if different
y = data[target_col]

# Always encode the target to contiguous integer ids 0..K-1, whatever its
# dtype. The network built later has one softmax unit per unique label and is
# trained with sparse_categorical_crossentropy, which expects labels in
# [0, K); raw numeric labels such as 1..K would fall outside that range.
le = LabelEncoder()
y = le.fit_transform(y)

# Step 5: Select features
# Remove the target from the feature frame first; otherwise a numeric target
# would be picked up as a numeric feature and leak the label into X.
feature_frame = data.drop(columns=[target_col])
categorical_cols = feature_frame.select_dtypes(include=['object']).columns.tolist()

X_cat = pd.get_dummies(feature_frame[categorical_cols])       # One-hot encode categorical
X_num = feature_frame.select_dtypes(include=['int64', 'float64'])  # Numeric columns

# Combine numeric and one-hot encoded features
X_processed = pd.concat([X_num, X_cat], axis=1)

# Store feature names before converting to numpy array
feature_names = X_processed.columns.tolist()

X = X_processed.values

# Ensure numeric type
X = X.astype('float32')
y = y.astype('int')

# Check shapes
print("Feature shape:", X.shape)
print("Target shape:", y.shape)
print("Target unique values:", np.unique(y))

# Step 6: Hold out 20% of the samples as a test set (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 7: Build multi-class neural network
# Input width follows the feature matrix; the output layer has one softmax
# unit per distinct label value.
n_features = X.shape[1]
n_classes = len(np.unique(y))
model = Sequential([
    Input(shape=(n_features,)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Step 8: Compile model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 9: Train model (20% of the training data is used for validation)
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=16,
    epochs=50,
)

# Step 10: Plot training & validation accuracy/loss
# One figure per recorded metric; the validation series is stored in the
# history under the same key prefixed with "val_".
for metric_key, metric_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[metric_key], label=f'Train {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.title(f'Model {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

# Step 11: Evaluate on test set
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

# Step 12: Predict on test set
predictions = model.predict(X_test)
# The predicted class is the column with the highest probability.
predicted_classes = predictions.argmax(axis=1)

# Step 13: Confusion matrix & classification report
cm = confusion_matrix(y_test, predicted_classes)
print("Confusion Matrix:\n", cm)
cr = classification_report(y_test, predicted_classes)
print("Classification Report:\n", cr)

# Step 14: Optional - first 10 predictions vs true labels
preview = slice(None, 10)
print("First 10 Predictions:", predicted_classes[preview])
print("First 10 True labels:", y_test[preview])

In [None]:
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc

# Binarize labels: one indicator column per class so that column i can be
# scored against the predicted probability in pred_prob[:, i].
class_labels = np.unique(y)
y_test_bin = label_binarize(y_test, classes=class_labels)
if len(class_labels) == 2 and y_test_bin.shape[1] == 1:
    # For exactly two classes label_binarize returns a single column marking
    # the second class. Expand it to two columns; otherwise the loop below
    # would score the second class's labels with the first class's
    # probabilities and report an inverted ROC curve.
    y_test_bin = np.hstack([1 - y_test_bin, y_test_bin])
pred_prob = model.predict(X_test)

plt.figure(figsize=(10,6))
for i in range(y_test_bin.shape[1]):
    # One-vs-rest ROC curve for class index i.
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], pred_prob[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'Class {i} (AUC = {roc_auc:.2f})')

plt.plot([0,1],[0,1],'k--')  # chance-level diagonal for reference
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multi-class ROC Curves')
plt.legend()
plt.show()


In [None]:
from sklearn.manifold import TSNE

# Project the full feature matrix to two dimensions with t-SNE (fixed seed).
tsne = TSNE(n_components=2, random_state=42)
X_embedded = tsne.fit_transform(X)

# Draw one scatter series per class so each class gets its own legend entry.
labels = np.asarray(y)
plt.figure(figsize=(8, 6))
for class_val in np.unique(y):
    members = labels == class_val  # boolean mask of the rows in this class
    plt.scatter(
        X_embedded[members, 0],
        X_embedded[members, 1],
        label=f'Class {class_val}',
    )
plt.legend()
plt.title("t-SNE Projection of Parkinson's Features")
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the training split to obtain per-feature importances.
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X_train, y_train)

# Print each feature name next to its importance score.
importances = rf.feature_importances_
for name, score in zip(feature_names, importances):
    print(f"{name}: {score:.4f}")

# Optional: plot
import matplotlib.pyplot as plt
plt.barh(feature_names, importances)
plt.xlabel("Feature Importance")
plt.title("Random Forest Feature Importance")
plt.show()


In [None]:
import shap

# Use only the first 50 training rows as the background set and explain only
# the first 20 test rows (smaller samples for speed).
background = X_train[:50]
explained = X_test[:20]

explainer = shap.KernelExplainer(model.predict, background)
shap_values = explainer.shap_values(explained)

shap.summary_plot(shap_values, explained, feature_names=feature_names)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has not improved for 10 epochs and roll the model
# back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# NOTE(review): this calls fit() on the existing `model` object, so training
# resumes from its current weights rather than a fresh initialisation - confirm
# that is intended.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop],
)


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve

# After one-hot encoding your labels
from sklearn.preprocessing import label_binarize
# Indicator encoding of the test labels over every label value present in y.
class_values = np.unique(y)
y_test_bin = label_binarize(y_test, classes=class_values)
# Per-class probabilities predicted for the test set.
y_pred_bin = model.predict(X_test)


In [None]:
import nbformat

# Load the current notebook
# NOTE(review): the path looks like a placeholder - point it at the real notebook.
notebook_filename = '/content/your_notebook.ipynb'
# Read as notebook format v4. Requesting a major version that does not exist
# (the original asked for 5) makes nbformat raise ValueError during conversion.
nb = nbformat.read(notebook_filename, as_version=4)

# Remove broken widget metadata (no-op when the key is absent)
nb['metadata'].pop('widgets', None)

# Save cleaned notebook
nbformat.write(nb, notebook_filename)
print("Cleaned notebook saved.")
