In [2]:
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# --- Dataset locations (relative to the notebook's working directory) ---
train_dir = Path("../Combined Dataset/train")
test_dir = Path("../Combined Dataset/test")

# --- Per-image preprocessing ---
# Each image is reduced to a single channel, resized to 128x128 and
# converted to a tensor, in that order.
_preprocessing_steps = [
    transforms.Grayscale(num_output_channels=1),  # ensure 1-channel MRI
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

# --- Datasets: one ImageFolder per split, sharing the same transform ---
train_ds, test_ds = (
    datasets.ImageFolder(root=split_dir, transform=transform)
    for split_dir in (train_dir, test_dir)
)

# --- Dataloaders: only the training split is shuffled ---
_loader_kwargs = {"batch_size": 32}
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_kwargs)

# --- Summary of what was loaded ---
print("Classes:", train_ds.classes)
print("Train samples:", len(train_ds))
print("Test samples:", len(test_ds))


Classes: ['Mild Impairment', 'Moderate Impairment', 'No Impairment', 'Very Mild Impairment']
Train samples: 10240
Test samples: 1279


In [None]:
import numpy as np
import torch
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# --- Extract images and labels from DataLoaders ---
def extract_data(loader):
    """Materialise every batch yielded by ``loader`` as two numpy arrays.

    ``loader`` is iterated exactly once. Each item must be an
    ``(images, labels)`` pair whose elements expose ``.numpy()``; the
    converted batches are joined along axis 0 in iteration order.

    Returns:
        tuple: ``(images, labels)`` -- the concatenated image batches and the
        concatenated label batches.

    Raises:
        ValueError: from ``np.concatenate`` if ``loader`` yields no batches.
    """
    # Collect both halves in a single pass so images and labels stay aligned
    # even when the loader yields a different order on each iteration.
    batches = [(batch_imgs.numpy(), batch_lbls.numpy()) for batch_imgs, batch_lbls in loader]
    images = np.concatenate([pair[0] for pair in batches], axis=0)
    labels = np.concatenate([pair[1] for pair in batches], axis=0)
    return images, labels

print("Extracting and flattening data...")
# train_loader was built with shuffle=True and no seeded generator, so
# iterating it directly yields the samples in a different order on every
# kernel run. The fixed-seed StratifiedKFold used for the grid search selects
# folds by position, so a varying order would change which images land in each
# fold and make the CV scores irreproducible. Read the training set through a
# non-shuffling loader over the same dataset instead.
train_extract_loader = DataLoader(
    train_loader.dataset,
    batch_size=train_loader.batch_size,
    shuffle=False,
)
X_train, y_train = extract_data(train_extract_loader)
X_test, y_test = extract_data(test_loader)

# Flatten every image to a single feature vector (one row per sample)
X_train_flat = X_train.reshape(X_train.shape[0], -1)  # (n_samples, 128*128)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

print(f"Train shape: {X_train_flat.shape}, Test shape: {X_test_flat.shape}")

# --- Standardization ---
# The scaler is fitted on the training features only; the test features are
# transformed with those training statistics.
print("\nStandardizing features...")
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

# --- PCA for dimensionality reduction ---
# Like the scaler, PCA is fitted on the training data and then applied to the
# test data.
print("\nApplying PCA...")
pca = PCA(n_components=200, random_state=42)  # try 100-300 range
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(f"PCA components: {pca.n_components_}, Explained variance: {pca.explained_variance_ratio_.sum():.3f}")

# --- Grid Search with Stratified CV ---
# NOTE(review): X_train_pca comes from a scaler and PCA that were fitted on the
# whole training set before this search, so every validation fold has already
# influenced the preprocessing. The CV scores may be slightly optimistic --
# consider a Pipeline if unbiased CV estimates matter.
print("\nPerforming grid search with stratified 5-fold CV...")


def _macro_f1_search(estimator, param_grid):
    """Build a GridSearchCV over ``param_grid`` scored by macro F1.

    Uses a shuffled, seeded 5-fold StratifiedKFold splitter, all available
    cores and verbosity level 1.
    """
    return GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring='f1_macro',
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
        n_jobs=-1,
        verbose=1,
    )


# Linear SVM: only the regularisation strength C is searched
param_grid_linear = {'C': [0.01, 0.1, 1, 10, 100]}
svm_linear = SVC(kernel='linear', random_state=42)
grid_linear = _macro_f1_search(svm_linear, param_grid_linear)
grid_linear.fit(X_train_pca, y_train)

print(f"\nBest Linear SVM params: {grid_linear.best_params_}")
print(f"Best CV macro F1: {grid_linear.best_score_:.4f}")

# RBF SVM: C and the kernel width gamma are searched jointly
param_grid_rbf = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1],
}
svm_rbf = SVC(kernel='rbf', random_state=42)
grid_rbf = _macro_f1_search(svm_rbf, param_grid_rbf)
grid_rbf.fit(X_train_pca, y_train)

print(f"\nBest RBF SVM params: {grid_rbf.best_params_}")
print(f"Best CV macro F1: {grid_rbf.best_score_:.4f}")

# --- Select best model and evaluate on test set ---
# The linear search wins only when its best CV score is strictly higher;
# a tie falls through to the RBF search.
best_model = grid_linear if grid_linear.best_score_ > grid_rbf.best_score_ else grid_rbf
# Identity check: the question is which search object was picked, not whether
# two estimators happen to compare equal.
model_name = "Linear SVM" if best_model is grid_linear else "RBF SVM"

print(f"\n{'='*60}")
print(f"Best model: {model_name}")
print(f"{'='*60}")

# GridSearchCV.predict delegates to the best estimator refitted on the full
# training data (refit is left at its default).
y_pred = best_model.predict(X_test_pca)

# --- Evaluation metrics ---
accuracy = accuracy_score(y_test, y_pred)
macro_f1 = f1_score(y_test, y_pred, average='macro')

print("\nTest Set Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Macro F1: {macro_f1:.4f}")
print("\nClassification Report:")
# Pin the report to every known class index so its rows always line up with
# target_names; without `labels`, a class absent from both y_test and y_pred
# makes the number of rows disagree with target_names and the call fails.
class_indices = list(range(len(train_ds.classes)))
print(classification_report(
    y_test,
    y_pred,
    labels=class_indices,
    target_names=train_ds.classes,
))

# --- Confusion Matrix ---
class_names = train_ds.classes
class_indices = list(range(len(class_names)))

# Fix the label set so the matrix is always n_classes x n_classes and matches
# the tick labels, even if some class never occurs in y_test or y_pred.
cm = confusion_matrix(y_test, y_pred, labels=class_indices)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title(f'Confusion Matrix - {model_name}')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()

# --- Per-class one-vs-rest confusion matrices (raw counts) ---
# Size the grid from the number of classes instead of assuming exactly four;
# with four classes this reproduces the 2x2 layout at figsize (14, 12).
n_cols = max(1, min(2, len(class_names)))
n_rows = (len(class_names) + n_cols - 1) // n_cols
fig, axes = plt.subplots(
    n_rows, n_cols,
    figsize=(7 * n_cols, 6 * n_rows),
    squeeze=False,  # always a 2-D array, so ravel() works for any grid shape
)
axes = axes.ravel()

for idx, class_name in enumerate(class_names):
    # Binarise to "this class" (1) vs "any other class" (0). Passing labels
    # keeps the result 2x2 even when the class is absent from both arrays.
    cm_class = confusion_matrix(
        (y_test == idx).astype(int),
        (y_pred == idx).astype(int),
        labels=[0, 1],
    )
    sns.heatmap(cm_class, annot=True, fmt='d', cmap='Greens',
                ax=axes[idx], xticklabels=['Other', class_name],
                yticklabels=['Other', class_name])
    axes[idx].set_title(f'{class_name}')
    axes[idx].set_ylabel('True')
    axes[idx].set_xlabel('Predicted')

# Hide any leftover panel when the class count does not fill the grid.
for unused_ax in axes[len(class_names):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

Extracting and flattening data...
Train shape: (10240, 16384), Test shape: (1279, 16384)

Standardizing features...

Applying PCA...
PCA components: 200, Explained variance: 0.824

Performing grid search with stratified 5-fold CV...
Fitting 5 folds for each of 5 candidates, totalling 25 fits
