Classification of diagnostic groups using SVM with and without
sequential feature selection.

This script provides two pipelines that can be applied to different combinations of input features:
1. A baseline pipeline with all features.
2. A pipeline with feature selection using SequentialFeatureSelector.

In [None]:
# Imports

import numpy as np
import pandas as pd

from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import classification_report
from sklearn.model_selection import LeaveOneOut, cross_val_score, cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [None]:
# Load data

# Location of the Excel workbook holding the dataset (edit before running)
path_to_excel_file = "path/to/data.xlsx"

# Read the workbook and retain only the rows whose "Group" value is one of
# the diagnostic classes of interest (2, 3, 4 or 5)
df = pd.read_excel(path_to_excel_file).loc[
    lambda frame: frame["Group"].isin([2, 3, 4, 5])
]

In [None]:
# Define features and labels

# Names of the feature columns: placeholder values, replace them with the
# actual column names from your dataset
feature_list = ["list", "of", "all", "the", "features", "of", "interest"]

# Feature matrix: the columns of df named in feature_list
X = df.loc[:, feature_list]

# Target vector: the "Group" column (diagnostic group) as a NumPy array
y = df.loc[:, "Group"].to_numpy()

In [None]:
# Model and cross-validation setup

# Standardises features (zero mean, unit variance per column)
scaler = StandardScaler()

# Linear SVM classifier. random_state is fixed because LinearSVC's solver uses
# a random number generator, so unseeded runs may give slightly different
# results; with a seed, repeated evaluations of the same data agree.
model = LinearSVC(C=0.25, random_state=0)

# Leave-one-out cross-validation: each sample is held out once as the test set
cv = LeaveOneOut()

### Pipeline 1: Baseline (all features, no selection)


In [None]:
# Bundle scaling and classification into a single estimator so that, inside
# every cross-validation split, the scaler is fitted on the training samples
# only. Scaling the whole dataset up front would let the held-out sample
# influence the preprocessing (data leakage).
baseline_clf = make_pipeline(scaler, model)

# Evaluate model with the cross-validation scheme in `cv` (one accuracy score
# per split), working on the unscaled features
scores = cross_val_score(baseline_clf, X, y, scoring="accuracy", cv=cv, n_jobs=-1)
print(f"Accuracy: {np.mean(scores):.3f} ± {np.std(scores):.3f}")

# Out-of-fold predictions for the classification report
y_pred = cross_val_predict(baseline_clf, X, y, cv=cv, n_jobs=-1)

# Print per-class precision, recall, F1-score
target_names = ["2", "3", "4", "5"]
print(classification_report(y, y_pred, target_names=target_names, digits=3))

### Pipeline 2: Sequential Feature Selection

In [None]:
# Scaler + classifier as one estimator: wherever it is cross-validated, the
# scaler is re-fitted on the training split only (no leakage from the
# held-out sample into the preprocessing)
scaled_model = make_pipeline(scaler, model)

# Feature selector (backward elimination, accuracy criterion). It scores
# candidate feature subsets by cross-validating `scaled_model`, so scaling is
# also leak-free inside the selection procedure.
selector = SequentialFeatureSelector(
    scaled_model,
    n_features_to_select="auto",
    direction="backward",
    scoring="accuracy",
    cv=cv,
    n_jobs=-1,
    tol=0,
)

# Fit the selector on the full dataset. This fit is used ONLY to report which
# features are retained; it is not used for performance estimation.
selector.fit(X, y)

# Get selected features (despite the name, these are integer column indices,
# because get_support is called with indices=True)
selected_mask = selector.get_support(indices=True)
selected_features = X.columns[selected_mask]
print("Features selected:")
print(list(selected_features))

# Dataset restricted to the selected features (kept for downstream use)
X_selected = X[selected_features]

# Performance estimate: the selector is a step of the cross-validated
# estimator, so feature selection is repeated on the training part of every
# outer split. Selecting features on all samples and then scoring on those
# same samples would give an optimistically biased accuracy.
# NOTE: this nested evaluation is far more expensive than the selection fit
# above, since the whole selection procedure runs once per outer split.
selection_clf = make_pipeline(selector, scaled_model)

# Evaluate model with the cross-validation scheme in `cv`
scores = cross_val_score(selection_clf, X, y, scoring="accuracy", cv=cv, n_jobs=-1)
print(f"Accuracy: {np.mean(scores):.3f} ± {np.std(scores):.3f}")

# Out-of-fold predictions for the classification report
# (target_names is defined in the Pipeline 1 cell)
y_pred = cross_val_predict(selection_clf, X, y, cv=cv, n_jobs=-1)
print(classification_report(y, y_pred, target_names=target_names, digits=3))