Support Vector Machine based approach - This classifies neurological disorders by learning the relationship between neurological conditions and the component frequencies of EEG data found using a Fourier transform

To run this file, edit `TRAINPATH` in the second block to match the name of the dataset used to train this model. Generate this with **fft** formatting, **15000** samples, and **balanced** formatting using `generate_transformed_dataset.ipynb`

In [None]:
import numpy as np
import pandas as pd
import sklearn.datasets as skdata
import sklearn.metrics as skmetrics
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.svm import SVC

In [2]:
# MAKE SURE TO CHANGE THIS TO THE LOCAL PATH TO DATA
TRAIN_PATH = "train_1998_samples_fft_0_to_10_hz_consensus_1.0_balanced.csv"

data = pd.read_csv(TRAIN_PATH)

data_np = data.to_numpy()

print(data_np.shape)

(1989, 2421)


In [4]:
num_input = data_np.shape[1] - 1

x = data_np[:, 0:num_input]
y = data_np[:, num_input]

# Shuffle the dataset based on sample indices
shuffled_indices = np.random.permutation(x.shape[0])

# Choose the first 80% as training set, next 10% as validation and the rest as testing
train_split_idx = int(0.80 * x.shape[0])
val_split_idx = int(0.90 * x.shape[0])

train_indices = shuffled_indices[0:train_split_idx]
val_indices = shuffled_indices[train_split_idx:val_split_idx]
test_indices = shuffled_indices[val_split_idx:]

# Select the examples from x and y to construct our training, validation, testing sets
x_train, y_train = x[train_indices, :], y[train_indices]
x_val, y_val = x[val_indices, :], y[val_indices]
x_test, y_test = x[test_indices, :], y[test_indices]

In [5]:
models = []
model_vals = []
kernels = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
tolerances = [1e-1, 1e-2, 1e-3]

# Train the models with different parameters
for kernel in kernels:
    print(f'Training model with {kernel} kernel')
    model = SVC(kernel=kernel, tol=1e-1, degree=10, max_iter=-1)

    # Train
    model.fit(x_train, y_train)
    score_train = model.score(x_train, y_train)
    print('Training Accuracy: {:.4f}'.format(score_train))

    # Validate
    score_val = model.score(x_val, y_val)
    print('Validation Accuracy: {:.4f}'.format(score_val))

    models.append(model)
    model_vals.append(score_val)

    # If train is perfect, don't bother retraining with finer tolerance
    if score_train == 1.0:
        break

# Choose the best model based on highest validation accuracy
best_model_idx = np.argmax(model_vals)
best_model = models[best_model_idx]

# Best solver on the test set
predictions_test = best_model.predict(x_test)
score_test = best_model.score(x_test, y_test)

print("The test results.... {:0.2f}%".format(score_test*100))


Training model with linear kernel
Training Accuracy: 1.0000
Validation Accuracy: 0.5477
The test results.... 46.73%
