In [44]:
import mne
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import welch
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from processing_functions import ppt_id, create_epochs, freq_ind, create_numeric_labels, relative_band_power
warnings.simplefilter('ignore')

The following cell creates numerical labels for the target variable: 0 for Alzheimer's disease, 1 for frontotemporal dementia, and 2 for the healthy group.

In [45]:
# Read the participant table (tab-separated) and turn each participant's
# 'Group' entry into its numeric class label, one label per participant.
ppt_diagnostics = pd.read_csv('data/ds004504/participants.tsv', sep='\t')
target_labels = ppt_diagnostics['Group'].map(create_numeric_labels).to_numpy()
target_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

The general pipeline is as follows: import the recording for each patient, split it into epochs of 4 s with 50 percent overlap, estimate the power spectrum of each epoch with Welch's method, and compute the relative power in each of the five frequency bands for all 19 channels. This gives $19 \times 5 = 95$ features per epoch. We then train a 3-nearest-neighbour classifier on the epochs. We use leave-one-patient-out cross-validation: in each run we train on all patients but one and measure the accuracy on the epochs of the held-out patient. We record the accuracy of every run and, at the end, report the mean and standard deviation of these per-patient accuracies.

In [46]:
# Analysis parameters shared by the feature-extraction cells below.
sample_freq = 500  # sampling rate of the recordings, in hertz
epoch_duration = 4  # epoch length in seconds
# Epoch length in samples, derived from the duration and the sampling rate so
# the two cannot drift apart (4 s at 500 Hz = 2000 samples).
epoch_length = int(round(epoch_duration * sample_freq))
overlap_ratio = 0.5  # fraction of an epoch shared with the next epoch
# Band edges in Hz; the six edges delimit the five frequency bands used as features.
freq_bands = np.array([0.5, 4.0, 8.0, 13.0, 25.0, 45.0])

The code below goes through all the participants and creates a list of features and target variables, which we will then use for training and validating our model. 

In [48]:
# Build one feature matrix and one label list per participant.
features = []
targets = []
# Participant numbers are 1-based and follow the order of target_labels.
for ppt, label in enumerate(target_labels, start=1):
    subject = ppt_id(ppt)
    set_path = f'data/ds004504/derivatives/{subject}/eeg/{subject}_task-eyesclosed_eeg.set'
    raw_data = mne.io.read_raw_eeglab(set_path, preload=True)
    export = raw_data.to_data_frame()
    # Keep every column except the first (presumably the time stamp added by
    # to_data_frame -- TODO confirm), leaving only the channel signals.
    ppt_array = export.iloc[:, 1:].to_numpy()
    # Drop the raw recording and its DataFrame copy before loading the next subject.
    del raw_data, export

    ppt_epochs = create_epochs(ppt_array, epoch_length, overlap_ratio)
    # Welch PSD along axis 1; assumes create_epochs returns
    # (n_epochs, n_samples, n_channels) -- TODO confirm.
    # NOTE(review): nperseg is left at SciPy's default (256 samples), i.e. a
    # frequency resolution of about 500 / 256 ~ 1.95 Hz, so the narrow low
    # bands are covered by only a few bins -- confirm this is intended.
    freqs, ppt_psd = welch(ppt_epochs, fs=sample_freq, axis=1)
    ppt_rbp = relative_band_power(ppt_psd, freqs, freq_bands)

    # Flatten everything after the epoch axis into one feature vector per epoch
    # and repeat the participant's label once per epoch.
    n_epochs = ppt_rbp.shape[0]
    features.append(ppt_rbp.reshape((n_epochs, -1)))
    targets.append([label] * n_epochs)

In [49]:
# Sanity check: there must be one feature matrix and one label list per
# participant, and each participant must have exactly one label per epoch (row).
# The assertions cover every participant; the prints show the overall counts
# and participant 10 as a spot check.
assert len(features) == len(targets) == len(target_labels)
assert all(X.shape[0] == len(y) for X, y in zip(features, targets))
print(len(features), len(targets))
print(features[10].shape, len(targets[10]))

88 88
(383, 95) 383


We start by training a two-class classifier for Alzheimer's vs. healthy. The label array above shows that the first 65 participants (36 Alzheimer's followed by 29 healthy controls) belong to these two classes, so we restrict the data to them first.

In [78]:
# Leave-one-subject-out cross-validation for Alzheimer's (label 0) vs healthy
# (label 2). Subjects are selected by label rather than by slicing the first
# 65 entries, so the cell stays correct if the participant order changes.
two_class_subjects = [idx for idx, label in enumerate(target_labels) if label in (0, 2)]

cross_valid_acc = []
for held_out in two_class_subjects:
    # Training set: all epochs of every selected subject except the held-out one.
    train_subjects = [idx for idx in two_class_subjects if idx != held_out]
    train_X = np.concatenate([features[idx] for idx in train_subjects])
    train_y = np.concatenate([targets[idx] for idx in train_subjects])

    # Test set: all epochs of the held-out subject.
    test_X = features[held_out]
    test_y = targets[held_out]

    # Standardise with statistics estimated on the training epochs only, then
    # apply the same transform to the held-out epochs (no test-set leakage).
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    # Train the 3-nearest-neighbour classifier on the training data.
    ThreeNN = KNeighborsClassifier(n_neighbors=3)
    ThreeNN.fit(train_X, train_y)

    # Fraction of the held-out subject's epochs that are classified correctly.
    cross_valid_acc.append(ThreeNN.score(test_X, test_y))


In [79]:
# Held-out accuracy for the first five subjects of the two-class run.
cross_valid_acc[:5]

[0.6510067114093959,
 0.45569620253164556,
 0.5526315789473685,
 0.8977272727272727,
 0.513715710723192]

In [80]:
# Mean and standard deviation of the per-subject accuracies (two-class run).
acc_2class = np.asarray(cross_valid_acc)
acc_2class.mean(), acc_2class.std()

(0.6668528055353906, 0.2222600102467725)

Now we train a three-class classifier (Alzheimer's, frontotemporal dementia, healthy) with the same pipeline, this time using all 88 participants.

In [74]:
# Leave-one-subject-out cross-validation over all participants (three classes).
cross_valid_acc_3class = []
n_subjects = len(target_labels)
for held_out in range(n_subjects):
    # Training pool: the epochs of every subject except the held-out one.
    X_fit = np.concatenate(features[:held_out] + features[held_out + 1:])
    y_fit = np.concatenate(targets[:held_out] + targets[held_out + 1:])

    # Estimate the scaling from the training pool only; the held-out epochs
    # are transformed with those same statistics further down.
    scaler = StandardScaler()
    X_fit = scaler.fit_transform(X_fit)

    # Fit the 3-nearest-neighbour classifier on the scaled training pool.
    ThreeNN = KNeighborsClassifier(n_neighbors=3)
    ThreeNN.fit(X_fit, y_fit)

    # Score on the held-out subject's epochs and record the accuracy.
    X_eval = scaler.transform(features[held_out])
    y_eval = targets[held_out]
    cross_valid_acc_3class.append(ThreeNN.score(X_eval, y_eval))


In [75]:
# Held-out accuracy for the first ten subjects of the three-class run.
cross_valid_acc_3class[:10]

[0.5637583892617449,
 0.35443037974683544,
 0.4342105263157895,
 0.4914772727272727,
 0.513715710723192,
 0.17777777777777778,
 0.599476439790576,
 0.8025316455696202,
 0.3770491803278688,
 0.1674491392801252]

In [76]:
# Mean and standard deviation of the per-subject accuracies (three-class run).
acc_3class = np.asarray(cross_valid_acc_3class)
acc_3class.mean(), acc_3class.std()

(0.45720000208854844, 0.25162364663428294)