In [1]:
## Import packages
import os
import mne
import numpy as np
import scipy as sp
import scipy.io as sio
import matplotlib.pyplot as plt

from scipy.signal import butter, sosfiltfilt, sosfreqz  
from scipy.io import loadmat
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_curve, auc

import scripts.classification_utils as utils
import scripts.CSP as csp
import scripts.SACSP as sacsp

In [2]:
## Define directories
## Root folder that holds the processed datasets. Set the COGS189_DATA_DIR
## environment variable to run the notebook on another machine; when it is
## unset, the original location is used.
data_dir = os.environ.get(
    'COGS189_DATA_DIR',
    '/home/inffzy/Desktop/cogs189/cogs189_final_project/data')

In [3]:
## Sub-directories of data_dir, one per dataset
bc4_2a_processed_name = 'bci_competition_4_2a_processed'
bc3_3a_processed_name = 'bci_competition_3_3a_processed'

## bc4_2a data paths: A01T.npz ... A09T.npz
bc4_2a_processed_data_paths = [
    os.path.join(data_dir, bc4_2a_processed_name, 'A0{}T.npz'.format(subject_number))
    for subject_number in range(1, 10)
]

## bc3_3a data paths: bc3_3a_s1.npz ... bc3_3a_s3.npz
bc3_3a_processed_data_paths = [
    os.path.join(data_dir, bc3_3a_processed_name, 'bc3_3a_s{}.npz'.format(subject_number))
    for subject_number in range(1, 4)
]

## Every subject file, bc4_2a first and bc3_3a afterwards
all_data_paths = bc4_2a_processed_data_paths + bc3_3a_processed_data_paths

In [4]:
## Define constants
## Event codes compared against the 'descriptions' array to pick epochs
label_left_hand, label_right_hand = 769, 770
## Passed as num_samples to utils.windowed_means and csp.CSP_LDA_classifier
num_samples = 10
## train_test_split settings: held-out fraction and shuffling seed
test_split, random_state = 0.33, 42
## Forwarded as cross_val= to the classifier wrappers
num_cross_val = 5

## Regular CSP Visualization

In [None]:
## Load the first subject's file. The with-block closes the .npz archive as
## soon as both arrays have been read into memory.
with np.load(all_data_paths[0]) as data_npz:
    data = data_npz['processed_motor_imagery_data']
    labels = data_npz['descriptions']

## Extract left and right hand epochs (one mask, applied to data and labels)
hand_mask = (labels == label_left_hand) | (labels == label_right_hand)
data = data[hand_mask]
labels = labels[hand_mask]

print(data.shape)

## Average over the last axis, leaving one value per epoch and per entry of
## the remaining axis
data_ds = np.mean(data, axis=-1)
print(data_ds.shape)

data_ds_c1 = data_ds[labels == label_left_hand]
data_ds_c2 = data_ds[labels == label_right_hand]

## Scatter the first two columns of the averaged data, one colour per class
fig, ax = plt.subplots()
ax.scatter(data_ds_c1[:, 0], data_ds_c1[:, 1], label='left hand')
ax.scatter(data_ds_c2[:, 0], data_ds_c2[:, 1], label='right hand')
ax.set_xlabel('data_ds[:, 0]')
ax.set_ylabel('data_ds[:, 1]')
ax.set_title('Before CSP')
ax.legend()
plt.show()

In [None]:
## Uses `data` and `labels` as left by the previous cell (left/right hand
## epochs only); this cell does not reload them.
data_c1 = data[labels == label_left_hand]
data_c2 = data[labels == label_right_hand]

## Build the transform from the two classes, then apply it to every epoch
csp_transform = csp.CSP(data_c1, data_c2, n_top=1, n_bot=1)
data_ = csp.apply_CSP(csp_transform, data)

print(data_.shape)

## Average the transformed data over its last axis
data_ds = np.mean(data_, axis=-1)
print(data_ds.shape)

data_ds_c1 = data_ds[labels == label_left_hand]
data_ds_c2 = data_ds[labels == label_right_hand]

## Scatter the first two columns of the averaged data, one colour per class
fig, ax = plt.subplots()
ax.scatter(data_ds_c1[:, 0], data_ds_c1[:, 1], label='left hand')
ax.scatter(data_ds_c2[:, 0], data_ds_c2[:, 1], label='right hand')
ax.set_xlabel('data_ds[:, 0]')
ax.set_ylabel('data_ds[:, 1]')
ax.set_title('After csp.CSP (n_top=1, n_bot=1)')
ax.legend()
plt.show()

In [None]:
## Uses `data` and `labels` as left by the first visualization cell
## (left/right hand epochs only); this cell does not reload them.
data_c1 = data[labels == label_left_hand]
data_c2 = data[labels == label_right_hand]

## Same steps as the csp.CSP cell, but the transform comes from csp.CSP2
csp_transform = csp.CSP2(data_c1, data_c2, n_top=1, n_bot=1)
data_ = csp.apply_CSP(csp_transform, data)

print(data_.shape)

## Average the transformed data over its last axis
data_ds = np.mean(data_, axis=-1)
print(data_ds.shape)

data_ds_c1 = data_ds[labels == label_left_hand]
data_ds_c2 = data_ds[labels == label_right_hand]

## Scatter the first two columns of the averaged data, one colour per class
fig, ax = plt.subplots()
ax.scatter(data_ds_c1[:, 0], data_ds_c1[:, 1], label='left hand')
ax.scatter(data_ds_c2[:, 0], data_ds_c2[:, 1], label='right hand')
ax.set_xlabel('data_ds[:, 0]')
ax.set_ylabel('data_ds[:, 1]')
ax.set_title('After csp.CSP2 (n_top=1, n_bot=1)')
ax.legend()
plt.show()

## Binary Classification with LDA

In [None]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Extract left and right hand epochs (one mask, applied to data and labels)
    hand_mask = (labels == label_left_hand) | (labels == label_right_hand)
    data = data[hand_mask]
    labels = labels[hand_mask]

    ## Downsample with windowed means
    data_ds = utils.windowed_means(data, num_samples)

    ## Flatten data
    data_ds_flattened = utils.flatten_dim12(data_ds)

    ## Prepare training and testing data
    X_train, X_test, y_train, y_test = train_test_split(
        data_ds_flattened,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = utils.LDA_classifier(X_train, y_train, cross_val=num_cross_val)
    cross_val_score_avg = clf.train_binary()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_binary()
    predictions, test_accuracy = clf.test_binary(X_test, y_test)
    results[i, :] = [train_accuracy, cross_val_score_avg, test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

In [None]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')

## Binary Classification with Regular CSP + LDA

In [None]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Extract left and right hand epochs (one mask, applied to data and labels)
    hand_mask = (labels == label_left_hand) | (labels == label_right_hand)
    data = data[hand_mask]
    labels = labels[hand_mask]

    ## Prepare training and testing data (the split is done before the
    ## classifier object is built, so it only receives the training part)
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = csp.CSP_LDA_classifier(X_train, y_train, cross_val=num_cross_val, num_samples=num_samples)
    cross_val_score_avg = clf.train_binary()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_binary()
    predictions, test_accuracy = clf.test_binary(X_test, y_test)
    results[i, :] = [train_accuracy, cross_val_score_avg, test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

In [None]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')

## Binary Classification with Spectrally Adaptive CSP + LDA

In [None]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Extract left and right hand epochs (one mask, applied to data and labels)
    hand_mask = (labels == label_left_hand) | (labels == label_right_hand)
    data = data[hand_mask]
    labels = labels[hand_mask]

    ## Prepare training and testing data (the split is done before the
    ## classifier object is built, so it only receives the training part)
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = sacsp.SACSP_LDA_classifier(X_train, y_train, cross_val=num_cross_val)
    cross_val_score_avg = clf.train_binary()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_binary()
    predictions, test_accuracy = clf.test_binary(X_test, y_test)
    results[i, :] = [train_accuracy, cross_val_score_avg, test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

In [None]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')

## 4-Class One-vs-One Classification with LDA

In [7]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject.
    ## No label filtering here: every epoch in the file is used.
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Downsample with windowed means
    data_ds = utils.windowed_means(data, num_samples)

    ## Flatten data
    data_ds_flattened = utils.flatten_dim12(data_ds)

    ## Prepare training and testing data
    X_train, X_test, y_train, y_test = train_test_split(
        data_ds_flattened,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = utils.LDA_classifier(X_train, y_train, cross_val=num_cross_val)
    cross_val_scores = clf.train_1_vs_1()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_1_vs_1()
    predictions, test_accuracy = clf.test_1_vs_1(X_test, y_test)
    ## The middle column is the mean of the scores returned by train_1_vs_1
    results[i, :] = [train_accuracy, np.mean(cross_val_scores), test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

Cross validation scores for labels  769  and  770 :  0.5952631578947368
Cross validation scores for labels  769  and  771 :  0.5269005847953216
Cross validation scores for labels  769  and  772 :  0.6254901960784314
Cross validation scores for labels  770  and  771 :  0.531578947368421
Cross validation scores for labels  770  and  772 :  0.604093567251462
Cross validation scores for labels  771  and  772 :  0.37279411764705883
Training accuracy:  0.8241758241758241
Testing accuracy:  0.24175824175824176

Cross validation scores for labels  769  and  770 :  0.5294117647058824
Cross validation scores for labels  769  and  771 :  0.5
Cross validation scores for labels  769  and  772 :  0.5426470588235295
Cross validation scores for labels  770  and  771 :  0.4742105263157895
Cross validation scores for labels  770  and  772 :  0.39869281045751637
Cross validation scores for labels  771  and  772 :  0.5263157894736842
Training accuracy:  0.7555555555555555
Testing accuracy:  0.177777777777

In [8]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')

[[0.82417582 0.54268676 0.24175824]
 [0.75555556 0.49521299 0.17777778]
 [0.81666667 0.53111455 0.18888889]
 [0.70285714 0.58366658 0.20689655]
 [0.71428571 0.46840959 0.17241379]
 [0.91780822 0.49216826 0.28767123]
 [0.64088398 0.42665778 0.27777778]
 [0.79545455 0.5377193  0.25      ]
 [0.89240506 0.540419   0.25316456]
 [1.         0.4456229  0.28      ]
 [1.         0.46111111 0.32258065]
 [1.         0.48031746 0.21428571]]
[0.83834106 0.50042552 0.2394346 ]


## 4-Class One-vs-One Classification with Regular CSP + LDA

In [5]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject.
    ## No label filtering here: every epoch in the file is used.
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Prepare training and testing data (the split is done before the
    ## classifier object is built, so it only receives the training part)
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = csp.CSP_LDA_classifier(X_train, y_train, cross_val=num_cross_val, num_samples=num_samples)
    cross_val_scores = clf.train_1_vs_1()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_1_vs_1()
    predictions, test_accuracy = clf.test_1_vs_1(X_test, y_test)
    ## The middle column is the mean of the scores returned by train_1_vs_1
    results[i, :] = [train_accuracy, np.mean(cross_val_scores), test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

Cross validation scores for labels  769  and  770 :  0.6473684210526316
Cross validation scores for labels  769  and  771 :  0.39473684210526316
Cross validation scores for labels  769  and  772 :  0.5261437908496732
Cross validation scores for labels  770  and  771 :  0.6064327485380117
Cross validation scores for labels  770  and  772 :  0.5391812865497077
Cross validation scores for labels  771  and  772 :  0.5808823529411764
Training accuracy:  0.4230769230769231
Testing accuracy:  0.2857142857142857

Cross validation scores for labels  769  and  770 :  0.5294117647058825
Cross validation scores for labels  769  and  771 :  0.49064327485380116
Cross validation scores for labels  769  and  772 :  0.5316176470588235
Cross validation scores for labels  770  and  771 :  0.5663157894736842
Cross validation scores for labels  770  and  772 :  0.44575163398692813
Cross validation scores for labels  771  and  772 :  0.44210526315789467
Training accuracy:  0.4111111111111111
Testing accurac

In [6]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')

[[0.42307692 0.54912424 0.28571429]
 [0.41111111 0.50097423 0.14444444]
 [0.32777778 0.54187593 0.28888889]
 [0.35428571 0.54056731 0.2183908 ]
 [0.33714286 0.49270153 0.20689655]
 [0.35616438 0.52394437 0.20547945]
 [0.35359116 0.51810501 0.27777778]
 [0.38636364 0.53813209 0.23863636]
 [0.34810127 0.57002801 0.13924051]
 [0.42424242 0.51895623 0.22      ]
 [0.37704918 0.4484127  0.25806452]
 [0.41071429 0.47142857 0.35714286]]
[0.37580173 0.51785418 0.23672304]


## 4-Class One-vs-One Classification with Spectrally Adaptive CSP + LDA

In [None]:
## results records training accuracy, cross validation average score, and testing accuracy for each subject
## (one row per entry of all_data_paths, columns in that order)
results = np.zeros((len(all_data_paths), 3))

for i, data_path in enumerate(all_data_paths):

    ## Load data; the with-block closes the .npz archive once both arrays
    ## are in memory, so no file handle is left open per subject.
    ## No label filtering here: every epoch in the file is used.
    with np.load(data_path) as data_npz:
        data = data_npz['processed_motor_imagery_data']
        labels = data_npz['descriptions']

    ## Prepare training and testing data (the split is done before the
    ## classifier object is built, so it only receives the training part)
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=test_split,
        shuffle=True,
        random_state=random_state)

    ## Classification
    clf = sacsp.SACSP_LDA_classifier(X_train, y_train, cross_val=num_cross_val)
    cross_val_scores = clf.train_1_vs_1()
    ## The no-argument call supplies the value recorded as training accuracy
    predictions, train_accuracy = clf.test_1_vs_1()
    predictions, test_accuracy = clf.test_1_vs_1(X_test, y_test)
    ## The middle column is the mean of the scores returned by train_1_vs_1
    results[i, :] = [train_accuracy, np.mean(cross_val_scores), test_accuracy]
    ## Blank line between subjects in the printed output
    print()

## Column-wise mean over all subjects
results_avg = np.mean(results, axis=0)

In [None]:
## Per-subject rows first, then the column-wise average on its own line
print(results, results_avg, sep='\n')