forked from algorithmcardboard/grasp-lift-eeg-detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gaussian_bofw_pipeline.py
83 lines (62 loc) · 2.65 KB
/
gaussian_bofw_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
import numpy.linalg as LA
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.grid_search import GridSearchCV
from sklearn.externals import six
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from bofw import Bofw
import sys
import csv
# Where the preprocessed per-subject arrays live, and which component-count
# sub-directory of that preprocessing to read.
DATA_DIR = "data/processed/"
N_COMPONENT = 2
subjects = range(1, 13)  # subject ids 1..12


def _stack_subjects(path_template):
    """Load one .npy file per subject and concatenate the arrays along axis 0.

    path_template is filled in with (DATA_DIR, N_COMPONENT, subject id).
    """
    per_subject = []
    for subject_id in subjects:
        per_subject.append(np.load(path_template.format(DATA_DIR, N_COMPONENT, subject_id)))
    return np.concatenate(per_subject)


# Training split, then the per-subject "val" split used for evaluation below.
X = _stack_subjects("{0}/{1}/subj{2}_train_data.npy")
y = _stack_subjects("{0}/{1}/subj{2}_train_labels.npy")
X_test = _stack_subjects("{0}/{1}/subj{2}_val_data.npy")
y_test = _stack_subjects("{0}/{1}/subj{2}_val_labels.npy")

# Only column 2 of the label arrays is used as the classification target.
y = y[:, 2]
y_test = y_test[:, 2]

print(X.shape, y.shape)
print(X_test.shape, y_test.shape)
# Pipeline: project-local Bofw transformer -> PCA -> standardisation -> RBF SVM.
clf = svm.SVC(kernel='rbf', C=1)
myBofw = Bofw()
# A float n_components in (0, 1) asks PCA to keep as many components as are
# needed to explain that fraction of the variance.
pca = PCA(n_components=0.9)
scaler = StandardScaler()
bofw_pipeline = Pipeline([('myown', myBofw), ('bofw_pca', pca), ('bofw_scaling', scaler), ('svm', clf)])

# Hyper-parameter grid. The '<step>__<param>' keys route each value list to the
# pipeline step of that name. The commented-out lists are alternative grids.
#num_clusters = [2**8, 2**9, 2**10, 2**11]
num_clusters = [2**11]
#cGrid= [2**8,2**9,2**10,2**11]
cGrid = [2**0, 2**1, 2**2, 2**3, 2**4]
#cGrid=[2**-4,2**-3, 2**-2, 2**-1, 2**0, 2**1, 2**2, 2**3,2**4]

# NOTE(review): GridSearchCV comes from the legacy sklearn.grid_search module;
# the grid_scores_ attribute read when writing the CSV belongs to that API.
estimator = GridSearchCV(
    bofw_pipeline,
    dict(myown__num_clusters=num_clusters, svm__C=cGrid),
    n_jobs=12,
    verbose=3,
)
estimator.fit(X, y)

# Evaluate on the validation split. Prediction is run exactly once and kept:
# every predict call pushes X_test through the whole pipeline again.
score = estimator.score(X_test, y_test)
predictions = estimator.predict(X_test)
print(score)
# One-vs-rest ROC AUC for each class, followed by the unweighted mean.
# NOTE(review): `predictions` holds the class labels returned by
# estimator.predict, so each per-class AUC is computed from 0/1 indicators
# rather than continuous scores — confirm this is the intended metric.
class_labels = [1, 2, 3, 4, 5, 6]
y_binary = label_binarize(y_test, classes=class_labels)
predictions_binary = label_binarize(predictions, classes=class_labels)
aucTotal = 0
# i is the 0-based column of the binarized arrays, i.e. class_labels[i].
for i in range(len(class_labels)):
    singleAuc = roc_auc_score(y_binary[:, i], predictions_binary[:, i])
    aucTotal += singleAuc
    print("for label", i, "auc=", singleAuc)
print("AUC score ", aucTotal / len(class_labels))
# Dump the grid-search results: a free-text first line carrying
# estimator.best_score_ (labelled "best estimator:"), then a header row and
# one CSV row per parameter combination with its mean validation score.
# NOTE(review): on Python 3 the csv docs recommend opening the file with
# newline='' — confirm the target interpreter before changing the open() call.
fileName = "AUC_" + str(N_COMPONENT) + "components.csv"
with open(fileName, "w") as myfile:
    myfile.write("best estimator:" + str(estimator.best_score_) + "\n")
    writer = csv.writer(myfile, delimiter=",")
    # Column order is taken from the first grid point's parameter dict.
    paramKeys = list(estimator.grid_scores_[0].parameters.keys())
    writer.writerow(['mean'] + paramKeys)
    for grid_point in estimator.grid_scores_:
        row = [grid_point.mean_validation_score]
        row.extend(grid_point.parameters.get(key) for key in paramKeys)
        writer.writerow(row)