-
Notifications
You must be signed in to change notification settings - Fork 0
/
base_model.py
204 lines (170 loc) · 7.98 KB
/
base_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Update the BciPy models with methods for calculating performance across models using sklearn's metrics module.
# This will allow us to compare models using the same metrics.
import numpy as np
# from bcipy.helpers.task import TrialReshaper
from bcipy.helpers.exceptions import SignalException
from bcipy.signal.model import ModelEvaluationReport, SignalModel
from bcipy.signal.model.classifier import RegularizedDiscriminantAnalysis
from bcipy.signal.model import RdaKdeModel, PcaRdaKdeModel
from bcipy.signal.model.cross_validation import (
cost_cross_validation_auc,
cross_validation,
)
from bcipy.signal.model.density_estimation import KernelDensityEstimate
from bcipy.signal.model.dimensionality_reduction import (
ChannelWisePrincipalComponentAnalysis,
MockPCA,
)
from bcipy.signal.model.pipeline import Pipeline
from sklearn.utils.multiclass import unique_labels
class BaseRdaKdeModel(RdaKdeModel):
    """RDA + KDE likelihood model with an identity (MockPCA) reduction step.

    Subclasses RdaKdeModel so that performance can be computed and compared
    across BciPy models using the same metrics.
    """

    def fit(self, train_data: np.ndarray, train_labels: np.ndarray) -> SignalModel:
        """Train on provided data using K-fold cross validation and return self.

        Parameters:
            train_data: shape (Channels, Trials, Trial_length) preprocessed data
            train_labels: shape (Trials,) binary labels

        Returns:
            trained likelihood model (self)

        Raises:
            ValueError: if self.prior_type is neither 'empirical' nor 'uniform'
        """
        model = Pipeline(
            [
                MockPCA(),
                RegularizedDiscriminantAnalysis(),
            ]
        )

        # Find the optimal gamma + lambda values
        arg_cv = cross_validation(train_data, train_labels, model=model, k_folds=self.k_folds)

        # Get the AUC using those optimized gamma + lambda
        rda_index = 1  # index of the RDA step in the pipeline
        model.pipeline[rda_index].lam = arg_cv[0]
        model.pipeline[rda_index].gam = arg_cv[1]
        # cost_cross_validation_auc returns a cost term (negated AUC) plus the
        # cross-validated scores and labels used below for the KDE step.
        neg_auc, sc_cv, y_cv = cost_cross_validation_auc(
            model, rda_index, train_data, train_labels, arg_cv, k_folds=self.k_folds, split="uniform"
        )
        self.auc = -neg_auc

        # After finding cross validation scores do one more round to learn the
        # final RDA model
        model.fit(train_data, train_labels)

        # Insert the density estimates to the model and train using the cross
        # validated scores to avoid over fitting. Observe that these scores are
        # not obtained using the final model.
        model.add(KernelDensityEstimate(scores=sc_cv))
        model.pipeline[-1].fit(sc_cv, y_cv)

        self.model = model

        if self.prior_type == "uniform":
            self.log_prior_class_1 = self.log_prior_class_0 = np.log(0.5)
        elif self.prior_type == "empirical":
            # Class-1 prior estimated from the label frequencies in train_labels.
            prior_class_1 = np.sum(train_labels == 1) / len(train_labels)
            self.log_prior_class_1 = np.log(prior_class_1)
            self.log_prior_class_0 = np.log(1 - prior_class_1)
        else:
            raise ValueError("prior_type must be 'empirical' or 'uniform'")

        self.classes_ = unique_labels(train_labels)
        self._ready_to_predict = True
        return self

    def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray) -> ModelEvaluationReport:
        """Computes AUROC of the intermediate RDA step of the pipeline using k-fold cross-validation

        Args:
            test_data (np.ndarray): shape (Channels, Trials, Trial_length) preprocessed data.
            test_labels (np.ndarray): shape (Trials,) binary labels.

        Raises:
            SignalException: error if called before model is fit.

        Returns:
            ModelEvaluationReport: stores AUC
        """
        if not self._ready_to_predict:
            raise SignalException("must use model.fit() before model.evaluate()")

        # Evaluate only the reduction + RDA stages (exclude the KDE step).
        tmp_model = Pipeline([self.model.pipeline[0], self.model.pipeline[1]])
        lam_gam = (self.model.pipeline[1].lam, self.model.pipeline[1].gam)
        # The returned cost is negated AUC; flip the sign to report AUC.
        neg_auc, _, _ = cost_cross_validation_auc(
            tmp_model, self.optimization_elements, test_data, test_labels,
            lam_gam, k_folds=self.k_folds, split="uniform"
        )
        return ModelEvaluationReport(-neg_auc)

    def predict(self, data: np.ndarray) -> np.ndarray:
        """sklearn-compatible method for predicting

        Args:
            data (np.ndarray): preprocessed data

        Raises:
            SignalException: error if called before model is fit.

        Returns:
            np.ndarray: predicted class label for each trial
        """
        if not self._ready_to_predict:
            raise SignalException("must use model.fit() before model.predict()")
        # p(l=1 | e) = p(e | l=1) p(l=1): pick the class with highest posterior.
        probs = self.predict_proba(data)
        return probs.argmax(-1)
class BasePcaRdaKdeModel(PcaRdaKdeModel):
    """PCA + RDA + KDE likelihood model using channel-wise PCA reduction.

    Subclasses PcaRdaKdeModel so that performance can be computed and compared
    across BciPy models using the same metrics.
    """

    # reshaper = TrialReshaper()

    def fit(self, train_data: np.ndarray, train_labels: np.ndarray) -> SignalModel:
        """Train on provided data using K-fold cross validation and return self.

        Parameters:
            train_data: shape (Channels, Trials, Trial_length) preprocessed data
            train_labels: shape (Trials,) binary labels

        Returns:
            trained likelihood model (self)

        Raises:
            ValueError: if self.prior_type is neither 'empirical' nor 'uniform'
        """
        model = Pipeline(
            [
                ChannelWisePrincipalComponentAnalysis(n_components=self.pca_n_components, num_ch=train_data.shape[0]),
                RegularizedDiscriminantAnalysis(),
            ]
        )

        # Find the optimal gamma + lambda values
        arg_cv = cross_validation(train_data, train_labels, model=model, k_folds=self.k_folds)

        # Get the AUC using those optimized gamma + lambda
        rda_index = 1  # index of the RDA step in the pipeline
        model.pipeline[rda_index].lam = arg_cv[0]
        model.pipeline[rda_index].gam = arg_cv[1]
        # cost_cross_validation_auc returns a cost term (negated AUC) plus the
        # cross-validated scores and labels used below for the KDE step.
        neg_auc, sc_cv, y_cv = cost_cross_validation_auc(
            model, rda_index, train_data, train_labels, arg_cv, k_folds=self.k_folds, split="uniform"
        )
        self.auc = -neg_auc

        # After finding cross validation scores do one more round to learn the
        # final RDA model
        model.fit(train_data, train_labels)

        # Insert the density estimates to the model and train using the cross
        # validated scores to avoid over fitting. Observe that these scores are
        # not obtained using the final model.
        model.add(KernelDensityEstimate(scores=sc_cv))
        model.pipeline[-1].fit(sc_cv, y_cv)

        self.model = model

        if self.prior_type == "uniform":
            self.log_prior_class_1 = self.log_prior_class_0 = np.log(0.5)
        elif self.prior_type == "empirical":
            # Class-1 prior estimated from the label frequencies in train_labels.
            prior_class_1 = np.sum(train_labels == 1) / len(train_labels)
            self.log_prior_class_1 = np.log(prior_class_1)
            self.log_prior_class_0 = np.log(1 - prior_class_1)
        else:
            raise ValueError("prior_type must be 'empirical' or 'uniform'")

        self.classes_ = unique_labels(train_labels)
        self._ready_to_predict = True
        return self

    def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray) -> ModelEvaluationReport:
        """Computes AUROC of the intermediate RDA step of the pipeline using k-fold cross-validation

        Args:
            test_data (np.ndarray): shape (Channels, Trials, Trial_length) preprocessed data.
            test_labels (np.ndarray): shape (Trials,) binary labels.

        Raises:
            SignalException: error if called before model is fit.

        Returns:
            ModelEvaluationReport: stores AUC
        """
        if not self._ready_to_predict:
            raise SignalException("must use model.fit() before model.evaluate()")

        # Evaluate only the reduction + RDA stages (exclude the KDE step).
        tmp_model = Pipeline([self.model.pipeline[0], self.model.pipeline[1]])
        lam_gam = (self.model.pipeline[1].lam, self.model.pipeline[1].gam)
        # The returned cost is negated AUC; flip the sign to report AUC.
        neg_auc, _, _ = cost_cross_validation_auc(
            tmp_model, self.optimization_elements, test_data, test_labels,
            lam_gam, k_folds=self.k_folds, split="uniform"
        )
        return ModelEvaluationReport(-neg_auc)

    def predict(self, data: np.ndarray) -> np.ndarray:
        """sklearn-compatible method for predicting

        Args:
            data (np.ndarray): preprocessed data

        Raises:
            SignalException: error if called before model is fit.

        Returns:
            np.ndarray: predicted class label for each trial
        """
        if not self._ready_to_predict:
            raise SignalException("must use model.fit() before model.predict()")
        # p(l=1 | e) = p(e | l=1) p(l=1): pick the class with highest posterior.
        probs = self.predict_proba(data)
        return probs.argmax(-1)