In [20]:
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.model_selection import GridSearchCV, train_test_split, RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score

from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, RationalQuadratic, WhiteKernel, ExpSineSquared

grid = dict()
grid['kernel'] = [1*RBF(), 1*DotProduct(), 1*Matern(),  1*RationalQuadratic(), 1*WhiteKernel(), 1*ExpSineSquared()]


dataDir = Path.cwd().parent.parent.parent/'Data/processed'
ts_dataset = pd.read_csv(dataDir/"ts_dataset.csv", index_col="id")

In [10]:
X = ts_dataset.iloc[:, 0:len(ts_dataset.columns)-1].copy()
y = pd.DataFrame(ts_dataset.iloc[:, -1])
# split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=20)
model = GaussianProcessClassifier()
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=3, random_state=20)
# exhausive search over different kernels
search = GridSearchCV(estimator=model, param_grid=grid, scoring='accuracy', cv=cv, n_jobs=-1)
# fit model into the dataset
result = search.fit(X, y)

print("Mean cross-validated score of the best_estimator: ", result.best_score_)
print("Best estimator parameters: ", result.best_params_)

means = result.cv_results_['mean_test_score']
params = result.cv_results_['params']
for mean, param in zip(means, params):
    print("Accuracy %.3f with: %r" % (mean, param))

Mean cross-validated score of the best_estimator:  1.0
Best estimator parameters:  {'kernel': 1**2 * DotProduct(sigma_0=1)}
Accuracy 0.896 with: {'kernel': 1**2 * RBF(length_scale=1)}
Accuracy 1.000 with: {'kernel': 1**2 * DotProduct(sigma_0=1)}
Accuracy 1.000 with: {'kernel': 1**2 * Matern(length_scale=1, nu=1.5)}
Accuracy 1.000 with: {'kernel': 1**2 * RationalQuadratic(alpha=1, length_scale=1)}
Accuracy 0.500 with: {'kernel': 1**2 * WhiteKernel(noise_level=1)}
Accuracy nan with: {'kernel': 1**2 * ExpSineSquared(length_scale=1, periodicity=1)}


5 fits failed out of a total of 54.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "d:\Toolbox\PyCharm Community Edition 2021.2.2\proj_venv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\Toolbox\PyCharm Community Edition 2021.2.2\proj_venv\lib\site-packages\sklearn\gaussian_process\_gpc.py", line 719, in fit
    self.base_estimator_.fit(X, y)
  File "d:\Toolbox\PyCharm Community Edition 2021.2.2\proj_venv\lib\site-packages\sklearn\gaussian_process\_gpc.py", line 224, in fit
    self._constrained_optimization(
  File "d:\Toolbox\PyCharm Community Edition 202

In [22]:
best_kernel = 1*Matern()
model = GaussianProcessClassifier(kernel=best_kernel, random_state=20, n_jobs=-1)
model.fit(X_train, y_train.values.ravel())
print("Mean accuracy on training data: ", model.score(X_train, y_train))
print("Prediction on test data: ", model.predict(X_test))
pred_test = model.predict(X_test)
print("Prediction accuracy on test data: ", accuracy_score(y_test, pred_test))

Mean accuracy on training data:  1.0
Prediction on test data:  [0 0 1 0]
Prediction accuracy on test data:  1.0


In [12]:
ts_dataset

Unnamed: 0_level_0,timeDelta_Seconds__sum_values,timeDelta_Seconds__ar_coefficient__coeff_0__k_10,"timeDelta_Seconds__change_quantiles__f_agg_""var""__isabs_False__qh_0.8__ql_0.0","timeDelta_Seconds__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.0","timeDelta_Seconds__change_quantiles__f_agg_""var""__isabs_True__qh_0.8__ql_0.0","timeDelta_Seconds__change_quantiles__f_agg_""mean""__isabs_True__qh_1.0__ql_0.0","timeDelta_Seconds__change_quantiles__f_agg_""var""__isabs_True__qh_0.6__ql_0.2","timeDelta_Seconds__change_quantiles__f_agg_""var""__isabs_False__qh_0.8__ql_0.2","timeDelta_Seconds__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.2",timeDelta_Seconds__quantile__q_0.8,...,"timeDelta_Seconds__fft_coefficient__attr_""real""__coeff_0",timeDelta_Seconds__count_above__t_0,timeDelta_Seconds__count_below__t_0,"timeDelta_Seconds__change_quantiles__f_agg_""var""__isabs_False__qh_0.8__ql_0.4",timeDelta_Seconds__quantile__q_0.7,"timeDelta_Seconds__fft_coefficient__attr_""angle""__coeff_0",timeDelta_Seconds__mean_abs_change,timeDelta_Seconds__mean,timeDelta_Seconds__variation_coefficient,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-258.270455,-0.059432,0.028329,0.083289,0.021393,0.331732,0.003371,0.026488,0.07955,0.011136,...,-258.270455,0.200708,0.799292,0.013697,-0.22619,180.0,0.331732,-0.152462,-5.832477,0
2,-65.81825,-0.052343,0.092498,0.217239,0.045306,0.769818,0.005436,0.099213,0.236574,0.203931,...,-65.81825,0.261468,0.738532,0.074653,-0.122847,180.0,0.769818,-0.07548,-12.314639,0
3,222.515677,0.224628,0.243725,0.39394,0.088553,1.092149,0.015533,0.202925,0.358721,0.801608,...,222.515677,0.436019,0.563981,0.153784,0.362611,0.0,1.092149,0.351526,3.822593,1
4,191.531087,0.421148,0.323197,0.45469,0.117113,1.072078,0.015992,0.253044,0.403896,0.934202,...,191.531087,0.470054,0.529946,0.193257,0.45569,0.0,1.072078,0.347606,3.096461,1
5,-15.281514,-0.015041,0.080499,0.22583,0.029538,0.68012,0.004516,0.062471,0.199104,0.163952,...,-15.281514,0.258065,0.741935,0.051094,-0.074841,180.0,0.68012,-0.016998,-50.369007,0
6,79.167425,0.044996,0.168901,0.317342,0.068374,0.871197,0.011733,0.170684,0.331177,0.509252,...,79.167425,0.353096,0.646904,0.13374,0.125825,0.0,0.871197,0.104305,10.689828,1
7,-127.520034,-0.106161,0.087597,0.220981,0.038765,0.643985,0.006356,0.089296,0.234857,0.130919,...,-127.520034,0.247845,0.752155,0.061424,-0.103166,180.0,0.643985,-0.137414,-5.414806,0
8,276.115971,0.167051,0.466056,0.511193,0.204795,1.229253,0.022964,0.505636,0.566458,1.060158,...,276.115971,0.41345,0.58655,0.436364,0.581029,0.0,1.229253,0.343856,3.98983,1
9,-75.125493,-0.074791,0.089882,0.231704,0.036224,0.634169,0.006688,0.082267,0.229354,0.123973,...,-75.125493,0.255629,0.744371,0.05703,-0.091049,180.0,0.634169,-0.099504,-7.987953,0
10,36.089554,0.021588,0.140618,0.274528,0.065268,0.889185,0.006735,0.152795,0.302684,0.421731,...,36.089554,0.288241,0.711759,0.129388,-0.03903,0.0,0.889185,0.037554,28.039183,1
