In [1]:
import pandas as pd
import numpy as np 
import scipy.io as sio 
import matplotlib.pyplot as plt 
from os import getcwd
from os.path import join 
from statsmodels.tsa.ar_model import AutoReg
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, accuracy_score 


mat_contents = sio.loadmat('data3SS2009.mat')
dataset = mat_contents['dataset']
n_samples, n_channels, n_cases = dataset.shape # N, Chno, Nc
y = mat_contents['labels'].reshape(n_cases)

Ch1 = pd.DataFrame(dataset[:,0,:])
Ch2 = pd.DataFrame(dataset[:,1,:])
Ch3 = pd.DataFrame(dataset[:,2,:])
Ch4 = pd.DataFrame(dataset[:,3,:])
Ch5 = pd.DataFrame(dataset[:,4,:])

Feat = []
lags = 30
for i in range(0,n_cases):
    ar_model_Ch1 = AutoReg(Ch2[i].values, lags = lags).fit()
    ar_model_Ch2 = AutoReg(Ch3[i].values, lags = lags).fit()
    ar_model_Ch3 = AutoReg(Ch4[i].values, lags = lags).fit()
    ar_model_Ch4 = AutoReg(Ch5[i].values, lags = lags).fit()
    ar_models = np.concatenate([ar_model_Ch1.params, ar_model_Ch2.params, ar_model_Ch3.params, ar_model_Ch4.params])
    Feat.append(ar_models)


pca1 = PCA(n_components = 7) # 7 componentes obtidas da atividade II (95% da variância)
pca1.fit(Feat)
x_pca = pca1.transform(Feat)
print("Original shape: {}".format(str(np.shape(Feat))))
print("Reduced shape: {}".format(str(np.shape(x_pca))))

Original shape: (850, 124)
Reduced shape: (850, 7)


In [137]:
min_max_scaler = StandardScaler()
Feat_scaled = min_max_scaler.fit_transform(x_pca)
Y = mat_contents['labels'].reshape(n_cases)

## Atividade -> Supervisionado 4

Use randomized search with repeated cross-validation for hyperparameter
tuning. Use e.g. 

a) n_splits = 5, n_repeats = 50 [RepeatedKFold]

b) n_iter = 100, n_jobs = -1 (will use all your cores), cv = (object you created with
RepeatedKFold in 2.A), scoring = (choose a performance metric for classification
problems), [RandomizedSearchCV]

1. To linearSVC optimized params:

In [152]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import LinearSVC
import scipy.stats as stats
from sklearn.model_selection import RepeatedKFold

x_train, x_test, y_train, y_test = train_test_split(Feat_scaled, Y, test_size = 0.4, random_state = 0)

rkf = RepeatedKFold(n_splits = 5, n_repeats = 50, random_state = 42)
rkf.split(x_train, y_train)

# Randomized Search
rand_parameters = [{'C':range(0,25), 'loss':['hinge', 'square_hinge']}]
modelo_svc = LinearSVC() 
random_search_svm = RandomizedSearchCV(modelo_svc, rand_parameters, verbose = 0, n_iter = 100,
                                       n_jobs = -1, cv = rkf, scoring = 'neg_mean_squared_error', random_state=42)
random_search_svm.fit(x_train, y_train)
BPSVC = random_search_svm.best_params_

6500 fits failed out of a total of 12500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
250 fits failed with the following error:
Traceback (most recent call last):
  File "C:\python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\python39\lib\site-packages\sklearn\svm\_classes.py", line 257, in fit
    self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
  File "C:\python39\lib\site-packages\sklearn\svm\_base.py", line 1186, in _fit_liblinear
    raw_coef_, n_iter_ = liblinear.train_wrap(
  File "sklearn\svm\_liblinear.pyx", line 52, in sklearn.svm._liblinear.train_wrap
ValueError: b'C <= 0'

-----------------------------------

2. To optimize KNN params:

In [153]:
from sklearn.neighbors import KNeighborsClassifier

rkf = RepeatedKFold(n_splits = 5, n_repeats = 50, random_state = 42)
rkf.split(x_train, y_train)

# Randomized Search
rand_parameters = [{'n_neighbors':range(0,50), 'weights':['uniform', 'distance']}]
modelo_knn = KNeighborsClassifier() 
random_search_knn = RandomizedSearchCV(modelo_knn, rand_parameters, verbose = 0, n_iter = 100,
                                       n_jobs = -1, cv = rkf, scoring = 'neg_mean_squared_error', random_state=42)
random_search_knn.fit(x_train, y_train)
BP_knn = random_search_knn.best_params_

500 fits failed out of a total of 25000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
500 fits failed with the following error:
Traceback (most recent call last):
  File "C:\python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\python39\lib\site-packages\sklearn\neighbors\_classification.py", line 198, in fit
    return self._fit(X, y)
  File "C:\python39\lib\site-packages\sklearn\neighbors\_base.py", line 569, in _fit
    raise ValueError("Expected n_neighbors > 0. Got %d" % self.n_neighbors)
ValueError: Expected n_neighbors > 0. Got 0

 -1.0074902  -1.0087451  -1.04921569 -0.99819608 -0.91294118 -0.95168627
 -1.03701961 -0.97011765

3. Softmax

In [154]:
from sklearn.linear_model import LogisticRegression
from scipy.stats import uniform

rkf = RepeatedKFold(n_splits = 5, n_repeats = 50, random_state = 42)
rkf.split(x_train, y_train)

# Randomized Search
rand_parameters = [{'C':uniform(loc = 0, scale = 4), 'penalty':['l1', 'l2']}]
modelo_LRCV = LogisticRegression() 
random_search_LR = RandomizedSearchCV(modelo_LRCV, rand_parameters, verbose = 0, n_iter = 100,
                                       n_jobs = -1, cv = rkf, scoring = 'neg_mean_squared_error', random_state=42)
random_search_LR.fit(x_train, y_train)
BR_LR = random_search_LR.best_params_

10750 fits failed out of a total of 25000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10750 fits failed with the following error:
Traceback (most recent call last):
  File "C:\python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

 -0.79862745 -1.27356863 -0.78423529 -7.37776471 -0.972862

In [165]:
import pandas as pd 

result = pd.DataFrame(index = ['Paramentro 0','Paramentro 1'])
result['Knn_Best_Params'] = BP_knn 
result['Knn_Best_Params_Valor'] = BP_knn.values()
result['SVC_Best_Params'] = BPSVC
result['SVC_Best_Params_Valor'] = BPSVC.values()
result['L_Reg_Best_Params'] = BR_LR
result['L_Reg_Best_Params_Valor'] = BR_LR.values()
result.head()

Unnamed: 0,Knn_Best_Params,Knn_Best_Params_Valor,SVC_Best_Params,SVC_Best_Params_Valor,L_Reg_Best_Params,L_Reg_Best_Params_Valor
Paramentro 0,weights,uniform,loss,hinge,C,3.947548
Paramentro 1,n_neighbors,5,C,7,penalty,l2


### GridSearch

In [95]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
import scipy.stats as stats

modelo_tot = LinearSVC() #C = i, loss = 'hinge'

rand_par = [{'C':[5,25,50,100],'loss':['hinge','squared_hinge']} ]
random_search = GridSearchCV(modelo_tot, rand_par)

random_search.fit(x_train, y_train)

random_search.best_params_



{'C': 5, 'loss': 'squared_hinge'}