<a href="https://colab.research.google.com/github/MBraum/SleepStudy/blob/master/GridSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the iris dataset and split it into the feature matrix X and label vector y.
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Baseline model: standardise the features, then a linear-kernel SVM with C=0.1.
svm = SVC(C=0.1, kernel="linear")
standardScaler = StandardScaler()
pipeline = Pipeline(steps=[
    ('transformer', standardScaler),
    ('predictor', svm),
])

# Accuracy on each of the 10 cross-validation folds; the bare name on the
# last line makes the notebook display the resulting array.
scores = cross_val_score(estimator=pipeline, X=X, y=y, scoring="accuracy", cv=10)
scores

array([1.        , 0.93333333, 1.        , 1.        , 1.        ,
       0.93333333, 0.86666667, 1.        , 1.        , 1.        ])

In [None]:
# Average the per-fold accuracies into a single cross-validation score.
scores.mean()

0.9733333333333334

In [None]:
# Manual hyper-parameter sweep: 8 values of C, log-spaced from 1e-2 to 1e3.
C_range = np.logspace(-2, 3, 8)

# For every candidate C, build a fresh scaler + linear-SVM pipeline and keep
# the mean of its 10-fold cross-validated accuracy.
C_scores = [
    cross_val_score(
        Pipeline([
            ('transformer', StandardScaler()),
            ('predictor', SVC(kernel="linear", C=c_value)),
        ]),
        X, y, cv=10, scoring="accuracy",
    ).mean()
    for c_value in C_range
]

print(C_range)
print(C_scores)

[1.00000000e-02 5.17947468e-02 2.68269580e-01 1.38949549e+00
 7.19685673e+00 3.72759372e+01 1.93069773e+02 1.00000000e+03]
[0.8933333333333333, 0.9533333333333334, 0.9666666666666668, 0.9666666666666668, 0.9666666666666668, 0.9800000000000001, 0.9733333333333334, 0.9733333333333334]


In [None]:
from sklearn.model_selection import GridSearchCV # Mais eficiente

In [None]:
# Candidate C values: 8 points log-spaced from 1e-2 to 1e3.
C_range = np.logspace(start=-2, stop=3, num=8)

# The "<step>__<parameter>" key targets the C parameter of the pipeline step
# named 'predictor'.
param_grid = {"predictor__C": C_range}
print(param_grid)

{'predictor__C': array([1.00000000e-02, 5.17947468e-02, 2.68269580e-01, 1.38949549e+00,
       7.19685673e+00, 3.72759372e+01, 1.93069773e+02, 1.00000000e+03])}


In [None]:
# Scaler + linear-SVM pipeline to be tuned. The C given to SVC here is only a
# starting value: the search tries every predictor__C listed in param_grid.
standardScaler = StandardScaler()
svm = SVC(kernel="linear", C=0.1)
pipeline = Pipeline(steps=[
    ('transformer', standardScaler),
    ('predictor', svm),
])

# 10-fold cross-validated grid search scored by accuracy; fit() is the last
# expression so the notebook displays the fitted search object.
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid,
                    scoring='accuracy', cv=10)
grid.fit(X, y)


GridSearchCV(cv=10, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('transformer',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('predictor',
                                        SVC(C=0.1, break_ties=False,
                                            cache_size=200, class_weight=None,
                                            coef0=0.0,
                                            decision_function_shape='ovr',
                                            degree=3, gamma='scale',
                                            kernel='linear', max_iter=-1,
                                            probability=False,
                                            random_state=None, shrinking=True,
                                      

In [None]:
# Mean cross-validated accuracy of each candidate tried by the grid search.
grid.cv_results_["mean_test_score"]

array([0.89333333, 0.95333333, 0.96666667, 0.96666667, 0.96666667,
       0.98      , 0.97333333, 0.97333333])

In [None]:
# Parameter setting of each candidate tried by the grid search.
grid.cv_results_["params"]

[{'predictor__C': 0.01},
 {'predictor__C': 0.05179474679231213},
 {'predictor__C': 0.2682695795279726},
 {'predictor__C': 1.3894954943731375},
 {'predictor__C': 7.196856730011521},
 {'predictor__C': 37.27593720314942},
 {'predictor__C': 193.06977288832496},
 {'predictor__C': 1000.0}]

In [None]:
# Summary of the search, one item per line: best mean CV accuracy, the
# parameters that produced it, and the corresponding best estimator.
print(grid.best_score_, grid.best_params_, grid.best_estimator_, sep="\n")

## Usando os melhores parâmetros

In [None]:
## Retraining
# Refit the scaler and a linear SVM by hand on the full dataset.
standardScaler = StandardScaler()
X_scaled       = standardScaler.fit_transform(X)

# Take C from the fitted grid search instead of pasting the number from its
# printed output, so this cell stays correct if the grid, data or CV changes.
# float() keeps the estimator repr free of numpy scalar wrappers.
best_C = float(grid.best_params_["predictor__C"])

svm = SVC(kernel="linear", C=best_C)
svm.fit(X_scaled, y)

SVC(C=37.27593720314942, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [None]:
# Two unseen samples, one row each, with four feature values per row.
new_instances = [
    [6.1, 2.4, 1.2, 0.1],
    [5.1, 1.4, 4.6, 1.0],
]

# Reuse the scaler fitted on the training data (transform only, no refit)
# before handing the samples to the manually trained SVM.
new_instances_scaled = standardScaler.transform(new_instances)
svm.predict(new_instances_scaled)

array([0, 1])

In [None]:
# Predict with the fitted grid search directly on the raw (unscaled) samples.
grid.predict(new_instances)

array([0, 1])

## Random Search (mais barato)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Build the pipeline: feature scaling followed by an RBF-kernel SVM.
standardScaler = StandardScaler()
svm = SVC(kernel="rbf", C=0.1, gamma=0.1)
pipeline = Pipeline(steps=[
    ('transformer', standardScaler),
    ('predictor', svm),
])

# Define the candidate values for each hyper-parameter (8 log-spaced points each).
param_dist = {
    "predictor__gamma": np.logspace(-3, 3, 8),
    "predictor__C": np.logspace(-2, 3, 8),
}

# Evaluate 10 sampled parameter combinations with 10-fold CV accuracy;
# random_state fixes the sampling so the run is reproducible.
rand = RandomizedSearchCV(estimator=pipeline, param_distributions=param_dist,
                          n_iter=10, scoring='accuracy', cv=10,
                          random_state=2020)
rand.fit(X, y)


In [None]:
# Mean cross-validated accuracy of each parameter combination the random search sampled.
rand.cv_results_["mean_test_score"]

array([0.9       , 0.96      , 0.96666667, 0.94      , 0.72      ,
       0.86666667, 0.97333333, 0.86666667, 0.96      , 0.94666667])

In [None]:
# Best mean CV accuracy found by the random search and the parameters behind it.
print(rand.best_score_, rand.best_params_, sep="\n")

0.9733333333333334
{'predictor__gamma': 0.0071968567300115215, 'predictor__C': 7.196856730011521}


In [None]:
# Predict with the fitted random search directly on the raw (unscaled) samples.
rand.predict(new_instances)

array([0, 1])

## Comparando com o GridSearch

In [None]:
# Exhaustive search over every (C, gamma) combination in param_dist, using the
# same RBF pipeline, for comparison with the 10 combinations sampled above.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_dist,
    scoring="accuracy",
    cv=10,
).fit(X, y)

# Mean cross-validated accuracy of every combination in the grid.
grid.cv_results_['mean_test_score']

array([0.86      , 0.86      , 0.86      , 0.90666667, 0.9       ,
       0.72      , 0.43333333, 0.37333333, 0.86      , 0.86      ,
       0.86      , 0.90666667, 0.9       , 0.72      , 0.43333333,
       0.37333333, 0.86      , 0.86      , 0.91333333, 0.96      ,
       0.94      , 0.72      , 0.43333333, 0.37333333, 0.86      ,
       0.89333333, 0.96666667, 0.96666667, 0.95333333, 0.86666667,
       0.51333333, 0.40666667, 0.88      , 0.97333333, 0.97333333,
       0.96      , 0.94      , 0.86666667, 0.51333333, 0.40666667,
       0.96      , 0.98      , 0.96666667, 0.94666667, 0.94      ,
       0.86666667, 0.51333333, 0.40666667, 0.98      , 0.96      ,
       0.96      , 0.94      , 0.94      , 0.86666667, 0.51333333,
       0.40666667, 0.96      , 0.96666667, 0.96      , 0.94      ,
       0.94      , 0.86666667, 0.51333333, 0.40666667])

In [None]:
# Best mean CV accuracy found by the exhaustive search and the parameters behind it.
print(grid.best_score_, grid.best_params_, sep="\n")

0.9800000000000001
{'predictor__C': 37.27593720314942, 'predictor__gamma': 0.0071968567300115215}
