# Random Search
- [Documentação](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html)

In [62]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
import pandas as pd
from scipy.stats import loguniform

## Carregando dados

In [63]:
# Fetch the wine dataset directly as a (features, target) pair of arrays,
# then display both shapes as the cell output.
X, y = datasets.load_wine(return_X_y=True)
(X.shape, y.shape)

((178, 13), (178,))

In [64]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=8,
)
(X_train.shape, X_test.shape)

((133, 13), (45, 13))

## Realizando a busca

- Definindo os parâmetros:

In [65]:
# The SVM is a step inside a Pipeline, so every hyperparameter name needs the
# "svm__" prefix (step name followed by a double underscore).
params = [
    dict(
        svm__kernel=["rbf"],
        svm__gamma=loguniform(1e0, 1e3),
        svm__C=loguniform(1e-4, 1e-3),
    ),
]

# Number of candidates handed to the randomized search as its n_iter argument.
n_iter = 20

In [66]:
# Standardize the features before the SVM. The step names ("scaler", "svm")
# are the prefixes that the keys in `params` refer to.
model_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC()),
])

# `scoring` can be swapped for any other scikit-learn scorer name.
# random_state is fixed so the candidates drawn from the distributions in
# `params` are the same on every Restart & Run All; without it each run
# samples different hyperparameters and the reported results change.
search = RandomizedSearchCV(
    model_pipeline,
    params,
    n_iter=n_iter,
    scoring="accuracy",
    cv=5,
    random_state=8,
)

search.fit(X_train, y_train)

## Verificando os resultados

- Melhores resultados e parâmetros:

In [67]:
# Mean cross-validated score (scoring="accuracy", cv=5) of the best candidate.
search.best_score_

0.41367521367521365

In [68]:
# Hyperparameter setting that achieved the best score above.
search.best_params_

{'svm__C': 0.00024366425837521794,
 'svm__gamma': 2.941046443698509,
 'svm__kernel': 'rbf'}

- Tabela de resultados:

In [69]:
# Tabulate the per-candidate cross-validation results (one row per candidate).
pd.DataFrame(search.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svm__C,param_svm__gamma,param_svm__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001339,0.00027,0.000629,6.7e-05,0.000244,2.941046,rbf,"{'svm__C': 0.00024366425837521794, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
1,0.001338,0.000141,0.000557,3e-05,0.00037,24.019618,rbf,"{'svm__C': 0.0003703263370116871, 'svm__gamma'...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
2,0.00114,0.000114,0.000489,4.9e-05,0.00022,593.341071,rbf,"{'svm__C': 0.00022024923702900498, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
3,0.000965,6.5e-05,0.000439,1.9e-05,0.000619,1.325834,rbf,"{'svm__C': 0.0006186897040685503, 'svm__gamma'...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
4,0.00093,5.5e-05,0.000437,1.2e-05,0.000216,359.423733,rbf,"{'svm__C': 0.00021565225020834408, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
5,0.00091,8e-06,0.000443,1.5e-05,0.000334,5.713625,rbf,"{'svm__C': 0.00033449860950191234, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
6,0.000915,1.6e-05,0.000444,2.1e-05,0.000136,915.938543,rbf,"{'svm__C': 0.00013591235313638704, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
7,0.001124,0.000151,0.000482,3.1e-05,0.000504,3.782508,rbf,"{'svm__C': 0.0005038287361817334, 'svm__gamma'...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
8,0.001007,6e-05,0.000454,2.5e-05,0.000203,2.73084,rbf,"{'svm__C': 0.00020311178837602374, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1
9,0.001001,4.4e-05,0.000509,5.2e-05,0.000138,1.35661,rbf,"{'svm__C': 0.00013768267736326642, 'svm__gamma...",0.407407,0.407407,0.407407,0.423077,0.423077,0.413675,0.007676,1


___
# Exercícios:
1. Encontrar um intervalo melhor de valores para o `RandomizedSearchCV`
2. Replicar os resultados para o Halving Search disponíveis na [documentação da sklearn](https://scikit-learn.org/stable/auto_examples/model_selection/plot_successive_halving_heatmap.html#sphx-glr-auto-examples-model-selection-plot-successive-halving-heatmap-py)
___

In [70]:
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV
from time import time
import numpy as np
import matplotlib.pyplot as plt

In [72]:
# Reload the wine dataset as (features, target) arrays for the exercise section
# and display both shapes.
X, y = datasets.load_wine(return_X_y=True)
(X.shape, y.shape)

((178, 13), (178,))

In [73]:
# Same 75/25 split and seed as the random-search section above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=8,
)
(X_train.shape, X_test.shape)

((133, 13), (45, 13))

In [84]:
# Pipeline with a single SVM step; the "svm" step name is the prefix used by
# the grid keys below. Note that, unlike the random-search pipeline, no scaler
# is applied here.
model_pipeline = Pipeline([
    ("svm", SVC()),
])

# Search grid: an RBF sub-grid (gamma x C) and a linear sub-grid (C only).
grid_param = [
    {
        "svm__kernel": ["rbf"],
        "svm__gamma": [1e-3, 1e-4],
        "svm__C": [1, 10, 100, 1000],
    },
    {
        "svm__kernel": ["linear"],
        "svm__C": [1, 10, 100, 1000],
    },
]

# Successive-halving grid search. The early iterations evaluate candidates on
# randomly drawn subsets of the training samples, so random_state is fixed to
# make the selected candidate and its score reproducible across runs.
search = HalvingGridSearchCV(
    model_pipeline,
    grid_param,
    scoring="accuracy",
    cv=5,
    random_state=8,
)

# Run the search on the training split.
search.fit(X_train, y_train)


In [85]:
# Mean cross-validated score (scoring="accuracy", cv=5) of the best candidate
# found by the halving search.
search.best_score_

0.9549019607843137

In [86]:
# Pipeline selected by the search (presumably refit on the full training split,
# since `refit` was left at its default — confirm against the sklearn docs).
search.best_estimator_

In [87]:
# Grid point that achieved the best score in the halving search.
search.best_params_

{'svm__C': 100, 'svm__kernel': 'linear'}