# **Bibliotecas comuns**

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

# **Como temos feito...**

In [None]:
# MLPClassifier is imported here and reused by the model cells further down.
from sklearn.neural_network import MLPClassifier

# return_X_y=True yields the (features, target) arrays directly.
X, y = datasets.load_iris(return_X_y=True)
# Show the shapes of the feature matrix and the label vector.
print(X.shape, y.shape)

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Report the shapes of both partitions.
print(f'Train: {X_train.shape}, {y_train.shape}')
print(f'Test: {X_test.shape}, {y_test.shape}')

# Baseline: one hidden layer of 100 units, trained once and scored on the held-out set.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=200,
    random_state=42,
)
mlp.fit(X_train, y_train)
mlp.score(X_test, y_test)

# **Aplicando cross-validation**

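**Basic**: When the `cv` argument is an integer, `cross_val_score` uses the `KFold` or `StratifiedKFold` strategy by default: `StratifiedKFold` if the estimator is a classifier and the target is binary or multiclass, `KFold` otherwise.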

In [None]:
from sklearn.model_selection import cross_val_score

# cross_val_score fits a fresh clone of the estimator on every fold, so the
# model is passed in unfitted; fitting it beforehand would only cost an extra
# training run whose result is never used.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=200,
                    random_state=42)
# cv=5 -> five folds; the result holds one score per fold (accuracy, the
# classifier's default scorer).
scores = cross_val_score(mlp, X_train, y_train, cv=5)
scores

In [None]:
# Summarise the per-fold scores as mean and standard deviation.
print(f'{np.mean(scores):0.2f} accuracy with a standard deviation of {np.std(scores):0.2f}')

**K-Fold**

In [None]:
from sklearn.model_selection import KFold

# Five folds without shuffling; shuffle=False is KFold's default, spelled out here.
kf = KFold(n_splits=5, shuffle=False)
scores = cross_val_score(estimator=mlp, X=X_train, y=y_train, cv=kf)
scores

In [None]:
# Summarise the per-fold scores as mean and standard deviation.
print(f'{np.mean(scores):0.2f} accuracy with a standard deviation of {np.std(scores):0.2f}')

**Stratified K-Fold**

Each set contains approximately the same percentage of samples of each target class as the complete set.



In [None]:
from sklearn.model_selection import StratifiedKFold

# Five folds that each keep roughly the class proportions of y_train;
# shuffle=False is the default, spelled out here.
skf = StratifiedKFold(n_splits=5, shuffle=False)
scores = cross_val_score(estimator=mlp, X=X_train, y=y_train, cv=skf)
scores

In [None]:
# Summarise the per-fold scores as mean and standard deviation.
print(f'{np.mean(scores):0.2f} accuracy with a standard deviation of {np.std(scores):0.2f}')

**Multiple metrics with cross_validate**

In [None]:
from sklearn.metrics import get_scorer_names

# List the names of all available scorers, i.e. the strings accepted by `scoring=`.
get_scorer_names()

In [None]:
from sklearn.model_selection import cross_validate

# Evaluate several metrics in a single pass over the same stratified folds.
scoring = ['precision_macro', 'recall_macro', 'accuracy']
skf = StratifiedKFold(n_splits=5)
scores = cross_validate(estimator=mlp, X=X_train, y=y_train,
                        scoring=scoring, cv=skf)
scores

In [None]:
# Per-fold macro-averaged precision measured on each fold's held-out part.
scores['test_precision_macro']

# **Cross-validation + search para tuning de hiperparâmetros**

**Grid search**

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, KFold

# Base estimator and fold scheme shared by every candidate.
mlp = MLPClassifier(random_state=42)
kf = KFold(n_splits=5)

# 4 architectures x 5 iteration budgets = 20 candidates, all of them evaluated.
param_grid = {
    'hidden_layer_sizes': [(100,), (50,), (25,), (50, 25)],
    'max_iter': [100, 200, 300, 400, 500],
}

gs = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=kf)

# fit() returns the search object itself, so mlp_gridcv and gs are the same object.
mlp_gridcv = gs.fit(X_train, y_train)

In [None]:
# Hyperparameter combination with the best mean cross-validated accuracy in the grid search.
mlp_gridcv.best_params_

In [None]:
# Mean cross-validated accuracy reached by the best grid-search candidate.
mlp_gridcv.best_score_

In [None]:
# Reuse the model that GridSearchCV already refit on all of X_train with the
# best parameters (refit=True is the default). Unlike a fresh MLPClassifier(),
# it keeps random_state=42 from the searched estimator, so the held-out score
# is reproducible and belongs to the configuration that was cross-validated.
mlp_otima = mlp_gridcv.best_estimator_

mlp_otima.score(X_test, y_test)

**Randomized search**

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV

mlp = MLPClassifier(random_state=42)
kf = KFold(n_splits=5)
# Candidate values; parameters given as lists are sampled uniformly.
param_rand = {'hidden_layer_sizes':[(100,),(50,),(25,),(50,25)],
              'max_iter':[100, 200, 300, 400, 500]}

# n_iter=10 draws 10 of the 20 possible combinations. random_state pins which
# ones are drawn; without it every run evaluates a different subset and may
# report a different "best" configuration.
rs = RandomizedSearchCV(estimator=mlp, param_distributions=param_rand, cv=kf,
                        scoring='accuracy', n_iter=10, random_state=42)

mlp_randomcv = rs.fit(X_train, y_train)

In [None]:
# Optional: uncomment both lines to view the per-candidate search results as a table.
# import pandas as pd

# pd.DataFrame(mlp_randomcv.cv_results_)

In [None]:
# Best hyperparameter combination among the candidates sampled by the randomized search.
mlp_randomcv.best_params_

In [None]:
# Mean cross-validated accuracy reached by the best randomized-search candidate.
mlp_randomcv.best_score_

In [None]:
# Reuse the model that RandomizedSearchCV already refit on all of X_train with
# the best parameters (refit=True is the default). Unlike a fresh
# MLPClassifier(), it keeps random_state=42 from the searched estimator, so the
# held-out score is reproducible and belongs to the configuration that was
# cross-validated.
mlp_otima = mlp_randomcv.best_estimator_

mlp_otima.score(X_test, y_test)