# Hiperparâmetros - por que são importantes, exemplos de uso.

# Machine Learning

In [None]:
# Imports necessários
import warnings
import numpy as np

from sklearn import datasets
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier

# Fix the seed so the whole search is reproducible
SEED = 42
np.random.seed(SEED)

# Silence library warnings to keep the cell output readable
warnings.filterwarnings('ignore')

# Load the dataset
iris = datasets.load_iris()

# Hyperparameter grid.
# 'auto' is intentionally not listed under max_features: for classifiers it
# was only an alias of 'sqrt' (so it duplicated candidates), and it was
# removed in scikit-learn 1.3, where every fit that uses it fails.
parameters = {'n_estimators': [10, 100],
              'criterion': ['gini', 'entropy'],
              'max_depth': [1, 10],
              'min_samples_split': [2, 10],
              'min_samples_leaf': [1, 5, 10],
              'max_features': ['sqrt', 'log2']}

# Number of splits of the dataset.
# The iris rows are ordered by class, so the folds must be shuffled;
# otherwise each test fold contains samples of a single class only.
kf = KFold(n_splits=10, shuffle=True, random_state=SEED)

# Exhaustive grid search; the forest gets an explicit random_state so the
# result does not depend on how much of the global NumPy RNG was consumed.
clf = GridSearchCV(estimator=RandomForestClassifier(random_state=SEED),
                   param_grid=parameters,
                   cv=kf)
clf.fit(iris.data, iris.target)

# Best hyperparameter set and its mean cross-validated score
print(f'Melhor conjunto de hiperparâmetros: \n {clf.best_estimator_}'+\
     f"\nSua nota: {clf.best_score_*100}% de acurácia")

Melhor conjunto de hiperparâmetros: 
 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=10, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
Sua nota: 96.0% de acurácia


In [None]:
# Results of the other attempts: display the cross-validation results that the
# fitted search object `clf` recorded for every candidate it tried.
clf.cv_results_

{'mean_fit_time': array([0.01274467, 0.11702392, 0.01267412, 0.13059046, 0.01276767,
        0.13330929, 0.01441386, 0.11883304, 0.0147892 , 0.12740695,
        0.0123173 , 0.11951656, 0.01209996, 0.11607058, 0.01209016,
        0.11656055, 0.01242275, 0.12803087, 0.01323607, 0.12623022,
        0.01488736, 0.12332788, 0.0163578 , 0.12849321, 0.01578591,
        0.12965312, 0.01358366, 0.12209582, 0.01247807, 0.15753319,
        0.01343765, 0.12386949, 0.01237252, 0.1205267 , 0.0124866 ,
        0.12067008, 0.01298525, 0.11940069, 0.01230755, 0.12858741,
        0.01543972, 0.12720513, 0.01367993, 0.12611651, 0.01277847,
        0.12575719, 0.01297762, 0.1212404 , 0.01273119, 0.1224148 ,
        0.01274979, 0.1198405 , 0.01237636, 0.12039678, 0.01258111,
        0.1198432 , 0.01246524, 0.11831627, 0.01240754, 0.12108526,
        0.01279621, 0.12336829, 0.01310754, 0.12435296, 0.01590326,
        0.13190513, 0.01290326, 0.14078848, 0.01596136, 0.12696698,
        0.01378853, 0.12342975,

# Deep learning

In [None]:
# Imports necessários
import warnings

import numpy as np
import tensorflow as tf

from tensorflow import keras
from sklearn import datasets
from keras.layers import Dense
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold

# Fix the seeds so the run is reproducible.
# NumPy's seed drives scikit-learn's sampling; TensorFlow has its own RNG,
# which Keras uses for weight initialisation and batch shuffling, so it has
# to be seeded separately.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Silence library warnings to keep the cell output readable
warnings.filterwarnings('ignore')

# Load the dataset
iris = datasets.load_iris()

# MLP model definition
def SimpleModelDL():
    """Build and compile a small MLP classifier for the iris dataset.

    The network takes the 4 iris features, passes them through two hidden
    ReLU layers (32 and 16 units) and ends in a 3-unit softmax layer, one
    unit per iris class.

    Returns:
        A compiled Keras ``Sequential`` model that expects integer class
        labels (0, 1, 2) as targets and reports accuracy.
    """
    # Architecture: softmax belongs on the output layer only; the hidden
    # layers use ReLU. Only the first layer needs the input dimension.
    model = Sequential()
    model.add(Dense(32, input_dim=4, activation='relu'))
    model.add(Dense(16, activation='relu'))
    # Three output units, because iris is a 3-class problem; a single
    # sigmoid unit can only separate two classes.
    model.add(Dense(3, activation='softmax'))

    # Sparse categorical cross-entropy works directly on integer labels, so
    # no one-hot encoding is needed, even for folds that miss a class.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Hyperparameter search space (training settings passed to the Keras wrapper)
parameters = {'batch_size': [10, 50, 100],
              'epochs': [5, 10, 100]}

# Number of splits of the dataset.
# The iris rows are ordered by class, so the folds must be shuffled;
# otherwise each test fold contains samples of a single class only and the
# per-split scores collapse to 0.0 or 1.0.
kf = KFold(n_splits=10, shuffle=True, random_state=SEED)

# Randomized search: samples n_iter candidates from the space above instead
# of trying all of them. random_state fixes which candidates are drawn.
clf = RandomizedSearchCV(estimator=KerasClassifier(build_fn=SimpleModelDL, verbose=0),
                         param_distributions=parameters,
                         n_iter=3,
                         cv=kf,
                         random_state=SEED)
clf.fit(iris.data, iris.target)

# Best hyperparameter set and its mean cross-validated score
print(f'Melhor conjunto de hiperparâmetros: \n {clf.best_estimator_.get_params()}'+\
     f"\nSua nota: {clf.best_score_*100}% de acurácia")

Melhor conjunto d hiperparâmetros: 
 {'verbose': 0, 'epochs': 10, 'batch_size': 10, 'build_fn': <function SimpleModelDL at 0x7fc9a5e74820>}
Sua nota: 33.33333373069763% de acurácia


In [None]:
# Results of the other attempts: display the cross-validation results that the
# fitted search object `clf` recorded for every sampled candidate.
clf.cv_results_

{'mean_fit_time': array([0.71752317, 1.21836748, 1.53099484]),
 'std_fit_time': array([0.11437586, 0.20095391, 0.15076465]),
 'mean_score_time': array([0.17563717, 0.20407715, 0.16590595]),
 'std_score_time': array([0.0134469 , 0.02404005, 0.01125242]),
 'param_epochs': masked_array(data=[10, 10, 100],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'param_batch_size': masked_array(data=[100, 10, 50],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'epochs': 10, 'batch_size': 100},
  {'epochs': 10, 'batch_size': 10},
  {'epochs': 100, 'batch_size': 50}],
 'split0_test_score': array([0., 0., 0.]),
 'split1_test_score': array([1., 0., 0.]),
 'split2_test_score': array([1., 0., 0.]),
 'split3_test_score': array([0.33333334, 0.66666669, 0.66666669]),
 'split4_test_score': array([0., 1., 1.]),
 'split5_test_score': array([1., 1., 1.]),
 'split6_test_score': array([0.        , 0.66666669

# Referências

- GridSearch: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
- RandomSearch: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
- KFold: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
- Modelo RandomForestClassifier: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
- Como montar uma arquitetura MLP simples com GridSearch: https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
- Como criar uma arquitetura de um modelo de DeepLearning: https://keras.io/api/models/model/
- Camada de mlp: https://keras.io/api/layers/core_layers/dense/
- API Reference do Keras: https://keras.io/api/