In [10]:
# Import libs
import os
import time
import pandas as pd

# Pipeline lib
from sklearn.pipeline import Pipeline

# feature-engine libs
from sklearn.preprocessing import StandardScaler
from feature_engine.encoding import OneHotEncoder
from feature_engine.imputation import CategoricalImputer
from feature_engine.imputation import ArbitraryNumberImputer
from feature_engine.wrappers import SklearnTransformerWrapper

# machine learning models
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.utils.fixes import loguniform
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# trainning models
from sklearn.model_selection import GridSearchCV

In [2]:
# Read the dataset
df_abt = pd.read_csv('/Users/dellacorte/py-projects/data-science/supervised-learning-pipeline-reference/databases/propensao_revenda_abt.csv')

# pega a base de treinamento
df_train = df_abt.query('data_ref_safra < "2018-03-01"')

# pega a base de avaliação (out of time)
df_oot   = df_abt.query('data_ref_safra == "2018-03-01"')

key_vars = ['data_ref_safra', 'seller_id']
num_vars = ['tot_orders_12m', 'tot_items_12m', 'tot_items_dist_12m', 'receita_12m', 'recencia']
cat_vars = ['uf']
target = 'nao_revendeu_next_6m'

features = cat_vars + num_vars

# dados de treinamento
X_train = df_train[features]
y_train = df_train[target]

# dados de avaliação (out of time)
X_oot = df_oot[features]
y_oot = df_oot[target]

In [3]:
datapipe = [
            ('numeric_imputer', ArbitraryNumberImputer(variables=num_vars, arbitrary_number=-999)),
            ('categoric_imputer', CategoricalImputer(variables=cat_vars, return_object=True)),
            ('one_hot_encoder', OneHotEncoder(variables=cat_vars))
]

### Decision Tree

The main hyperparameters that we can use in GridSearch:

* `max_depth`: Depth of the tree. The default value will be to build the tree until the leaves contain less than the value defined in `min_samples_split`. Possible values: `2, 3, 4, 5, 6, 7, ...`.
* `criterion`: Tree separation function. Possible values: `gini` and `entropy`.
* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.
* `min_samples_split`: Minimum number of samples required to split a node. Default value is 2.

In [4]:
t1 = time.time()

pipeline = Pipeline(steps=datapipe + [('decision_tree', DecisionTreeClassifier())])

parametros = {
  'decision_tree__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  'decision_tree__criterion': ['gini', 'entropy'],
  'decision_tree__class_weight': ['balanced', None],
  'decision_tree__min_samples_split': [2],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nIt took {} seconds for GridSearch to Decision Tree'.format(time.time() - t1))
print('='*100)
print()

Fitting 5 folds for each of 40 candidates, totalling 200 fits

{'decision_tree__class_weight': None, 'decision_tree__criterion': 'entropy', 'decision_tree__max_depth': 4, 'decision_tree__min_samples_split': 2}

It took 3.4554507732391357 seconds for GridSearch to Decision Tree



### Random Forest

The main hyperparameters that we can use in GridSearch:

* `n_estimators`: Random number of trees in the forest. Possible values: `120, 300, 500, 800, 1200`.
* `max_depth`: Depth of the tree. The default value will be to build the tree until the leaves contain less than the value defined in `min_samples_split`. Possible values: `5, 8, 15, 25, 30, None`.
* `max_features`: Number of attributes (characteristics) to analyze in the separation. The default is all (`auto`).
Possible values: `log2, sqrt, None`.
* `min_samples_split`: Minimum number of samples required to split a node. Default value is 2. Possible values: `2, 5, 10, 15, 100`.

* `min_samples_leaf`: Minimum number of samples in each leaf. Default value is 1. Possible values: `1, 2, 5, 10`.

* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.

In [5]:
t2 = time.time()

pipeline = Pipeline(steps=datapipe + [('random_forest', RandomForestClassifier())])

parametros = {
    'random_forest__n_estimators': [120, 300], # 500, 800, 1200],
    'random_forest__max_depth': [5, 8, 15], # 25, 30, None],
    'random_forest__max_features': ['log2'], # 'sqrt', 'None'],
    'random_forest__min_samples_split': [2, 5], # 10, 15, 100],
    'random_forest__min_samples_leaf': [1, 2], # 5, 10],
    'random_forest__class_weight': ['balanced', None]
}

grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nDemorou {} segundos para GridSearch para Random Forest'.format(time.time() - t2))
print('='*100)
print()

Fitting 5 folds for each of 48 candidates, totalling 240 fits

{'random_forest__class_weight': 'balanced', 'random_forest__max_depth': 15, 'random_forest__max_features': 'log2', 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 2, 'random_forest__n_estimators': 300}

Demorou 20.418328046798706 segundos para GridSearch para Random Forest



### XGBoost

The main hyperparameters that we can use in GridSearch:

* `learning_rate`: Learning rate (also known as eta) for boosting (between 0 and 1). After each boosting step, the newly added weights are scaled according to this factor. The lower the value, the more conservative it will be, but it will also take more trees to converge. Possible values: `0.01, 0.015, 0.025, 0.05, 0.1`.
* `n_estimators`: Random number of trees in the forest. Possible values: `500, 1000, 2000, 5000, 10000`.
* `max_depth`: Depth of the tree. The default value will be to build the tree until the leaves contain less than the value defined in `min_samples_split`. Possible values: `3, 5, 7, 9, 12, 15, 17, 25, None`.

* `colsample_bytree`: Fraction of columns to be used per round. Possible values: `0.6, 0.7, 0.8, 0.9, 1.0`.
* `subsample`: Fraction of the samples to be used in the next round. Possible values: `0.6, 0.7, 0.8, 0.9, 1.0`.

* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.

In [6]:
t3 = time.time()

pipeline = Pipeline(steps=datapipe + [('xgboost', XGBClassifier())])

parametros = {
    'xgboost__learning_rate': [0.01, ], #[0.1, 0.01, 0.015, 0.025, 0.05, 0.1],
    'xgboost__n_estimators': [500], # [500, 1000, 2000, 5000, 10000],
    'xgboost__max_depth': [3], # [3, 5, 7, 9, 12, 15, 17, 25],
    'xgboost__colsample_bytree': [0.6], # [0.6, 0.7, 0.8, 0.9, 1.0],
    'xgboost__subsample': [0.6], # [0.6, 0.7, 0.8, 0.9, 1.0],
    'xgboost__class_weight': ['balanced', None],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nDemorou {} segundos para GridSearch para XGBoost.'.format(time.time() - t3))
print('='*100)
print()

Fitting 5 folds for each of 2 candidates, totalling 10 fits
Parameters: { "class_weight" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.



{'xgboost__class_weight': 'balanced', 'xgboost__colsample_bytree': 0.6, 'xgboost__learning_rate': 0.01, 'xgboost__max_depth': 3, 'xgboost__n_estimators': 500, 'xgboost__subsample': 0.6}

Demorou 3.4303171634674072 segundos para GridSearch para XGBoost.



### LightGBM

The main hyperparameters that we can use in GridSearch:

* `learning_rate`: Learning rate (also known as eta) for boosting (between 0 and 1). After each boosting step, the newly added weights are scaled according to this factor. The lower the value, the more conservative it will be, but it will also take more trees to converge. Possible values: `0.01, 0.015, 0.025, 0.05, 0.1`.
* `n_estimators`: Random number of trees in the forest. Possible values: `500, 1000, 2000, 5000, 10000`.
* `max_depth`: Depth of the tree. The default value will be to build the tree until the leaves contain less than the value defined in `min_samples_split`. Possible values: `3, 5, 7, 9, 12, 15, 17, 25, None`.

* `colsample_bytree`: Fraction of columns to be used per round. Possible values: `0.6, 0.7, 0.8, 0.9, 1.0`.
* `subsample`: Fraction of the samples to be used in the next round. Possible values: `0.6, 0.7, 0.8, 0.9, 1.0`.

* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.

In [7]:
t4 = time.time()

pipeline = Pipeline(steps=datapipe + [('lgbm', LGBMClassifier())])

parametros = {
    'lgbm__learning_rate': [0.01, 0.05], # [0.01, 0.015, 0.025, 0.05, 0.1],
    'lgbm__n_estimators': [500], # [500, 1000, 2000, 5000, 10000],
    'lgbm__max_depth': [3], # [3, 5, 7, 9, 12, 15, 17, 25],
    'lgbm__colsample_bytree': [0.6], # [0.6, 0.7, 0.8, 0.9, 1.0],
    'lgbm__subsample': [0.8], # [0.6, 0.7, 0.8, 0.9, 1.0],
    'lgbm__class_weight': ['balanced', None],
}

grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nDemorou {} segundos para GridSearch para LGBM.'.format(time.time() - t4))
print('='*100)
print()

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Parameters: { "class_weight" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002507 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 868
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ov

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001553 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 864
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000






[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001676 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 876
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001581 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001284 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 876
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005292 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 868
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001416 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 871
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724








[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 864
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 868
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001352 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 876
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208




[LightGBM] [Info] Number of positive: 1332, number of negative: 2163
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001545 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 908
[LightGBM] [Info] Number of data points in the train set: 3495, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381116 -> initscore=-0.484815
[LightGBM] [Info] Start training from score -0.484815



{'lgbm__class_weight': None, 'lgbm__colsample_bytree': 0.6, 'lgbm__learning_rate': 0.05, 'lgbm__max_depth': 3, 'lgbm__n_estimators': 500, 'lgbm__subsample': 0.8}

Demorou 12.654129981994629 segundos para GridSearch para LGBM.



### SVM

The main hyperparameters that we can use in GridSearch:

* `C`: Penalty parameter, the lower the value, the narrower the decision boundary (soft margin). Default value 1.0. Possible values: `0.001, 0.01, 0.1, 1, 10, 100, ...`
* `gamma`: Coefficient used for non-linear hyperplanes. The higher the gamma value, the greater the fit to the data. This can lead to overfitting Possible values: `scale` and `auto`.
* `kernel`: Kernel type: `linear`, `rbf` (default), `poly`, `sigmoid`, `precomputed` or any function.
* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.

In [9]:
t5 = time.time()

std_scaller = [('numeric_scaler', SklearnTransformerWrapper(variables=num_vars, transformer=StandardScaler()))]

pipeline = Pipeline(steps=datapipe + std_scaller + [('svc', SVC())])

parametros = {
    'svc__C': [0.01], # [0.001, 0.01, 0.1, 1, 10, 100],
    'svc__gamma': ['scale'], # ['scale', 'auto'],
    'svc__kernel': ['rbf', 'poly'], # ['rbf', 'poly', 'linear', 'sigmoid'],
    'svc__class_weight': ['balanced', None]
}
grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nDemorou {} segundos para GridSearch para SVC.'.format(time.time() - t5))
print('='*100)
print()

Fitting 5 folds for each of 4 candidates, totalling 20 fits

{'svc__C': 0.01, 'svc__class_weight': None, 'svc__gamma': 'scale', 'svc__kernel': 'poly'}

Demorou 1.6524007320404053 segundos para GridSearch para SVC.



### Logistic Regression

The main hyperparameters that we can use in GridSearch:

* `C`: Positive floating point. Inverse of the regularization force. A smaller value means stronger regularization. Possible values: `0.001, 0.01, 0.1, 1, 10, 100, ...`

* `penalty`: Penalty standard `none` and `l2`. 

* `class_weight`: weights of classes in a dictionary. The `balanced` value will define values ​​in inverse proportion to the class frequencies. Possible values: `balanced` and `None`.


* `fit_intercept`: Adds bias to the decision function. Possible values: True/False.

In [11]:
t6 = time.time()

std_scaller = [('numeric_scaler', SklearnTransformerWrapper(variables=num_vars, transformer=StandardScaler()))]

pipeline = Pipeline(steps=datapipe + std_scaller + [('logit', LogisticRegression())])

parametros = {
    'logit__penalty': ['none', 'l2'],
    'logit__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'logit__class_weight': ['balanced', None],
    'logit__fit_intercept': [True, False]
}
grid_search = GridSearchCV(pipeline, parametros, scoring='roc_auc', cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print()
print('='*100)
print(grid_search.best_params_)
print('\nDemorou {} segundos para GridSearch para Regressão Logistica.'.format(time.time() - t6))
print('='*100)
print()

Fitting 5 folds for each of 48 candidates, totalling 240 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt


{'logit__C': 0.01, 'logit__class_weight': 'balanced', 'logit__fit_intercept': False, 'logit__penalty': 'l2'}

Demorou 2.774195909500122 segundos para GridSearch para Regressão Logistica.



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt