# Modeling - XGBoost

In [1]:
import sys
sys.path.append('../../src')

import pandas as pd
import numpy as np
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from modeling import train_model, save_model
from tuning import random_search_tuning
from evaluator import XGBoostEvaluator

## Parameters for tuning

In [4]:
parameters = {
    'learning_rate': np.arange(0.001, 0.1, 0.005),
    'max_depth': np.arange(2, 8),
    'n_estimators': np.arange(50, 150, 10),
    'subsample': np.arange(0.3, 0.9, 0.1),
    'colsample_bytree': np.arange(0.6, 1.0, 0.05),
    'gamma': np.arange(0.1, 5, 0.1)
}

## Modeling

In [4]:

polos = ['Porto Alegre', 'Marabá', 'Brasília', 'Belo Horizonte', 'Juazeiro do Norte', 'Recife']
models = []
for p in polos:
    print(f"`Polo {p} rodando")
    print("=====================================")
    clf = XGBClassifier()
    model = random_search_tuning(clf, parameters)
    model = train_model(model, "multi", polo=p)
    print(model.best_params_)
    models.append(model)

`Polo Porto Alegre rodando
{'subsample': 0.7000000000000002, 'n_estimators': 70, 'max_depth': 3, 'learning_rate': 0.051000000000000004, 'gamma': 0.1, 'colsample_bytree': 0.7000000000000001}


## Save Model

In [7]:
for i in range(0, len(models)):

    pickle.dump(models[i], open(f"../../data/models/polos/xgb_{polos[i]}.sav", 'wb'))