# Modeling - XGBoost

In [1]:
import sys
sys.path.append('../../src')


import numpy as np
import pickle
from xgboost import XGBClassifier
from modeling import train_model, save_model
from tuning import random_search_tuning
from utils import  generate_combinations, generate_dataset_split, save_combination
from itertools import combinations


## Parameters for tuning

In [2]:
# Search space handed to `random_search_tuning`: each key maps to the list/array
# of candidate values for that XGBClassifier setting. Numeric grids are built
# with np.arange(start, stop, step); fixed settings are single-element lists.
parameters = dict(
    # Fixed settings (only one candidate each).
    objective=['binary:logistic'],
    eval_metric=['auc'],
    # Disabled candidate grid, kept for reference:
    # scale_pos_weight=np.arange(0, 30, 5),
    # Boosting schedule.
    learning_rate=np.arange(0.001, 0.1, 0.005),
    max_depth=np.arange(2, 8),
    n_estimators=np.arange(50, 150, 10),
    # Row / column sampling fractions.
    subsample=np.arange(0.3, 0.9, 0.1),
    colsample_bytree=np.arange(0.6, 1.0, 0.05),
    # Split penalty and early-stopping patience.
    gamma=np.arange(0.1, 5, 0.1),
    early_stopping_rounds=np.arange(5, 15, 5),
)

## Modeling

In [3]:


# Lookup table from one-letter code ("sigla") to city name.
cities_siglas = dict(
    A="Porto Alegre",
    B="Marabá",
    C="Brasília",
    D="Belo Horizonte",
    E="Juazeiro do Norte",
    F="Recife",
)

# Codes of the cities used in this notebook, in processing order.
polos_sigla = ['A', 'B', 'C', 'D', 'E', 'F']

# City names resolved from the codes above, in the same order.
polos = []
for code in polos_sigla:
    polos.append(cities_siglas[code])

In [6]:
# Build one group of train/validation combinations per city: group i is
# generated from every city except polos[i]. The arguments (4, 1) are passed
# straight to `generate_combinations`; the recorded output shows each resulting
# combination as [four_training_cities, one_validation_city].
splits = [
    generate_combinations(polos[:i] + polos[i + 1:], 4, 1)
    for i in range(len(polos))
]

for i, split in enumerate(splits):
    # NOTE(review): the original cell indexed a `cities` list that is not
    # defined in any visible cell or import (it only existed as leftover kernel
    # state), so a fresh "Restart & Run All" raised NameError. The directory is
    # now named after the city excluded from this split. This assumes `cities`
    # held the city names in the same order as `polos` — TODO confirm against
    # the directories already on disk (it may have been the one-letter codes).
    held_out_city = polos[i]

    models = []
    save_combination(f'split4_1/{held_out_city}', split)

    for idx, combination in enumerate(split):
        print(f"Combinação {idx}: {combination} training")
        # combination[0] feeds the training set, combination[1] the validation set.
        X_train, y_train = generate_dataset_split(combination[0])
        X_val, y_val = generate_dataset_split(combination[1])
        clf = XGBClassifier()
        model = random_search_tuning(clf, parameters)
        model = train_model(model, X_train, y_train, [(X_val, y_val)])
        print(model.best_params_)
        models.append(model)

    # Persist every tuned model of this split. The `with` block guarantees the
    # file is flushed and closed even if a later dump fails or is interrupted.
    for j, fitted_model in enumerate(models):
        model_path = f"../../data/models/split4_1/{held_out_city}/xgb_{j}.sav"
        with open(model_path, 'wb') as model_file:
            pickle.dump(fitted_model, model_file)
        
    

Combinação 0: [['Porto Alegre', 'Marabá', 'Brasília', 'Belo Horizonte'], ['Juazeiro do Norte']] training
['Porto Alegre', 'Marabá', 'Brasília', 'Belo Horizonte']
['Juazeiro do Norte']


KeyboardInterrupt: 