In [1]:
# LIBRARIES
import snowflake.connector
import logging
from os import environ
from dotenv import load_dotenv
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
import optuna
import joblib
import numpy as np

In [2]:
# SNOWFLAKE CONNECTION 
load_dotenv()
for logger_name in ['snowflake.connector', 'botocore']:
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG)
    logger.addHandler(handler)

conn = snowflake.connector.connect(
    user = environ['USER'],
    password = environ['PASSWORD'],
    account = environ['ACCOUNT'],
    warehouse = environ['WAREHOUSE'],
    database = environ['DATABASE'],
    schema = environ['SCHEMA']
)

Snowflake Connector for Python Version: 3.12.2, Python Version: 3.10.14, Platform: Windows-10-10.0.22631-SP0
connect
__config
Connecting to GLOBAL Snowflake domain
This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
use_numpy: False
initialized
REST API object was created: fa62087.us-east-2.aws.snowflakecomputing.com:443
authenticate
assertion content: *********
account=fa62087, user=PAULO_MAIA, database=POKER_PROD, schema=POKER, warehouse=REPORTING_POKER_WH, role=None, request_id=02d264df-48f3-4b84-8e20-77e0f07136ab
body['data']: {'CLIENT_APP_ID': 'PythonConnector', 'CLIENT_APP_VERSION': '3.12.2', 'SVN_REVISION': None, 'ACCOUNT_NAME': 'fa62087', 'LOGIN_NAME': 'PAULO_MAIA', 'CLIENT_ENVIRONMENT': {'APPLICATION': 'PythonConnector', 'OS': 'Windows', 'OS_VERSION': 'Windows-10-10.0.22631-SP0', 'PYTHON_VERSI

In [3]:
# SELECTING THE QUERY AND GETTING THE DATA
with open('training_query.sql', 'r') as file:
    query = file.read()
    
data = pd.read_sql_query(query, conn)

  data = pd.read_sql_query(query, conn)
cursor
executing SQL/command
query: [WITH counts AS ( SELECT start_date_local, start_hour_local, COUNT(*) AS same_hou...]
binding: [WITH counts AS ( SELECT start_date_local, start_hour_local, COUNT(*) AS same_hou...] with input=[None], processed=[{}]
sequence counter: 1
Request id: 5f22852d-8eff-44bf-a9b0-b52af04be848
running query [WITH counts AS ( SELECT start_date_local, start_hour_local, COUNT(*) AS same_hou...]
is_file_transfer: True
_cmd_query
serialize_to_dict() called
sql=[WITH counts AS ( SELECT start_date_local, start_hour_local, COUNT(*) AS same_hou...], sequence_id=[1], is_file_transfer=[False]
Opentelemtry otel injection failed because of: No module named 'opentelemetry'
Session status for SessionPool 'fa62087.us-east-2.aws.snowflakecomputing.com', SessionPool 1/1 active sessions
remaining request timeout: N/A ms, retry cnt: 1
Request guid: ac0e2cca-ce09-4c9c-90d8-bbbbabc1e5bd
socket timeout: 60
https://fa62087.us-east-2.aws.snowflak

In [5]:
# Converter para o formato de data
data['START_DATE_LOCAL'] = pd.to_datetime(data['START_DATE_LOCAL'])
# Definir o tamanho das janelas
training_window = pd.DateOffset(months=11)
validation_window = pd.DateOffset(months=1)

# Criar o loop das janelas
start_date = pd.to_datetime('2023-07-30')  # Primeira janela de validação
end_date = (pd.Timestamp.now() - pd.DateOffset(days=1)).date()
end_date = pd.Timestamp(end_date)  # Converte para Timestamp# Última janela de validação
current_date = start_date


In [None]:
# ENCODING CATEGORICAL COLUMNS AND DEFINING X AND Y
encoder = OneHotEncoder(sparse_output=False)
data_transform = data.drop(columns=['MATCH_ID'])
cat_columns = ['START_HOUR_LOCAL', 'DAY_OF_WEEK_LOCAL', 'KO_TYPE', 
               'MTT_POOL_ALLOCATION_DESCRIPTION', 'ESTRUTURA_BLINDS']
encoded_columns = encoder.fit_transform(data_transform[cat_columns])
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(cat_columns))
data_final = pd.concat([data_transform.drop(cat_columns, axis=1), encoded_df], axis=1)

X = data_final.drop(columns=['HAS_OVERLAY', 'OVERLAY', 'MTT_RESULT', 'COLLECTED', 'NORMALIZED_MTT_RESULT', 'NORMALIZED_COLLECTED' ], axis=1)
y = data_final['COLLECTED']

In [34]:
while current_date <= end_date:
    # Definir os períodos de treinamento e validação
    training_start = current_date - training_window
    training_end = current_date - pd.Timedelta(days=1)  # Um dia antes da validação
    validation_start = current_date
    validation_end = current_date + validation_window - pd.Timedelta(days=1)  # Último dia do mês

    # Filtrar os dados para treinamento e validação
    training_data = data_final[(data_final['START_DATE_LOCAL'] >= training_start) & (data_final['START_DATE_LOCAL'] <= training_end)]
    validation_data = data_final[(data_final['START_DATE_LOCAL'] >= validation_start) & (data_final['START_DATE_LOCAL'] <= validation_end)]

    # Aqui você pode treinar e validar o modelo
    print(f"Treinamento: {training_start.date()} a {training_end.date()}")
    print(f"Validação: {validation_start.date()} a {validation_end.date()}\n")

    # Avançar a janela para o próximo mês
    current_date += pd.DateOffset(months=1)

Treinamento: 2022-08-30 a 2023-07-29
Validação: 2023-07-30 a 2023-08-29

Treinamento: 2022-09-30 a 2023-08-29
Validação: 2023-08-30 a 2023-09-29

Treinamento: 2022-10-30 a 2023-09-29
Validação: 2023-09-30 a 2023-10-29

Treinamento: 2022-11-30 a 2023-10-29
Validação: 2023-10-30 a 2023-11-29

Treinamento: 2022-12-30 a 2023-11-29
Validação: 2023-11-30 a 2023-12-29

Treinamento: 2023-01-30 a 2023-12-29
Validação: 2023-12-30 a 2024-01-29

Treinamento: 2023-02-28 a 2024-01-29
Validação: 2024-01-30 a 2024-02-28

Treinamento: 2023-03-29 a 2024-02-28
Validação: 2024-02-29 a 2024-03-28

Treinamento: 2023-04-29 a 2024-03-28
Validação: 2024-03-29 a 2024-04-28

Treinamento: 2023-05-29 a 2024-04-28
Validação: 2024-04-29 a 2024-05-28

Treinamento: 2023-06-29 a 2024-05-28
Validação: 2024-05-29 a 2024-06-28

Treinamento: 2023-07-29 a 2024-06-28
Validação: 2024-06-29 a 2024-07-28

Treinamento: 2023-08-29 a 2024-07-28
Validação: 2024-07-29 a 2024-08-28

Treinamento: 2023-09-29 a 2024-08-28
Validação: 202

In [6]:
# DEFINING THE CROSS-VALIDATION FUNCTION FOR XGBOOST REGRESSOR MODEL WITH OPTUNA
def cross_validation_optuna(trial, X, y, n_splits):
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 3, 20)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.8)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    gamma = trial.suggest_float('gamma', 0, 5)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-5, 1e1, log=True)
    reg_lambda = trial.suggest_float('reg_lambda', 1e-5, 1e1, log=True)


    model = xgb.XGBRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma = gamma,
        min_child_weight = min_child_weight,
        reg_alpha = reg_alpha,
        reg_lambda = reg_lambda,
        device='cuda',
        random_state=42
    )

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    results_list = []

    for train_idx, test_idx in kfold.split(X):
        X_train, X_test = X.iloc[train_idx, :], X.iloc[test_idx, :]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        # CREATION OF A DATAFRAME TO SAVE ALL THE RESULTS
        results_df = pd.DataFrame({
            'Actual': y_test,
            'Prediction': predictions,
        })

        # ADDING ADDITIONAL METRICS TO THE DATAFRAME
        results_df['Absolute difference'] = np.abs(results_df['Actual'] - results_df['Prediction'])
        results_df['Mean square difference'] = (results_df['Actual'] - results_df['Prediction']) ** 2
        results_df['Error percentage'] = (results_df['Actual'] - results_df['Prediction']) / results_df['Actual'] * 100
        results_df['Absolute error percentage'] = np.abs(results_df['Error percentage'])

        results_list.append(results_df)

    full_results_df = pd.concat(results_list, ignore_index=True)

    return full_results_df, model


# DEFINING THE OPTIMIZATION FUNCTION FOR HYPERPARAMETER TUNING
def optimize_hyperparameters(X, y):
    study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.MedianPruner())

    def objective(trial):
        full_results_df, model = cross_validation_optuna(trial, X, y, n_splits=10)
        trial.set_user_attr("model", model)
        trial.set_user_attr("results", full_results_df)
        return full_results_df['Absolute difference'].median()  # Using MSE as the optimization target

    study.optimize(objective, n_trials=40)

    print("Best hyperparameters: ", study.best_params)

    best_trial = study.best_trial
    best_model = best_trial.user_attrs["model"]
    best_results = best_trial.user_attrs["results"]

    return best_model, best_results

# RUNNING THE MODEL 
best_model, best_results_df = optimize_hyperparameters(X, y)

[I 2024-11-19 16:08:08,266] A new study created in memory with name: no-name-47dfe1bd-2198-4035-bc20-1fed715fb3c0
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


[I 2024-11-19 16:08:15,937] Trial 0 finished with value: 35.31689453125 and parameters: {'n_estimators': 146, 'max_depth': 8, 'learning_rate': 0.46392456008558997, 'subsample': 0.8295531185538675, 'colsample_bytree': 0.5115167272086848, 'gamma': 0.043116108372410755, 'min_child_weight': 6, 'reg_alpha': 0.004871467304646351, 'reg_lambda': 0.003066405369570264}. Best is trial 0 with value: 35.31689453125.
[I 2024-11-19 16:08:52,825] Trial 1 finished with value: 31.567446289062445 and parameters: {'n_estimators': 111, 'max_depth': 19, 'learning_rate': 0.5690463012097565, 'subsample': 0.7306962208926164, 'colsample_bytree': 0.8053797761251064, 'gamma': 4.263712670466595, 'min_child_weight': 1, 'reg_alpha': 0.00021439295213671

Best hyperparameters:  {'n_estimators': 110, 'max_depth': 16, 'learning_rate': 0.06816448127304066, 'subsample': 0.9112424040826942, 'colsample_bytree': 0.6819324010072926, 'gamma': 0.016392012530653144, 'min_child_weight': 6, 'reg_alpha': 0.0007971807194501131, 'reg_lambda': 0.02278351502837931}


In [7]:
# SAVING THE MODEL AND THE ENCODER
joblib.dump(encoder, '../new_model/history_encoder.pkl')
joblib.dump(best_model, '../new_model/history_model.pkl')

['../new_model/history_model.pkl']

In [36]:
# ENCODING CATEGORICAL COLUMNS AND DEFINING X AND Y
encoder = OneHotEncoder(sparse_output=False)
data_transform = data.drop(columns=['MATCH_ID', 'START_DATE_LOCAL'])
cat_columns = ['START_HOUR_LOCAL', 'DAY_OF_WEEK_LOCAL', 'KO_TYPE', 
               'MTT_POOL_ALLOCATION_DESCRIPTION', 'ESTRUTURA_BLINDS']
encoded_columns = encoder.fit_transform(data_transform[cat_columns])
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(cat_columns))
data_final = pd.concat([data_transform.drop(cat_columns, axis=1), encoded_df], axis=1)

X = data_final.drop(columns=['HAS_OVERLAY', 'OVERLAY', 'MTT_RESULT', 'COLLECTED', 'NORMALIZED_MTT_RESULT', 'NORMALIZED_COLLECTED' ], axis=1)
y = data_final['COLLECTED']

In [None]:
from sklearn.model_selection import TimeSeriesSplit

# FUNÇÃO DE VALIDAÇÃO CRUZADA PARA XGBOOST COM JANELA DESLIZANTE
def cross_validation_optuna_time_series(trial, X, y, train_months, test_months):
    # Sugerindo hiperparâmetros
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 3, 20)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.8)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    gamma = trial.suggest_float('gamma', 0, 5)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-5, 1e1, log=True)
    reg_lambda = trial.suggest_float('reg_lambda', 1e-5, 1e1, log=True)

    # Inicializando o modelo
    model = xgb.XGBRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        min_child_weight=min_child_weight,
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        device='cuda',
        random_state=42
    )

    # Configurando a validação temporal com janela deslizante
    tscv = TimeSeriesSplit(gap=0, test_size=test_months, train_size=train_months)
    results_list = []

    for train_idx, test_idx in tscv.split(X):
        X_train, X_test = X.iloc[train_idx, :], X.iloc[test_idx, :]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        # Criando um DataFrame para salvar os resultados
        results_df = pd.DataFrame({
            'Actual': y_test,
            'Prediction': predictions,
        })

        # Adicionando métricas ao DataFrame
        results_df['Absolute difference'] = np.abs(results_df['Actual'] - results_df['Prediction'])
        results_df['Mean square difference'] = (results_df['Actual'] - results_df['Prediction']) ** 2
        results_df['Error percentage'] = (results_df['Actual'] - results_df['Prediction']) / results_df['Actual'] * 100
        results_df['Absolute error percentage'] = np.abs(results_df['Error percentage'])

        results_list.append(results_df)

    # Consolidando os resultados
    full_results_df = pd.concat(results_list, ignore_index=True)

    return full_results_df, model

# ALTERANDO A FUNÇÃO DE OTIMIZAÇÃO PARA UTILIZAR A VALIDAÇÃO TEMPORAL
def optimize_hyperparameters_time_series(X, y):
    study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.MedianPruner())

    def objective(trial):
        # Usando 11 meses para treinamento e 1 mês para validação
        full_results_df, model = cross_validation_optuna_time_series(trial, X, y, train_months=11, test_months=1)
        trial.set_user_attr("model", model)
        trial.set_user_attr("results", full_results_df)
        return full_results_df['Absolute difference'].median()  # Usando o erro absoluto mediano como métrica de otimização

    study.optimize(objective, n_trials=40)

    print("Best hyperparameters: ", study.best_params)

    best_trial = study.best_trial
    best_model = best_trial.user_attrs["model"]
    best_results = best_trial.user_attrs["results"]

    return best_model, best_results

# EXECUTANDO A OTIMIZAÇÃO
best_model, best_results_df = optimize_hyperparameters_time_series(X, y)


In [6]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
import optuna
import pickle

# Encoding das colunas categóricas
encoder = OneHotEncoder(sparse_output=False)
data_transform = data.drop(columns=['MATCH_ID'])
cat_columns = ['START_HOUR_LOCAL', 'DAY_OF_WEEK_LOCAL', 'KO_TYPE', 
               'MTT_POOL_ALLOCATION_DESCRIPTION', 'ESTRUTURA_BLINDS']
encoded_columns = encoder.fit_transform(data_transform[cat_columns])
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(cat_columns))
data_final = pd.concat([data_transform.drop(cat_columns, axis=1), encoded_df], axis=1)

# Preparar X e y
X = data_final.drop(columns=['HAS_OVERLAY', 'OVERLAY', 'MTT_RESULT', 'COLLECTED', 'NORMALIZED_MTT_RESULT', 
                             'NORMALIZED_COLLECTED', 'START_DATE_LOCAL'], axis=1)  # Remove a coluna de data aqui
y = data_final['COLLECTED']

# Função para otimizar hiperparâmetros
def optimize_hyperparameters(X_train, y_train, period_label):
    study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.MedianPruner())

    def objective(trial):
        # Hiperparâmetros
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 3, 20)
        learning_rate = trial.suggest_float('learning_rate', 0.01, 0.8)
        subsample = trial.suggest_float('subsample', 0.5, 1.0)
        colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
        gamma = trial.suggest_float('gamma', 0, 5)
        min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
        reg_alpha = trial.suggest_float('reg_alpha', 1e-5, 1e1, log=True)
        reg_lambda = trial.suggest_float('reg_lambda', 1e-5, 1e1, log=True)

        model = xgb.XGBRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            gamma=gamma,
            min_child_weight=min_child_weight,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
            device='cuda',
            random_state=42
        )

        kfold = KFold(n_splits=5, shuffle=True, random_state=42)
        scores = []

        for train_idx, test_idx in kfold.split(X_train):
            X_train_fold, X_test_fold = X_train.iloc[train_idx], X_train.iloc[test_idx]
            y_train_fold, y_test_fold = y_train.iloc[train_idx], y_train.iloc[test_idx]

            model.fit(X_train_fold, y_train_fold)
            predictions = model.predict(X_test_fold)

            # Mean Absolute Error como métrica
            mae = np.mean(np.abs(predictions - y_test_fold))
            scores.append(mae)

        return np.mean(scores)

    # Rodar a otimização
    study.optimize(objective, n_trials=40)

    # Treinar modelo final com os melhores hiperparâmetros
    best_params = study.best_params
    final_model = xgb.XGBRegressor(**best_params, device='cuda', random_state=42)
    final_model.fit(X_train, y_train)

    # Salvar o modelo
    filename = f"xgb_model_{period_label}.pkl"
    with open(filename, "wb") as file:
        pickle.dump(final_model, file)
    print(f"Modelo salvo como: {filename}")

    return final_model

# Loop temporal
data_final['START_DATE_LOCAL'] = pd.to_datetime(data_final['START_DATE_LOCAL'])
training_window = pd.DateOffset(months=11)
validation_window = pd.DateOffset(months=1)
start_date = pd.to_datetime('2023-07-30')
end_date = pd.Timestamp(pd.Timestamp.now() - pd.DateOffset(days=1))

current_date = start_date

while current_date <= end_date:
    # Definir janelas de treinamento e validação
    training_start = current_date - training_window
    training_end = current_date - pd.Timedelta(days=1)
    validation_start = current_date
    validation_end = current_date + validation_window - pd.Timedelta(days=1)

    training_data = data_final[(data_final['START_DATE_LOCAL'] >= training_start) & (data_final['START_DATE_LOCAL'] <= training_end)]
    validation_data = data_final[(data_final['START_DATE_LOCAL'] >= validation_start) & (data_final['START_DATE_LOCAL'] <= validation_end)]

    X_train = training_data.drop(columns=['COLLECTED', 'START_DATE_LOCAL'])  # Certifique-se de excluir a coluna de data
    y_train = training_data['COLLECTED']

    X_valid = validation_data.drop(columns=['COLLECTED', 'START_DATE_LOCAL'])
    y_valid = validation_data['COLLECTED']

    # Nome do período atual
    period_label = f"{training_start.date()}_to_{validation_end.date()}"

    # Otimizar os hiperparâmetros para a janela atual
    print(f"Tunando hiperparâmetros para período de {training_start.date()} a {validation_end.date()}")
    best_model = optimize_hyperparameters(X_train, y_train, period_label)

    # Avançar janela
    current_date += pd.DateOffset(months=1)


[I 2024-11-29 12:15:48,186] A new study created in memory with name: no-name-294d6558-4806-4d22-9b2e-590b73c2a5d7


Tunando hiperparâmetros para período de 2022-08-30 a 2023-08-29


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


[I 2024-11-29 12:15:52,761] Trial 0 finished with value: 21.579745441040927 and parameters: {'n_estimators': 198, 'max_depth': 7, 'learning_rate': 0.5463167860909405, 'subsample': 0.8100166315977635, 'colsample_bytree': 0.8798264524073864, 'gamma': 1.869642262935295, 'min_child_weight': 3, 'reg_alpha': 0.00034043266433635773, 'reg_lambda': 0.001020651318136569}. Best is trial 0 with value: 21.579745441040927.
[I 2024-11-29 12:15:58,183] Trial 1 finished with value: 35.23490359462781 and parameters: {'n_estimators': 174, 'max_depth': 10, 'learning_rate': 0.7106960124857787, 'subsample': 0.7574584065703855, 'colsample_bytree': 0.7585999036929796, 'gamma': 1.0453994809155576, 'min_child_weight': 2, 'reg_alpha': 3.533827559396275e-05, 'reg_lambda': 0.5536480005700402}. Best is trial 0 with value: 21.579745441040927.
[I 2024-11-29 12:16:0

Modelo salvo como: xgb_model_2022-08-30_to_2023-08-29.pkl
Tunando hiperparâmetros para período de 2022-09-30 a 2023-09-29


[I 2024-11-29 12:21:00,211] Trial 0 finished with value: 31.59114790902775 and parameters: {'n_estimators': 192, 'max_depth': 20, 'learning_rate': 0.3195678088389851, 'subsample': 0.6464378675319098, 'colsample_bytree': 0.6557641931435344, 'gamma': 2.041217415887196, 'min_child_weight': 4, 'reg_alpha': 0.00242096289708799, 'reg_lambda': 0.8021680681438014}. Best is trial 0 with value: 31.59114790902775.
[I 2024-11-29 12:21:06,223] Trial 1 finished with value: 60.099846614121006 and parameters: {'n_estimators': 141, 'max_depth': 18, 'learning_rate': 0.7110962872657661, 'subsample': 0.5202260485952838, 'colsample_bytree': 0.5563596723021348, 'gamma': 2.796931632450046, 'min_child_weight': 10, 'reg_alpha': 0.00046177251330000184, 'reg_lambda': 0.004195673141025066}. Best is trial 0 with value: 31.59114790902775.
[I 2024-11-29 12:21:08,034] Trial 2 finished with value: 32.35436895905624 and parameters: {'n_estimators': 130, 'max_depth': 4, 'learning_rate': 0.6823897256078283, 'subsample': 

Modelo salvo como: xgb_model_2022-09-30_to_2023-09-29.pkl
Tunando hiperparâmetros para período de 2022-10-30 a 2023-10-29


[I 2024-11-29 12:24:07,648] Trial 0 finished with value: 17.935300802521965 and parameters: {'n_estimators': 147, 'max_depth': 14, 'learning_rate': 0.23079849728264779, 'subsample': 0.8748912192116471, 'colsample_bytree': 0.8312028709464663, 'gamma': 3.614614349360675, 'min_child_weight': 5, 'reg_alpha': 3.473762433533894e-05, 'reg_lambda': 0.005563174088357416}. Best is trial 0 with value: 17.935300802521965.
[I 2024-11-29 12:24:10,947] Trial 1 finished with value: 35.70100232405937 and parameters: {'n_estimators': 68, 'max_depth': 16, 'learning_rate': 0.5026215187717633, 'subsample': 0.9919883987441154, 'colsample_bytree': 0.7556277164461827, 'gamma': 1.8561961207684745, 'min_child_weight': 8, 'reg_alpha': 0.04690273740040255, 'reg_lambda': 0.003400089785687547}. Best is trial 0 with value: 17.935300802521965.
[I 2024-11-29 12:24:16,501] Trial 2 finished with value: 36.441604378641685 and parameters: {'n_estimators': 116, 'max_depth': 16, 'learning_rate': 0.2755011864495492, 'subsamp

Modelo salvo como: xgb_model_2022-10-30_to_2023-10-29.pkl
Tunando hiperparâmetros para período de 2022-11-30 a 2023-11-29


[I 2024-11-29 12:27:22,957] Trial 0 finished with value: 34.346354913716176 and parameters: {'n_estimators': 180, 'max_depth': 4, 'learning_rate': 0.33291255146837795, 'subsample': 0.8924021038957167, 'colsample_bytree': 0.6324035172684863, 'gamma': 1.9858931358214837, 'min_child_weight': 6, 'reg_alpha': 0.021114226067329798, 'reg_lambda': 0.03401659195362071}. Best is trial 0 with value: 34.346354913716176.
[I 2024-11-29 12:27:25,959] Trial 1 finished with value: 43.451986688191205 and parameters: {'n_estimators': 59, 'max_depth': 17, 'learning_rate': 0.33636477302758655, 'subsample': 0.6598246729201775, 'colsample_bytree': 0.6265141052591547, 'gamma': 4.527979567213454, 'min_child_weight': 8, 'reg_alpha': 0.11908734845693798, 'reg_lambda': 0.005952554900369011}. Best is trial 0 with value: 34.346354913716176.
[I 2024-11-29 12:27:28,224] Trial 2 finished with value: 30.323108639052116 and parameters: {'n_estimators': 142, 'max_depth': 5, 'learning_rate': 0.36016715162924723, 'subsampl

Modelo salvo como: xgb_model_2022-11-30_to_2023-11-29.pkl
Tunando hiperparâmetros para período de 2022-12-30 a 2023-12-29


[I 2024-11-29 12:30:19,511] Trial 0 finished with value: 32.52068644168313 and parameters: {'n_estimators': 139, 'max_depth': 18, 'learning_rate': 0.10795865136171169, 'subsample': 0.6694911107162604, 'colsample_bytree': 0.554231392007928, 'gamma': 0.7079401052889667, 'min_child_weight': 1, 'reg_alpha': 0.15117920711140198, 'reg_lambda': 0.02105417578552265}. Best is trial 0 with value: 32.52068644168313.
[I 2024-11-29 12:30:21,336] Trial 1 finished with value: 45.49741275411634 and parameters: {'n_estimators': 159, 'max_depth': 3, 'learning_rate': 0.4635499729882723, 'subsample': 0.9654289236282813, 'colsample_bytree': 0.6913960850371796, 'gamma': 0.7994908577048171, 'min_child_weight': 8, 'reg_alpha': 0.018418123878139948, 'reg_lambda': 0.002271094595222523}. Best is trial 0 with value: 32.52068644168313.
[I 2024-11-29 12:30:28,226] Trial 2 finished with value: 32.738045801159956 and parameters: {'n_estimators': 126, 'max_depth': 19, 'learning_rate': 0.23248101428037815, 'subsample':

Modelo salvo como: xgb_model_2022-12-30_to_2023-12-29.pkl
Tunando hiperparâmetros para período de 2023-01-30 a 2024-01-29


[I 2024-11-29 12:34:28,367] Trial 0 finished with value: 20.130414495877243 and parameters: {'n_estimators': 164, 'max_depth': 16, 'learning_rate': 0.5069065053909426, 'subsample': 0.6588934189177665, 'colsample_bytree': 0.9998473018858847, 'gamma': 4.639012431456903, 'min_child_weight': 3, 'reg_alpha': 0.004187218286878574, 'reg_lambda': 0.0020424063626480822}. Best is trial 0 with value: 20.130414495877243.
[I 2024-11-29 12:34:31,098] Trial 1 finished with value: 64.70429075500317 and parameters: {'n_estimators': 61, 'max_depth': 12, 'learning_rate': 0.054969074202831654, 'subsample': 0.9150636775777026, 'colsample_bytree': 0.5743902745658662, 'gamma': 0.18506387210384556, 'min_child_weight': 3, 'reg_alpha': 0.9834078986621473, 'reg_lambda': 1.934065325440343}. Best is trial 0 with value: 20.130414495877243.
[I 2024-11-29 12:34:35,124] Trial 2 finished with value: 22.420398492771298 and parameters: {'n_estimators': 151, 'max_depth': 9, 'learning_rate': 0.49971967442219506, 'subsample

Modelo salvo como: xgb_model_2023-01-30_to_2024-01-29.pkl
Tunando hiperparâmetros para período de 2023-02-28 a 2024-02-28


[I 2024-11-29 12:37:14,207] Trial 0 finished with value: 37.90554983125786 and parameters: {'n_estimators': 181, 'max_depth': 11, 'learning_rate': 0.42553602171149274, 'subsample': 0.7764821395539522, 'colsample_bytree': 0.566980730272432, 'gamma': 2.811805635235154, 'min_child_weight': 4, 'reg_alpha': 0.0002921517323783461, 'reg_lambda': 1.8442966615855938}. Best is trial 0 with value: 37.90554983125786.
[I 2024-11-29 12:37:19,997] Trial 1 finished with value: 19.211000607328554 and parameters: {'n_estimators': 185, 'max_depth': 11, 'learning_rate': 0.45180394152480513, 'subsample': 0.6133785687682765, 'colsample_bytree': 0.8769924650481971, 'gamma': 4.132299702550855, 'min_child_weight': 3, 'reg_alpha': 2.376102255792797, 'reg_lambda': 0.0004325786382443712}. Best is trial 1 with value: 19.211000607328554.
[I 2024-11-29 12:37:24,378] Trial 2 finished with value: 41.990299669219986 and parameters: {'n_estimators': 178, 'max_depth': 8, 'learning_rate': 0.5130246790545778, 'subsample': 

Modelo salvo como: xgb_model_2023-02-28_to_2024-02-28.pkl
Tunando hiperparâmetros para período de 2023-03-29 a 2024-03-28


[I 2024-11-29 12:41:02,689] Trial 0 finished with value: 22.68246443039029 and parameters: {'n_estimators': 157, 'max_depth': 14, 'learning_rate': 0.43961042088019714, 'subsample': 0.772974530060934, 'colsample_bytree': 0.7700870166348428, 'gamma': 4.781779788573922, 'min_child_weight': 5, 'reg_alpha': 0.0002308179301447933, 'reg_lambda': 0.8213852087857485}. Best is trial 0 with value: 22.68246443039029.
[I 2024-11-29 12:41:04,340] Trial 1 finished with value: 42.62467881424564 and parameters: {'n_estimators': 137, 'max_depth': 3, 'learning_rate': 0.7199598456827193, 'subsample': 0.9004733970275558, 'colsample_bytree': 0.5725855831376104, 'gamma': 1.4235462113916486, 'min_child_weight': 6, 'reg_alpha': 0.008221417065765265, 'reg_lambda': 0.7774147248602284}. Best is trial 0 with value: 22.68246443039029.
[I 2024-11-29 12:41:09,496] Trial 2 finished with value: 29.076405571772135 and parameters: {'n_estimators': 137, 'max_depth': 14, 'learning_rate': 0.2659753250680868, 'subsample': 0.

Modelo salvo como: xgb_model_2023-03-29_to_2024-03-28.pkl
Tunando hiperparâmetros para período de 2023-04-29 a 2024-04-28


[I 2024-11-29 12:45:44,676] Trial 0 finished with value: 36.89783402413417 and parameters: {'n_estimators': 97, 'max_depth': 5, 'learning_rate': 0.7009172752023201, 'subsample': 0.5865802462373678, 'colsample_bytree': 0.8842876871318641, 'gamma': 2.48947694410972, 'min_child_weight': 2, 'reg_alpha': 1.3658443193247778, 'reg_lambda': 0.004246762421284045}. Best is trial 0 with value: 36.89783402413417.
[I 2024-11-29 12:45:52,128] Trial 1 finished with value: 25.457253614195032 and parameters: {'n_estimators': 152, 'max_depth': 19, 'learning_rate': 0.5253953760581075, 'subsample': 0.5971171320291527, 'colsample_bytree': 0.8929581768350559, 'gamma': 4.281821220184373, 'min_child_weight': 4, 'reg_alpha': 0.32517748044692085, 'reg_lambda': 0.4035691685157365}. Best is trial 1 with value: 25.457253614195032.
[I 2024-11-29 12:45:56,252] Trial 2 finished with value: 21.94401523878272 and parameters: {'n_estimators': 102, 'max_depth': 15, 'learning_rate': 0.15299950096165896, 'subsample': 0.895

Modelo salvo como: xgb_model_2023-04-29_to_2024-04-28.pkl
Tunando hiperparâmetros para período de 2023-05-29 a 2024-05-28


[I 2024-11-29 12:50:02,024] Trial 0 finished with value: 24.986090142883988 and parameters: {'n_estimators': 65, 'max_depth': 17, 'learning_rate': 0.17538331560973808, 'subsample': 0.6432912177242628, 'colsample_bytree': 0.8209902878403366, 'gamma': 0.9880587212752956, 'min_child_weight': 9, 'reg_alpha': 4.912910866902986, 'reg_lambda': 0.0408314387443449}. Best is trial 0 with value: 24.986090142883988.
[I 2024-11-29 12:50:22,496] Trial 1 finished with value: 31.02320020937883 and parameters: {'n_estimators': 148, 'max_depth': 17, 'learning_rate': 0.0975645439315592, 'subsample': 0.9251211246722203, 'colsample_bytree': 0.6255414367235087, 'gamma': 0.017910961691973704, 'min_child_weight': 1, 'reg_alpha': 1.8626976956019685, 'reg_lambda': 1.4903431189685024e-05}. Best is trial 0 with value: 24.986090142883988.
[I 2024-11-29 12:50:26,155] Trial 2 finished with value: 37.320396380523576 and parameters: {'n_estimators': 51, 'max_depth': 13, 'learning_rate': 0.6745498312268462, 'subsample'

Modelo salvo como: xgb_model_2023-05-29_to_2024-05-28.pkl
Tunando hiperparâmetros para período de 2023-06-29 a 2024-06-28


[I 2024-11-29 12:54:53,357] Trial 0 finished with value: 30.81283360711824 and parameters: {'n_estimators': 118, 'max_depth': 11, 'learning_rate': 0.24630734140504318, 'subsample': 0.6009988840960103, 'colsample_bytree': 0.7471831515689424, 'gamma': 1.753621992331496, 'min_child_weight': 6, 'reg_alpha': 0.0002608722403765824, 'reg_lambda': 2.3102702076509478e-05}. Best is trial 0 with value: 30.81283360711824.
[I 2024-11-29 12:54:58,150] Trial 1 finished with value: 38.28847533839148 and parameters: {'n_estimators': 68, 'max_depth': 14, 'learning_rate': 0.05701226156605565, 'subsample': 0.6049424922126254, 'colsample_bytree': 0.7443697448841706, 'gamma': 3.599121803864077, 'min_child_weight': 5, 'reg_alpha': 0.002806905485648337, 'reg_lambda': 0.0272841128128553}. Best is trial 0 with value: 30.81283360711824.
[I 2024-11-29 12:54:59,239] Trial 2 finished with value: 42.725161448538366 and parameters: {'n_estimators': 50, 'max_depth': 6, 'learning_rate': 0.21835094718335177, 'subsample'

Modelo salvo como: xgb_model_2023-06-29_to_2024-06-28.pkl
Tunando hiperparâmetros para período de 2023-07-29 a 2024-07-28


[I 2024-11-29 12:58:22,781] Trial 0 finished with value: 57.55006473643016 and parameters: {'n_estimators': 198, 'max_depth': 14, 'learning_rate': 0.7872456920839697, 'subsample': 0.6023681277288975, 'colsample_bytree': 0.7271933423166863, 'gamma': 0.10928666061490067, 'min_child_weight': 10, 'reg_alpha': 1.146566475458054e-05, 'reg_lambda': 7.658196583112945e-05}. Best is trial 0 with value: 57.55006473643016.
[I 2024-11-29 12:58:30,774] Trial 1 finished with value: 43.96848647833076 and parameters: {'n_estimators': 174, 'max_depth': 15, 'learning_rate': 0.6278137166696576, 'subsample': 0.6340820689688087, 'colsample_bytree': 0.6684020422680848, 'gamma': 4.762944948865439, 'min_child_weight': 3, 'reg_alpha': 0.014179252063908661, 'reg_lambda': 0.0006911110558507095}. Best is trial 1 with value: 43.96848647833076.
[I 2024-11-29 12:58:36,411] Trial 2 finished with value: 31.322321188375128 and parameters: {'n_estimators': 102, 'max_depth': 13, 'learning_rate': 0.3233887840146804, 'subsa

Modelo salvo como: xgb_model_2023-07-29_to_2024-07-28.pkl
Tunando hiperparâmetros para período de 2023-08-29 a 2024-08-28


[I 2024-11-29 13:02:32,490] Trial 0 finished with value: 69.6397030039775 and parameters: {'n_estimators': 90, 'max_depth': 7, 'learning_rate': 0.5206720584700236, 'subsample': 0.5886219223703895, 'colsample_bytree': 0.5034619120491065, 'gamma': 2.2075478782596525, 'min_child_weight': 9, 'reg_alpha': 2.56865262366469, 'reg_lambda': 0.004322840510787523}. Best is trial 0 with value: 69.6397030039775.
[I 2024-11-29 13:02:36,394] Trial 1 finished with value: 33.966158155660956 and parameters: {'n_estimators': 135, 'max_depth': 10, 'learning_rate': 0.4057842213968, 'subsample': 0.832997353725164, 'colsample_bytree': 0.685115537874831, 'gamma': 0.2772221311550921, 'min_child_weight': 3, 'reg_alpha': 0.03065792527756932, 'reg_lambda': 1.534993498741834e-05}. Best is trial 1 with value: 33.966158155660956.
[I 2024-11-29 13:02:38,044] Trial 2 finished with value: 49.238400198553244 and parameters: {'n_estimators': 119, 'max_depth': 4, 'learning_rate': 0.4976136473931985, 'subsample': 0.8468830

Modelo salvo como: xgb_model_2023-08-29_to_2024-08-28.pkl
Tunando hiperparâmetros para período de 2023-09-29 a 2024-09-28


[I 2024-11-29 13:06:20,038] Trial 0 finished with value: 53.735371185636005 and parameters: {'n_estimators': 71, 'max_depth': 18, 'learning_rate': 0.6012517764202563, 'subsample': 0.7903524162496722, 'colsample_bytree': 0.5344872780145645, 'gamma': 3.5043931544784024, 'min_child_weight': 7, 'reg_alpha': 2.6698803087357416e-05, 'reg_lambda': 0.0004663235358740924}. Best is trial 0 with value: 53.735371185636005.
[I 2024-11-29 13:06:22,345] Trial 1 finished with value: 24.939271171331598 and parameters: {'n_estimators': 126, 'max_depth': 6, 'learning_rate': 0.07155474578829593, 'subsample': 0.804439224234571, 'colsample_bytree': 0.8507532335583434, 'gamma': 4.0614274086635564, 'min_child_weight': 5, 'reg_alpha': 1.1653264998343438, 'reg_lambda': 0.0019279606016594933}. Best is trial 1 with value: 24.939271171331598.
[I 2024-11-29 13:06:24,879] Trial 2 finished with value: 37.553324781844935 and parameters: {'n_estimators': 133, 'max_depth': 6, 'learning_rate': 0.5615946070038094, 'subsam

Modelo salvo como: xgb_model_2023-09-29_to_2024-09-28.pkl
Tunando hiperparâmetros para período de 2023-10-29 a 2024-10-28


[I 2024-11-29 13:09:24,366] Trial 0 finished with value: 33.74736627803353 and parameters: {'n_estimators': 132, 'max_depth': 3, 'learning_rate': 0.08094862687530989, 'subsample': 0.9776883113485597, 'colsample_bytree': 0.9152672794584653, 'gamma': 1.1174462220786925, 'min_child_weight': 4, 'reg_alpha': 0.0005628387321111403, 'reg_lambda': 1.1333678073688274}. Best is trial 0 with value: 33.74736627803353.
[I 2024-11-29 13:09:26,333] Trial 1 finished with value: 36.6552228212195 and parameters: {'n_estimators': 127, 'max_depth': 4, 'learning_rate': 0.7704395191800797, 'subsample': 0.831474765697652, 'colsample_bytree': 0.707742224822195, 'gamma': 1.5560993716659954, 'min_child_weight': 6, 'reg_alpha': 1.2849346873192504e-05, 'reg_lambda': 6.487603156413612e-05}. Best is trial 0 with value: 33.74736627803353.
[I 2024-11-29 13:09:30,786] Trial 2 finished with value: 27.194870658155754 and parameters: {'n_estimators': 121, 'max_depth': 14, 'learning_rate': 0.70605670948224, 'subsample': 0

Modelo salvo como: xgb_model_2023-10-29_to_2024-10-28.pkl
Tunando hiperparâmetros para período de 2023-11-29 a 2024-11-28


[I 2024-11-29 13:12:23,338] Trial 0 finished with value: 37.30910535087326 and parameters: {'n_estimators': 150, 'max_depth': 6, 'learning_rate': 0.6336940760017286, 'subsample': 0.6934703943392279, 'colsample_bytree': 0.6280894518407387, 'gamma': 4.102640215689577, 'min_child_weight': 3, 'reg_alpha': 0.3836851490700641, 'reg_lambda': 0.0013878046092042277}. Best is trial 0 with value: 37.30910535087326.
[I 2024-11-29 13:12:27,169] Trial 1 finished with value: 32.49660911948679 and parameters: {'n_estimators': 83, 'max_depth': 18, 'learning_rate': 0.6396707912767787, 'subsample': 0.5219064028874176, 'colsample_bytree': 0.8985648684063727, 'gamma': 2.9895143277035103, 'min_child_weight': 5, 'reg_alpha': 0.028631906141643706, 'reg_lambda': 0.0023627482092675894}. Best is trial 1 with value: 32.49660911948679.
[I 2024-11-29 13:12:30,173] Trial 2 finished with value: 64.41173195141685 and parameters: {'n_estimators': 60, 'max_depth': 18, 'learning_rate': 0.6585880110119566, 'subsample': 0.

Modelo salvo como: xgb_model_2023-11-29_to_2024-11-28.pkl
