In [1]:
import pandas as pd
from training import DataPreprocessing
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
import optuna

In [2]:
df = pd.read_parquet("..\\training_set.parquet")

In [3]:
training_set, test_set = DataPreprocessing.stratified_split_dataframe(df, "Label_Index")

X_train = training_set.drop("Label_Index", axis=1)  
y_train = training_set["Label_Index"]

X_test = test_set.drop("Label_Index", axis=1)
y_test = test_set["Label_Index"]

In [4]:
def objective(trial):
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 400)
    }

    model = XGBClassifier(**params)
    score = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=3, n_jobs=-1)
    accuracy = score.mean()
    
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, timeout=3600)

print(f"Mejores parámetros: {study.best_params}")
print(f"Mejor score: {study.best_value}")


[I 2024-06-17 14:29:16,297] A new study created in memory with name: no-name-f6b7ff90-4b7d-4c11-9c58-750d60d896ed
[I 2024-06-17 14:33:46,212] Trial 0 finished with value: 0.8831896679404011 and parameters: {'learning_rate': 0.23962854155043442, 'max_depth': 8, 'min_child_weight': 7, 'subsample': 0.6142975198934528, 'colsample_bytree': 0.8348945145303123, 'n_estimators': 211}. Best is trial 0 with value: 0.8831896679404011.
[I 2024-06-17 14:35:05,155] Trial 1 finished with value: 0.8818566900678338 and parameters: {'learning_rate': 0.07548745556538515, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.7022944161450949, 'colsample_bytree': 0.7363616921633958, 'n_estimators': 55}. Best is trial 0 with value: 0.8831896679404011.
[I 2024-06-17 14:38:46,914] Trial 2 finished with value: 0.8830978405758465 and parameters: {'learning_rate': 0.11681380235412828, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.8121795255624243, 'colsample_bytree': 0.7980314171628878, 'n_estimators': 19

Mejores parámetros: {'learning_rate': 0.23962854155043442, 'max_depth': 8, 'min_child_weight': 7, 'subsample': 0.6142975198934528, 'colsample_bytree': 0.8348945145303123, 'n_estimators': 211}
Mejor score: 0.8831896679404011
