# Parte II: Preprocesamiento y Optimización
## Dataset: Medical Cost Personal Dataset

### Parte 1: Preprocesamiento de Datos

In [7]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns

# Cargar el dataset
data = pd.read_csv('insurance.csv')

# Mostrar información inicial
print(data.info())
print(data.head())

# Manejo de outliers
numeric_columns = ['age', 'bmi', 'charges']
for col in numeric_columns:
    Q1 = data[col].quantile(0.25)
    Q3 = data[col].quantile(0.75)
    IQR = Q3 - Q1
    data = data[(data[col] >= (Q1 - 1.5 * IQR)) & (data[col] <= (Q3 + 1.5 * IQR))]

# Codificación y escalado con ColumnTransformer
numeric_features = ['age', 'bmi', 'children']
categorical_features = ['sex', 'smoker', 'region']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ]
)

# Separar variables predictoras y objetivo
data['high_cost'] = (data['charges'] > data['charges'].median()).astype(int)
X = data.drop(columns=['charges', 'high_cost'])
y = data['high_cost']

# Dividir datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB
None
   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


### Parte 2: Selección de Técnica de Machine Learning

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Modelos a evaluar
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'LightGBM': LGBMClassifier()
}

# Evaluación inicial
for name, model in models.items():
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', model)])
    scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{name} - Accuracy: {np.mean(scores):.4f} (+/- {np.std(scores):.4f})")

Logistic Regression - Accuracy: 0.9065 (+/- 0.0106)
KNN - Accuracy: 0.8856 (+/- 0.0345)
Decision Tree - Accuracy: 0.8823 (+/- 0.0257)
Random Forest - Accuracy: 0.9275 (+/- 0.0164)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



XGBoost - Accuracy: 0.9233 (+/- 0.0140)
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000202 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used featu

### Parte 3: Optimización de Hiperparámetros

In [9]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import optuna

# GridSearchCV para Random Forest
param_grid_rf = {
    'classifier__n_estimators': [50, 100, 200],
    'classifier__max_depth': [None, 10, 20, 30],
    'classifier__min_samples_split': [2, 5, 10]
}

pipeline_rf = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', RandomForestClassifier())])
grid_rf = GridSearchCV(pipeline_rf, param_grid_rf, cv=5, scoring='accuracy')
grid_rf.fit(X_train, y_train)
print("Mejores hiperparámetros para Random Forest:", grid_rf.best_params_)

# RandomizedSearchCV para XGBoost
param_dist_xgb = {
    'classifier__n_estimators': [50, 100, 200],
    'classifier__learning_rate': [0.01, 0.1, 0.2, 0.3],
    'classifier__max_depth': [3, 5, 10]
}

pipeline_xgb = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', XGBClassifier(eval_metric='logloss'))])
random_xgb = RandomizedSearchCV(pipeline_xgb, param_dist_xgb, n_iter=10, cv=5, scoring='accuracy', random_state=42)
random_xgb.fit(X_train, y_train)
print("Mejores hiperparámetros para XGBoost:", random_xgb.best_params_)

# Optuna para LightGBM
def objective(trial):
    param_lgb = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10)
    }
    pipeline_lgb = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', LGBMClassifier(**param_lgb))])
    scores = cross_val_score(pipeline_lgb, X_train, y_train, cv=5, scoring='accuracy')
    return np.mean(scores)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
print("Mejores hiperparámetros para LightGBM:", study.best_params)

  from .autonotebook import tqdm as notebook_tqdm


Mejores hiperparámetros para Random Forest: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 200}


[I 2024-11-26 01:14:56,759] A new study created in memory with name: no-name-043b0216-8eff-4813-bab6-827dd898ff98


Mejores hiperparámetros para XGBoost: {'classifier__n_estimators': 100, 'classifier__max_depth': 5, 'classifier__learning_rate': 0.1}
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train 

[I 2024-11-26 01:14:57,315] Trial 0 finished with value: 0.9243703499586664 and parameters: {'n_estimators': 79, 'learning_rate': 0.23920079059237065, 'max_depth': 5}. Best is trial 0 with value: 0.9243703499586664.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000801 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:14:58,681] Trial 1 finished with value: 0.9233122072196197 and parameters: {'n_estimators': 107, 'learning_rate': 0.15326869878716085, 'max_depth': 10}. Best is trial 0 with value: 0.9243703499586664.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000245 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:14:59,127] Trial 2 finished with value: 0.9317222375310003 and parameters: {'n_estimators': 66, 'learning_rate': 0.241055516041262, 'max_depth': 5}. Best is trial 2 with value: 0.9317222375310003.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000274 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000281 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:14:59,373] Trial 3 finished with value: 0.9306751171121521 and parameters: {'n_estimators': 69, 'learning_rate': 0.2787358866113225, 'max_depth': 3}. Best is trial 2 with value: 0.9317222375310003.


[LightGBM] [Info] Number of positive: 371, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 316
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486877 -> initscore=-0.052505
[LightGBM] [Info] Start training from score -0.052505
[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:14:59,668] Trial 4 finished with value: 0.933827500688895 and parameters: {'n_estimators': 57, 'learning_rate': 0.1813469793650054, 'max_depth': 4}. Best is trial 4 with value: 0.933827500688895.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:00,746] Trial 5 finished with value: 0.9243593276384677 and parameters: {'n_estimators': 173, 'learning_rate': 0.15596611112645448, 'max_depth': 5}. Best is trial 4 with value: 0.933827500688895.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:01,738] Trial 6 finished with value: 0.9317222375310003 and parameters: {'n_estimators': 126, 'learning_rate': 0.04628508702925563, 'max_depth': 6}. Best is trial 4 with value: 0.933827500688895.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000260 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000280 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:03,048] Trial 7 finished with value: 0.9212179663819235 and parameters: {'n_estimators': 161, 'learning_rate': 0.25938654285302, 'max_depth': 6}. Best is trial 4 with value: 0.933827500688895.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000278 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:04,224] Trial 8 finished with value: 0.9212124552218242 and parameters: {'n_estimators': 144, 'learning_rate': 0.18227325946646897, 'max_depth': 6}. Best is trial 4 with value: 0.933827500688895.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000329 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000368 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:04,598] Trial 9 finished with value: 0.9348801322678424 and parameters: {'n_estimators': 69, 'learning_rate': 0.1003933989041736, 'max_depth': 3}. Best is trial 9 with value: 0.9348801322678424.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000395 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000565 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:07,124] Trial 10 finished with value: 0.9275227335354093 and parameters: {'n_estimators': 197, 'learning_rate': 0.04550903726860456, 'max_depth': 9}. Best is trial 9 with value: 0.9348801322678424.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:07,453] Trial 11 finished with value: 0.9327748691099476 and parameters: {'n_estimators': 95, 'learning_rate': 0.13374454612149664, 'max_depth': 3}. Best is trial 9 with value: 0.9348801322678424.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:07,663] Trial 12 finished with value: 0.9348856434279416 and parameters: {'n_estimators': 55, 'learning_rate': 0.09626009164454809, 'max_depth': 3}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 371, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 313
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486877 -> initscore=-0.052505
[LightGBM] [Info] Start training from score -0.052505
[LightGBM] [Info] Number of positive: 371, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 316
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:07,983] Trial 13 finished with value: 0.9338330118489943 and parameters: {'n_estimators': 54, 'learning_rate': 0.09463584917389016, 'max_depth': 3}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info

[I 2024-11-26 01:15:08,976] Trial 14 finished with value: 0.9285753651143566 and parameters: {'n_estimators': 91, 'learning_rate': 0.08229286558208407, 'max_depth': 8}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000224 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000218 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:09,550] Trial 15 finished with value: 0.9348856434279416 and parameters: {'n_estimators': 117, 'learning_rate': 0.01835806893455949, 'max_depth': 4}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000265 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:10,116] Trial 16 finished with value: 0.932785891430146 and parameters: {'n_estimators': 120, 'learning_rate': 0.01279966136225405, 'max_depth': 4}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:11,378] Trial 17 finished with value: 0.932780380270047 and parameters: {'n_estimators': 142, 'learning_rate': 0.023651138847873607, 'max_depth': 7}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486202 -> initscore=-0.055205
[LightGBM] [Info] Start training from score -0.055205
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 318
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:11,953] Trial 18 finished with value: 0.9338330118489943 and parameters: {'n_estimators': 115, 'learning_rate': 0.06151321278273885, 'max_depth': 4}. Best is trial 12 with value: 0.9348856434279416.


[LightGBM] [Info] Number of positive: 370, number of negative: 392
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000252 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 314
[LightGBM] [Info] Number of data points in the train set: 762, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.485564 -> initscore=-0.057759
[LightGBM] [Info] Start training from score -0.057759
[LightGBM] [Info] Number of positive: 370, number of negative: 391
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000278 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 315
[LightGBM] [Info] Number of data points in the train set: 761, number of used features: 11
[LightGBM] [Info] [binary:BoostF

[I 2024-11-26 01:15:12,406] Trial 19 finished with value: 0.9306806282722512 and parameters: {'n_estimators': 90, 'learning_rate': 0.12280298040806993, 'max_depth': 4}. Best is trial 12 with value: 0.9348856434279416.


Mejores hiperparámetros para LightGBM: {'n_estimators': 55, 'learning_rate': 0.09626009164454809, 'max_depth': 3}
