In [36]:
import sys
sys.path.append("/Users/karthika")  # Parent folder

In [37]:
from housing_app_fall25.housing_pipeline import SafeDivide, build_preprocessing

In [38]:
# Install required packages (if not already installed)
!pip install scikit-learn lightgbm xgboost optuna pandas numpy joblib seaborn matplotlib




In [39]:
import pandas as pd
import numpy as np
import joblib
from pathlib import Path

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import f1_score

from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

import optuna
import warnings
warnings.filterwarnings("ignore")

In [40]:
import pandas as pd

df = pd.read_csv("../data/housing.csv")
print(df.columns)


Index(['Order', 'PID', 'MS SubClass', 'MS Zoning', 'Lot Frontage', 'Lot Area',
       'Street', 'Alley', 'Lot Shape', 'Land Contour', 'Utilities',
       'Lot Config', 'Land Slope', 'Neighborhood', 'Condition 1',
       'Condition 2', 'Bldg Type', 'House Style', 'Overall Qual',
       'Overall Cond', 'Year Built', 'Year Remod/Add', 'Roof Style',
       'Roof Matl', 'Exterior 1st', 'Exterior 2nd', 'Mas Vnr Type',
       'Mas Vnr Area', 'Exter Qual', 'Exter Cond', 'Foundation', 'Bsmt Qual',
       'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin SF 1',
       'BsmtFin Type 2', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
       'Heating', 'Heating QC', 'Central Air', 'Electrical', '1st Flr SF',
       '2nd Flr SF', 'Low Qual Fin SF', 'Gr Liv Area', 'Bsmt Full Bath',
       'Bsmt Half Bath', 'Full Bath', 'Half Bath', 'Bedroom AbvGr',
       'Kitchen AbvGr', 'Kitchen Qual', 'TotRms AbvGrd', 'Functional',
       'Fireplaces', 'Fireplace Qu', 'Garage Type', 'Garage Yr Blt',
      

Step 1: Load Dataset & Create Classification Target

In [41]:
# Load dataset
df = pd.read_csv("../data/housing.csv")  # Adjust path if needed

# Convert to classification: HighPrice if SalePrice > median
median_price = df['SalePrice'].median()
df['HighPrice'] = (df['SalePrice'] > median_price).astype(int)

# Features / target
X = df.drop(columns=['SalePrice', 'HighPrice'])
y = df['HighPrice']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")
print(y_train.value_counts())


Train shape: (2344, 81), Test shape: (586, 81)
HighPrice
0    1174
1    1170
Name: count, dtype: int64


In [42]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer


In [43]:
import pandas as pd

# Check how many unique values each feature has
print(df.nunique())

# Check for missing values
print(df.isnull().sum())

# Basic descriptive statistics
print(df.describe())


Order             2930
PID               2930
MS SubClass         16
MS Zoning            7
Lot Frontage       128
                  ... 
Yr Sold              5
Sale Type           10
Sale Condition       6
SalePrice         1032
HighPrice            2
Length: 83, dtype: int64
Order               0
PID                 0
MS SubClass         0
MS Zoning           0
Lot Frontage      490
                 ... 
Yr Sold             0
Sale Type           0
Sale Condition      0
SalePrice           0
HighPrice           0
Length: 83, dtype: int64
            Order           PID  MS SubClass  Lot Frontage       Lot Area  \
count  2930.00000  2.930000e+03  2930.000000   2440.000000    2930.000000   
mean   1465.50000  7.144645e+08    57.387372     69.224590   10147.921843   
std     845.96247  1.887308e+08    42.638025     23.365335    7880.017759   
min       1.00000  5.263011e+08    20.000000     21.000000    1300.000000   
25%     733.25000  5.284770e+08    20.000000     58.000000    7440.250

Step 2: Define Preprocessing Pipeline

In [44]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Identify numeric and categorical columns
numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X_train.select_dtypes(include=['object', 'category']).columns.tolist()

# Numeric pipeline
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical pipeline
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Full preprocessor (NO SafeDivide)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)


# SafeDivide transformer
from sklearn.base import BaseEstimator, TransformerMixin

class SafeDivide(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X_copy = X.copy()
        numerator = X_copy.iloc[:, 0].fillna(0)
        denominator = X_copy.iloc[:, 1].replace(0,1).fillna(1)
        return (numerator / denominator).values.reshape(-1,1)

# Identify numeric and categorical columns
numeric_features = X_train.select_dtypes(include=['int64','float64']).columns.tolist()
categorical_features = X_train.select_dtypes(include=['object']).columns.tolist()

# Numeric pipeline
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical pipeline
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Full preprocessor with SafeDivide (example using first two numeric cols)
safe_divider = Pipeline([('safe_divide', SafeDivide())])
preprocessor = ColumnTransformer([
    ('safe_div', safe_divider, numeric_features[:2]),
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)
])


Step 3: Define Classifiers & Experiment Configurations

In [45]:
# 4 classifiers
classifiers = {
    "Ridge": RidgeClassifier(),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
    "LightGBM": LGBMClassifier(random_state=42)
}

# 4 conditions
conditions = [
    {"PCA": False, "Optuna": False},
    {"PCA": False, "Optuna": True},
    {"PCA": True, "Optuna": False},
    {"PCA": True, "Optuna": True}
]

# Folder to save models
model_dir = Path("../api/models")
model_dir.mkdir(parents=True, exist_ok=True)

# Log for results
results = []


Step 4: Define Optuna Objective

In [46]:
def objective(trial, classifier_name, X_train, y_train, use_pca):
    # Hyperparameters
    if classifier_name == "GradientBoosting":
        n_estimators = trial.suggest_int("n_estimators", 50, 300)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
        clf = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=42)
    elif classifier_name == "XGBoost":
        n_estimators = trial.suggest_int("n_estimators", 50, 300)
        max_depth = trial.suggest_int("max_depth", 3, 10)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
        clf = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                            use_label_encoder=False, eval_metric='logloss', random_state=42)
    elif classifier_name == "LightGBM":
        n_estimators = trial.suggest_int("n_estimators", 50, 300)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
        clf = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=42)
    else:  # Ridge
        alpha = trial.suggest_float("alpha", 0.1, 10.0)
        clf = RidgeClassifier(alpha=alpha)
    
    steps = [('preprocessor', preprocessor)]
    if use_pca:
        steps.append(('svd', TruncatedSVD(n_components=min(100, X_train.shape[1]))))
    steps.append(('classifier', clf))
    
    pipeline = Pipeline(steps)
    
    score = cross_val_score(pipeline, X_train, y_train, cv=3, scoring='f1').mean()
    return score


Step 5: Run 16 Experiments

In [47]:
import warnings
warnings.filterwarnings("ignore")

for clf_name, clf in classifiers.items():
    for cond in conditions:
        use_pca = cond["PCA"]
        use_optuna = cond["Optuna"]
        experiment_name = f"{clf_name}_{'PCA' if use_pca else 'NoPCA'}_{'Optuna' if use_optuna else 'NoOptuna'}"
        print(f"Running experiment: {experiment_name}")
        
        if use_optuna:
            study = optuna.create_study(direction="maximize")
            study.optimize(lambda trial: objective(trial, clf_name, X_train, y_train, use_pca), n_trials=20)
            best_params = study.best_params
            print("Best params:", best_params)
            
            # Build classifier with best params
            if clf_name == "GradientBoosting":
                clf = GradientBoostingClassifier(**best_params, random_state=42)
            elif clf_name == "XGBoost":
                clf = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
            elif clf_name == "LightGBM":
                clf = LGBMClassifier(random_state=42, **best_params)
            else:
                clf = RidgeClassifier(**best_params)
        
        # Build pipeline
        steps = [('preprocessor', preprocessor)]
        if use_pca:
            steps.append(('svd', TruncatedSVD(n_components=min(100, X_train.shape[1]))))
        steps.append(('classifier', clf))
        pipeline = Pipeline(steps)
        
        # Train full training set
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        test_f1 = f1_score(y_test, y_pred)
        
        # Save model
        model_file = model_dir / f"{experiment_name}.pkl"
        joblib.dump(pipeline, model_file)
        
        # Log results
        results.append({
            "Experiment": experiment_name,
            "Classifier": clf_name,
            "PCA": use_pca,
            "Optuna": use_optuna,
            "Test F1-score": test_f1,
            "Model Path": str(model_file)
        })
        print(f"{experiment_name} -> F1: {test_f1:.4f}, saved to {model_file}")


[I 2025-12-18 10:04:48,032] A new study created in memory with name: no-name-5f3e55dc-cc15-4b63-86f6-dfa9a0da3722
[I 2025-12-18 10:04:48,134] Trial 0 finished with value: 0.9172062948699663 and parameters: {'alpha': 4.520266490964633}. Best is trial 0 with value: 0.9172062948699663.


Running experiment: Ridge_NoPCA_NoOptuna
Ridge_NoPCA_NoOptuna -> F1: 0.9210, saved to ../api/models/Ridge_NoPCA_NoOptuna.pkl
Running experiment: Ridge_NoPCA_Optuna


[I 2025-12-18 10:04:48,246] Trial 1 finished with value: 0.9162528189036343 and parameters: {'alpha': 2.6030475429524333}. Best is trial 0 with value: 0.9172062948699663.
[I 2025-12-18 10:04:48,345] Trial 2 finished with value: 0.9163435605895162 and parameters: {'alpha': 6.318025563044053}. Best is trial 0 with value: 0.9172062948699663.
[I 2025-12-18 10:04:48,446] Trial 3 finished with value: 0.9171993586211807 and parameters: {'alpha': 4.883396228577902}. Best is trial 0 with value: 0.9172062948699663.
[I 2025-12-18 10:04:48,543] Trial 4 finished with value: 0.9158536988772038 and parameters: {'alpha': 9.66332565630756}. Best is trial 0 with value: 0.9172062948699663.
[I 2025-12-18 10:04:48,644] Trial 5 finished with value: 0.9163435605895162 and parameters: {'alpha': 7.201420428593681}. Best is trial 0 with value: 0.9172062948699663.
[I 2025-12-18 10:04:48,751] Trial 6 finished with value: 0.9166495435711043 and parameters: {'alpha': 3.0765960139142416}. Best is trial 0 with value:

Best params: {'alpha': 4.520266490964633}
Ridge_NoPCA_Optuna -> F1: 0.9262, saved to ../api/models/Ridge_NoPCA_Optuna.pkl
Running experiment: Ridge_PCA_NoOptuna
Ridge_PCA_NoOptuna -> F1: 0.9110, saved to ../api/models/Ridge_PCA_NoOptuna.pkl
Running experiment: Ridge_PCA_Optuna


[I 2025-12-18 10:04:50,499] Trial 0 finished with value: 0.9057185883922911 and parameters: {'alpha': 2.1660692656043863}. Best is trial 0 with value: 0.9057185883922911.
[I 2025-12-18 10:04:50,670] Trial 1 finished with value: 0.9052329391827411 and parameters: {'alpha': 4.581993484040073}. Best is trial 0 with value: 0.9057185883922911.
[I 2025-12-18 10:04:50,842] Trial 2 finished with value: 0.9056268934422288 and parameters: {'alpha': 2.005609893729005}. Best is trial 0 with value: 0.9057185883922911.
[I 2025-12-18 10:04:51,015] Trial 3 finished with value: 0.9052329391827411 and parameters: {'alpha': 7.6250461258528865}. Best is trial 0 with value: 0.9057185883922911.
[I 2025-12-18 10:04:51,186] Trial 4 finished with value: 0.9062020276604996 and parameters: {'alpha': 4.292212485919793}. Best is trial 4 with value: 0.9062020276604996.
[I 2025-12-18 10:04:51,357] Trial 5 finished with value: 0.9062031894933608 and parameters: {'alpha': 7.932418620010195}. Best is trial 5 with value

Best params: {'alpha': 7.932418620010195}
Ridge_PCA_Optuna -> F1: 0.9110, saved to ../api/models/Ridge_PCA_Optuna.pkl
Running experiment: GradientBoosting_NoPCA_NoOptuna


[I 2025-12-18 10:04:54,845] A new study created in memory with name: no-name-dbdc0854-ce91-4fa9-a295-435cabcc8e80


GradientBoosting_NoPCA_NoOptuna -> F1: 0.9288, saved to ../api/models/GradientBoosting_NoPCA_NoOptuna.pkl
Running experiment: GradientBoosting_NoPCA_Optuna


[I 2025-12-18 10:04:55,929] Trial 0 finished with value: 0.929545381082309 and parameters: {'n_estimators': 55, 'learning_rate': 0.18818519007999238}. Best is trial 0 with value: 0.929545381082309.
[I 2025-12-18 10:04:59,763] Trial 1 finished with value: 0.9340711386994917 and parameters: {'n_estimators': 216, 'learning_rate': 0.19545925281087775}. Best is trial 1 with value: 0.9340711386994917.
[I 2025-12-18 10:05:01,763] Trial 2 finished with value: 0.9336704417872225 and parameters: {'n_estimators': 111, 'learning_rate': 0.19427123780860864}. Best is trial 1 with value: 0.9340711386994917.
[I 2025-12-18 10:05:05,343] Trial 3 finished with value: 0.9332229094903086 and parameters: {'n_estimators': 196, 'learning_rate': 0.1450698154653185}. Best is trial 1 with value: 0.9340711386994917.
[I 2025-12-18 10:05:06,541] Trial 4 finished with value: 0.9313741489816446 and parameters: {'n_estimators': 64, 'learning_rate': 0.2957424811115792}. Best is trial 1 with value: 0.9340711386994917.
[

Best params: {'n_estimators': 65, 'learning_rate': 0.2243903469114616}
GradientBoosting_NoPCA_Optuna -> F1: 0.9336, saved to ../api/models/GradientBoosting_NoPCA_Optuna.pkl
Running experiment: GradientBoosting_PCA_NoOptuna


[I 2025-12-18 10:05:56,111] A new study created in memory with name: no-name-3ca445f4-5b31-4fd9-b54a-13f8179dda15


GradientBoosting_PCA_NoOptuna -> F1: 0.9147, saved to ../api/models/GradientBoosting_PCA_NoOptuna.pkl
Running experiment: GradientBoosting_PCA_Optuna


[I 2025-12-18 10:06:13,879] Trial 0 finished with value: 0.9189298011461885 and parameters: {'n_estimators': 264, 'learning_rate': 0.2161689203940332}. Best is trial 0 with value: 0.9189298011461885.
[I 2025-12-18 10:06:28,807] Trial 1 finished with value: 0.9201894391303579 and parameters: {'n_estimators': 220, 'learning_rate': 0.24294228745202062}. Best is trial 1 with value: 0.9201894391303579.
[I 2025-12-18 10:06:45,232] Trial 2 finished with value: 0.9139991923519742 and parameters: {'n_estimators': 245, 'learning_rate': 0.21289701922991716}. Best is trial 1 with value: 0.9201894391303579.
[I 2025-12-18 10:06:50,786] Trial 3 finished with value: 0.9180408604501604 and parameters: {'n_estimators': 82, 'learning_rate': 0.16406576775076312}. Best is trial 1 with value: 0.9201894391303579.
[I 2025-12-18 10:07:10,853] Trial 4 finished with value: 0.9220066805103766 and parameters: {'n_estimators': 298, 'learning_rate': 0.2366721794745592}. Best is trial 4 with value: 0.9220066805103766

Best params: {'n_estimators': 128, 'learning_rate': 0.10532015093366226}


[I 2025-12-18 10:09:55,426] A new study created in memory with name: no-name-e62399b5-de10-4aca-9aee-5e7e8d08dcd3


GradientBoosting_PCA_Optuna -> F1: 0.9218, saved to ../api/models/GradientBoosting_PCA_Optuna.pkl
Running experiment: XGBoost_NoPCA_NoOptuna
XGBoost_NoPCA_NoOptuna -> F1: 0.9199, saved to ../api/models/XGBoost_NoPCA_NoOptuna.pkl
Running experiment: XGBoost_NoPCA_Optuna


[I 2025-12-18 10:09:56,423] Trial 0 finished with value: 0.9273917259211378 and parameters: {'n_estimators': 277, 'max_depth': 9, 'learning_rate': 0.01898520814131068}. Best is trial 0 with value: 0.9273917259211378.
[I 2025-12-18 10:09:57,049] Trial 1 finished with value: 0.9347199769643639 and parameters: {'n_estimators': 229, 'max_depth': 8, 'learning_rate': 0.08133809717571865}. Best is trial 1 with value: 0.9347199769643639.
[I 2025-12-18 10:09:57,342] Trial 2 finished with value: 0.9340177316284984 and parameters: {'n_estimators': 129, 'max_depth': 5, 'learning_rate': 0.21081323028563584}. Best is trial 1 with value: 0.9347199769643639.
[I 2025-12-18 10:09:57,737] Trial 3 finished with value: 0.9320443735556548 and parameters: {'n_estimators': 103, 'max_depth': 9, 'learning_rate': 0.09003073023942021}. Best is trial 1 with value: 0.9347199769643639.
[I 2025-12-18 10:09:58,009] Trial 4 finished with value: 0.9339416687921238 and parameters: {'n_estimators': 176, 'max_depth': 3, 'l

Best params: {'n_estimators': 235, 'max_depth': 4, 'learning_rate': 0.13435802580311448}
XGBoost_NoPCA_Optuna -> F1: 0.9247, saved to ../api/models/XGBoost_NoPCA_Optuna.pkl
Running experiment: XGBoost_PCA_NoOptuna


[I 2025-12-18 10:10:05,586] A new study created in memory with name: no-name-46811840-79dd-4564-90fb-692fd5a22f69


XGBoost_PCA_NoOptuna -> F1: 0.9270, saved to ../api/models/XGBoost_PCA_NoOptuna.pkl
Running experiment: XGBoost_PCA_Optuna


[I 2025-12-18 10:10:06,480] Trial 0 finished with value: 0.9319060402795748 and parameters: {'n_estimators': 207, 'max_depth': 9, 'learning_rate': 0.26319400975254825}. Best is trial 0 with value: 0.9319060402795748.
[I 2025-12-18 10:10:07,693] Trial 1 finished with value: 0.9286942295774274 and parameters: {'n_estimators': 209, 'max_depth': 8, 'learning_rate': 0.07938808439471079}. Best is trial 0 with value: 0.9319060402795748.
[I 2025-12-18 10:10:08,313] Trial 2 finished with value: 0.9291494438068452 and parameters: {'n_estimators': 155, 'max_depth': 4, 'learning_rate': 0.14711454656050382}. Best is trial 0 with value: 0.9319060402795748.
[I 2025-12-18 10:10:09,320] Trial 3 finished with value: 0.9241701003791053 and parameters: {'n_estimators': 181, 'max_depth': 5, 'learning_rate': 0.03323488938878419}. Best is trial 0 with value: 0.9319060402795748.
[I 2025-12-18 10:10:09,977] Trial 4 finished with value: 0.930867852516637 and parameters: {'n_estimators': 172, 'max_depth': 4, 'le

Best params: {'n_estimators': 255, 'max_depth': 10, 'learning_rate': 0.16448411787777958}
XGBoost_PCA_Optuna -> F1: 0.9267, saved to ../api/models/XGBoost_PCA_Optuna.pkl
Running experiment: LightGBM_NoPCA_NoOptuna
[LightGBM] [Info] Number of positive: 1170, number of negative: 1174
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 2344, number of used features: 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499147 -> initscore=-0.003413
[LightGBM] [Info] Start training from score -0.003413


[I 2025-12-18 10:10:22,787] A new study created in memory with name: no-name-8e827d49-0e33-401a-959e-131bba34e042


LightGBM_NoPCA_NoOptuna -> F1: 0.9270, saved to ../api/models/LightGBM_NoPCA_NoOptuna.pkl
Running experiment: LightGBM_NoPCA_Optuna
[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001565 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001997 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the trai

[I 2025-12-18 10:10:23,693] Trial 0 finished with value: 0.9378212167995108 and parameters: {'n_estimators': 252, 'learning_rate': 0.2641743649467987}. Best is trial 0 with value: 0.9378212167995108.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:24,702] Trial 1 finished with value: 0.9384927991778134 and parameters: {'n_estimators': 232, 'learning_rate': 0.18741665767300014}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002813 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002571 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:25,884] Trial 2 finished with value: 0.9311200651354515 and parameters: {'n_estimators': 190, 'learning_rate': 0.02305482408240415}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002567 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:27,604] Trial 3 finished with value: 0.9365857062745974 and parameters: {'n_estimators': 224, 'learning_rate': 0.03291498074747613}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:28,970] Trial 4 finished with value: 0.9355859532721528 and parameters: {'n_estimators': 172, 'learning_rate': 0.17788697373922666}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:29,881] Trial 5 finished with value: 0.9343108147234581 and parameters: {'n_estimators': 141, 'learning_rate': 0.051504232229502386}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001973 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001345 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:30,622] Trial 6 finished with value: 0.9318342735877341 and parameters: {'n_estimators': 121, 'learning_rate': 0.059380783888591276}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001398 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:31,532] Trial 7 finished with value: 0.9367378551589987 and parameters: {'n_estimators': 155, 'learning_rate': 0.09704970561609755}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001753 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001539 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839


[I 2025-12-18 10:10:31,991] Trial 8 finished with value: 0.9336035308220755 and parameters: {'n_estimators': 62, 'learning_rate': 0.2020963760156144}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4027
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 191
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001789 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM]

[I 2025-12-18 10:10:32,655] Trial 9 finished with value: 0.9349111356143897 and parameters: {'n_estimators': 115, 'learning_rate': 0.21675031733791011}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:33,886] Trial 10 finished with value: 0.9359275751998165 and parameters: {'n_estimators': 297, 'learning_rate': 0.13057263395038668}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001879 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:34,608] Trial 11 finished with value: 0.9361998250186568 and parameters: {'n_estimators': 256, 'learning_rate': 0.2974992368137258}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001653 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:

[I 2025-12-18 10:10:35,366] Trial 12 finished with value: 0.933450125340093 and parameters: {'n_estimators': 244, 'learning_rate': 0.2674209306478454}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001990 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001314 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:36,255] Trial 13 finished with value: 0.9356319355751862 and parameters: {'n_estimators': 295, 'learning_rate': 0.23680681122383998}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001683 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001805 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:37,305] Trial 14 finished with value: 0.935943823037503 and parameters: {'n_estimators': 204, 'learning_rate': 0.15461748192172362}. Best is trial 1 with value: 0.9384927991778134.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001412 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002273 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:38,136] Trial 15 finished with value: 0.9402641403652541 and parameters: {'n_estimators': 258, 'learning_rate': 0.24917885001235152}. Best is trial 15 with value: 0.9402641403652541.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002376 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001562 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:

[I 2025-12-18 10:10:39,188] Trial 16 finished with value: 0.9358288151629823 and parameters: {'n_estimators': 269, 'learning_rate': 0.18260891269146273}. Best is trial 15 with value: 0.9402641403652541.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001686 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:39,969] Trial 17 finished with value: 0.9370874451072694 and parameters: {'n_estimators': 219, 'learning_rate': 0.23845609534007906}. Best is trial 15 with value: 0.9402641403652541.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002245 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:41,538] Trial 18 finished with value: 0.9384453972423934 and parameters: {'n_estimators': 277, 'learning_rate': 0.12288283599457014}. Best is trial 15 with value: 0.9402641403652541.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002930 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4011
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 189
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001470 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4036
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 192
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM]

[I 2025-12-18 10:10:42,613] Trial 19 finished with value: 0.9408464102396206 and parameters: {'n_estimators': 230, 'learning_rate': 0.2864773626716049}. Best is trial 19 with value: 0.9408464102396206.


Best params: {'n_estimators': 230, 'learning_rate': 0.2864773626716049}
[LightGBM] [Info] Number of positive: 1170, number of negative: 1174
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 2344, number of used features: 203
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499147 -> initscore=-0.003413
[LightGBM] [Info] Start training from score -0.003413
LightGBM_NoPCA_Optuna -> F1: 0.9254, saved to ../api/models/LightGBM_NoPCA_Optuna.pkl
Running experiment: LightGBM_PCA_NoOptuna
[LightGBM] [Info] Number of positive: 1170, number of negative: 1174
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000573 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [I

[I 2025-12-18 10:10:43,350] A new study created in memory with name: no-name-eafe77b3-ab63-4dfd-8118-d6823b7d00cf


LightGBM_PCA_NoOptuna -> F1: 0.9302, saved to ../api/models/LightGBM_PCA_NoOptuna.pkl
Running experiment: LightGBM_PCA_Optuna
[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001157 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:Bo

[I 2025-12-18 10:10:44,179] Trial 0 finished with value: 0.9258916783906171 and parameters: {'n_estimators': 281, 'learning_rate': 0.29361067181252204}. Best is trial 0 with value: 0.9258916783906171.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000542 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:45,008] Trial 1 finished with value: 0.9252767556689169 and parameters: {'n_estimators': 186, 'learning_rate': 0.17807868076307787}. Best is trial 0 with value: 0.9258916783906171.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000584 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:45,928] Trial 2 finished with value: 0.9266486234667731 and parameters: {'n_estimators': 138, 'learning_rate': 0.11507307417465722}. Best is trial 2 with value: 0.9266486234667731.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000503 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000542 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:46,916] Trial 3 finished with value: 0.929403342868186 and parameters: {'n_estimators': 161, 'learning_rate': 0.0362395845832492}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:48,250] Trial 4 finished with value: 0.9277747489117045 and parameters: {'n_estimators': 241, 'learning_rate': 0.09657971480759281}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000715 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:48,831] Trial 5 finished with value: 0.9244712444392053 and parameters: {'n_estimators': 77, 'learning_rate': 0.10193497009482294}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000612 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000640 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:49,359] Trial 6 finished with value: 0.9283737767926153 and parameters: {'n_estimators': 50, 'learning_rate': 0.14028589238234}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000665 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000538 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:50,342] Trial 7 finished with value: 0.9240327731275237 and parameters: {'n_estimators': 145, 'learning_rate': 0.026868599786001927}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000483 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839


[I 2025-12-18 10:10:51,089] Trial 8 finished with value: 0.9281109747798623 and parameters: {'n_estimators': 254, 'learning_rate': 0.23184080321424946}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000525 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000607 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:52,236] Trial 9 finished with value: 0.9284299231519709 and parameters: {'n_estimators': 201, 'learning_rate': 0.13693599398762624}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000527 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839


[I 2025-12-18 10:10:52,951] Trial 10 finished with value: 0.9224020964588938 and parameters: {'n_estimators': 111, 'learning_rate': 0.015009513629736251}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:54,180] Trial 11 finished with value: 0.9251916604066871 and parameters: {'n_estimators': 200, 'learning_rate': 0.05763015603253016}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000522 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000609 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:54,980] Trial 12 finished with value: 0.9287042253034006 and parameters: {'n_estimators': 211, 'learning_rate': 0.1979427133849072}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000649 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:55,965] Trial 13 finished with value: 0.9266205249542248 and parameters: {'n_estimators': 235, 'learning_rate': 0.18508712031171548}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000595 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000514 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:56,655] Trial 14 finished with value: 0.9280852060923858 and parameters: {'n_estimators': 151, 'learning_rate': 0.23342124582214038}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001011 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:57,404] Trial 15 finished with value: 0.9284428713789411 and parameters: {'n_estimators': 213, 'learning_rate': 0.2156168731644978}. Best is trial 3 with value: 0.929403342868186.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001127 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:58,062] Trial 16 finished with value: 0.9305084655546824 and parameters: {'n_estimators': 171, 'learning_rate': 0.27632970180390704}. Best is trial 16 with value: 0.9305084655546824.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000995 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:58,665] Trial 17 finished with value: 0.931198327547014 and parameters: {'n_estimators': 110, 'learning_rate': 0.281713996939284}. Best is trial 17 with value: 0.931198327547014.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000559 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:10:59,401] Trial 18 finished with value: 0.9306117716842851 and parameters: {'n_estimators': 110, 'learning_rate': 0.2929779406417012}. Best is trial 17 with value: 0.931198327547014.


[LightGBM] [Info] Number of positive: 780, number of negative: 782
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000874 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1562, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499360 -> initscore=-0.002561
[LightGBM] [Info] Start training from score -0.002561
[LightGBM] [Info] Number of positive: 780, number of negative: 783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000633 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 1563, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499040 -> initscore=-0.003839
[LightGBM] [Info] Start training from score -0.003839
[LightGBM] [Info] Nu

[I 2025-12-18 10:11:00,024] Trial 19 finished with value: 0.9323577113465574 and parameters: {'n_estimators': 109, 'learning_rate': 0.26149966770541166}. Best is trial 19 with value: 0.9323577113465574.


Best params: {'n_estimators': 109, 'learning_rate': 0.26149966770541166}
[LightGBM] [Info] Number of positive: 1170, number of negative: 1174
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20655
[LightGBM] [Info] Number of data points in the train set: 2344, number of used features: 81
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499147 -> initscore=-0.003413
[LightGBM] [Info] Start training from score -0.003413
LightGBM_PCA_Optuna -> F1: 0.9275, saved to ../api/models/LightGBM_PCA_Optuna.pkl


Step 6: Save Results to CSV

In [54]:
results_df = pd.DataFrame(results)
results_df.sort_values("Test F1-score", inplace=True)

results_df.to_csv(model_dir / "experiment_results.csv", index=False)
results_df.head(16)


Unnamed: 0,Experiment,Classifier,PCA,Optuna,Test F1-score,Model Path
2,Ridge_PCA_NoOptuna,Ridge,True,False,0.910959,../api/models/Ridge_PCA_NoOptuna.pkl
3,Ridge_PCA_Optuna,Ridge,True,True,0.910959,../api/models/Ridge_PCA_Optuna.pkl
6,GradientBoosting_PCA_NoOptuna,GradientBoosting,True,False,0.914676,../api/models/GradientBoosting_PCA_NoOptuna.pkl
8,XGBoost_NoPCA_NoOptuna,XGBoost,False,False,0.919932,../api/models/XGBoost_NoPCA_NoOptuna.pkl
0,Ridge_NoPCA_NoOptuna,Ridge,False,False,0.920962,../api/models/Ridge_NoPCA_NoOptuna.pkl
7,GradientBoosting_PCA_Optuna,GradientBoosting,True,True,0.921769,../api/models/GradientBoosting_PCA_Optuna.pkl
9,XGBoost_NoPCA_Optuna,XGBoost,False,True,0.924658,../api/models/XGBoost_NoPCA_Optuna.pkl
13,LightGBM_NoPCA_Optuna,LightGBM,False,True,0.925424,../api/models/LightGBM_NoPCA_Optuna.pkl
1,Ridge_NoPCA_Optuna,Ridge,False,True,0.926244,../api/models/Ridge_NoPCA_Optuna.pkl
11,XGBoost_PCA_Optuna,XGBoost,True,True,0.926746,../api/models/XGBoost_PCA_Optuna.pkl
