In [1]:
! pip install mlflow boto3 awscli

In [2]:
import mlflow
# step 1 : Set up the mlflow tracking server 
mlflow.set_tracking_uri("http://ec2-13-62-226-249.eu-north-1.compute.amazonaws.com:5000/")

In [3]:
mlflow.set_experiment("Exp 7 - Best Model-Feature_Eng")

<Experiment: artifact_location='s3://reddit-reccomender-bucket/14', creation_time=1763558084789, experiment_id='14', last_update_time=1763558084789, lifecycle_stage='active', name='Exp 7 - Best Model-Feature_Eng', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
import spacy
# CHANGES MADE: Import MLflow
import mlflow
import mlflow.lightgbm

In [5]:
df = pd.read_csv('reddit_preprocessing.csv').dropna(subset=['clean_comment'])
# Separate features and target
X_cleaned = df['clean_comment']
y_cleaned = df['category']

# Split the cleaned data into train and test sets (80-20 split)
X_train_cleaned,X_test_cleaned,y_train_cleaned,y_test_cleaned = train_test_split(
    X_cleaned,y_cleaned,test_size=0.2,random_state=42
)

In [6]:
#load spacy language model for POS tagging
import spacy
nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])

In [7]:
def extract_custom_features_batch(text_list):
    results = []

    # nlp.pipe is MUCH faster than individual calls
    for doc in nlp.pipe(text_list, batch_size=64):
        text = doc.text
        word_list = [token.text for token in doc]
        word_count = len(word_list)
        unique_words = len(set(word_list))
        pos_tags = [token.pos_ for token in doc]

        # Base features
        features = {
            "comment_length": len(text),
            "word_count": word_count,
            "avg_word_length": (
                sum(len(w) for w in word_list) / word_count
                if word_count else 0
            ),
            "unique_word_count": unique_words,
            "lexical_diversity": (
                unique_words / word_count if word_count else 0
            ),
            "pos_count": len(pos_tags),
        }

        # POS proportions
        if word_count > 0:
            for tag in set(pos_tags):
                features[f"pos_ratio_{tag}"] = pos_tags.count(tag) / word_count

        results.append(features)

    return results





In [8]:
# apply it to train and test

train_custom_features = pd.DataFrame(
    extract_custom_features_batch(X_train_cleaned)
)

test_custom_features = pd.DataFrame(
    extract_custom_features_batch(X_test_cleaned)
)


#ALIGN train and test so model doesn't break
train_custom_features, test_custom_features = train_custom_features.align(
    test_custom_features, join="outer", axis=1
)

train_custom_features = train_custom_features.fillna(0)
test_custom_features = test_custom_features.fillna(0)

In [9]:
train_custom_features.head()

Unnamed: 0,avg_word_length,comment_length,lexical_diversity,pos_count,pos_ratio_ADJ,pos_ratio_ADP,pos_ratio_ADV,pos_ratio_AUX,pos_ratio_CCONJ,pos_ratio_DET,...,pos_ratio_PART,pos_ratio_PRON,pos_ratio_PROPN,pos_ratio_PUNCT,pos_ratio_SCONJ,pos_ratio_SYM,pos_ratio_VERB,pos_ratio_X,unique_word_count,word_count
0,6.428571,51,1.0,7,0.0,0.142857,0.142857,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.428571,0.0,7,7
1,5.166667,36,1.0,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.166667,0.166667,0.0,0.0,0.0,0.333333,0.0,6,6
2,6.222222,64,1.0,9,0.222222,0.0,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,9,9
3,6.266667,108,0.933333,15,0.2,0.0,0.0,0.0,0.0,0.0,...,0.066667,0.0,0.0,0.0,0.0,0.0,0.2,0.0,14,15
4,6.0,6,1.0,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1


In [10]:
test_custom_features.isnull().sum()

avg_word_length      0
comment_length       0
lexical_diversity    0
pos_count            0
pos_ratio_ADJ        0
pos_ratio_ADP        0
pos_ratio_ADV        0
pos_ratio_AUX        0
pos_ratio_CCONJ      0
pos_ratio_DET        0
pos_ratio_INTJ       0
pos_ratio_NOUN       0
pos_ratio_NUM        0
pos_ratio_PART       0
pos_ratio_PRON       0
pos_ratio_PROPN      0
pos_ratio_PUNCT      0
pos_ratio_SCONJ      0
pos_ratio_SYM        0
pos_ratio_VERB       0
pos_ratio_X          0
unique_word_count    0
word_count           0
dtype: int64

In [11]:
# Apply BOW with trigram setting and max_features=5000

BOW = CountVectorizer(ngram_range=(1,3),max_features=5000)
X_train_BOW = BOW.fit_transform(X_train_cleaned).astype('float32')
X_test_BOW = BOW.transform(X_test_cleaned).astype('float32')

In [12]:
# Convert BOW to DataFrame
X_train_BOW_df = pd.DataFrame(X_train_BOW.toarray(), columns=BOW.get_feature_names_out())
X_test_BOW_df = pd.DataFrame(X_test_BOW.toarray(), columns=BOW.get_feature_names_out())

In [13]:
#Combine BOW and custom features
X_train_combined = pd.concat([X_train_BOW_df.reset_index(drop=True), train_custom_features.reset_index(drop=True)], axis=1)
X_test_combined = pd.concat([X_test_BOW_df.reset_index(drop=True), test_custom_features.reset_index(drop=True)], axis=1)

In [14]:
X_train_combined

Unnamed: 0,000,000 crore,100,1000,101,120,150,180ml,1947,1984,...,pos_ratio_PART,pos_ratio_PRON,pos_ratio_PROPN,pos_ratio_PUNCT,pos_ratio_SCONJ,pos_ratio_SYM,pos_ratio_VERB,pos_ratio_X,unique_word_count,word_count
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.428571,0.0,7,7
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.166667,0.166667,0.0,0.0,0.0,0.333333,0.0,6,6
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.222222,0.0,9,9
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.066667,0.000000,0.000000,0.0,0.0,0.0,0.200000,0.0,14,15
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.083333,0.0,0.0,0.0,0.250000,0.0,12,12
29325,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.600000,0.0,0.0,0.0,0.133333,0.0,27,30
29326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,1,1
29327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.243243,0.0,31,37


In [15]:
! pip install optuna

In [16]:
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import classification_report,accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# function to optimize LightGbm hyperparameter
def objective(trial):
    # Define hyperparameters to be tuned
    param = {
        "objective": "multiclass",
        "num_class":3,
        "learning_rate": trial.suggest_float("learning_rate",1e-3, 1e-1),
        "n_estimators" : trial.suggest_int("n_estimators",50,500),
        "min_child_samples": trial.suggest_int('min_child_samples', 10, 200),
        "reg_lambda" : trial.suggest_float('reg_lambda', 1e-4, 50.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "metric": "multi_logloss",
        "is_unbalance": True,
        "class_weight": "balanced",
    }

    # CHANGES MADE: Start nested MLflow run for this trial
    with mlflow.start_run(nested=True):
        # Log trial hyperparameters
        for k, v in param.items():
            mlflow.log_param(k, v)
     
        #Define the Lightgbm parameter with the trial parameter
        model = lgb.LGBMClassifier(**param)

        #perform a cross validation
        scores =cross_val_score(model,X_train_combined,y_train_cleaned,cv=3,scoring='accuracy')

        # Return the average score across folds
        mean_score = scores.mean()

        # Log the mean CV accuracy
        mlflow.log_metric("mean_cv_accuracy", mean_score)

    return mean_score

In [18]:
with mlflow.start_run(run_name="Custom_feature_reddit"):

    # Create an Optuna study to optimize the hyperparameters
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)

    # Log best trial params
    best_params = study.best_trial.params
    for k, v in best_params.items():
        mlflow.log_param(f"best_{k}", v)

    best_model = lgb.LGBMClassifier(
        objective='multiclass',
        num_class=3,
        metric="multi_logloss",
        is_unbalance=True,
        class_weight="balanced",
        reg_alpha=0.1,
        reg_lambda=0.26403467489787047,
        learning_rate=0.08,
        max_depth=20,
        n_estimators=465,
        min_child_samples=10
    )

    # fit and log the model
    best_model.fit(X_train_combined, y_train_cleaned)

    # Predict on the test set
    y_test_pred = best_model.predict(X_test_combined)
    accuracy = accuracy_score(y_test_cleaned, y_test_pred)
    mlflow.log_metric("test_accuracy", accuracy)

    # Classification report
    report = classification_report(y_test_cleaned, y_test_pred, output_dict=True)

    for cls in ["0", "1", "2"]:
        if cls in report:
            mlflow.log_metric(f"precision_class_{cls}", report[cls]["precision"])
            mlflow.log_metric(f"recall_class_{cls}", report[cls]["recall"])
            mlflow.log_metric(f"f1_class_{cls}", report[cls]["f1-score"])

    # Log final trained model
    mlflow.lightgbm.log_model(best_model, artifact_path="lightgbm_model_custom_feature")

    print("Test accuracy:", accuracy)
    print(report)




[I 2025-11-19 22:20:29,537] A new study created in memory with name: no-name-be87739e-493f-4109-aeb3-07d472d604c7


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8301
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 769
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8357
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 770
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing 

[I 2025-11-19 22:21:36,348] Trial 0 finished with value: 0.7283918493702529 and parameters: {'learning_rate': 0.040407186598806315, 'n_estimators': 421, 'min_child_samples': 80, 'reg_lambda': 6.391302756755433, 'max_depth': 3}. Best is trial 0 with value: 0.7283918493702529.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031900 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7232
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 502
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.037853 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7242
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 495
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing 

[I 2025-11-19 22:22:55,812] Trial 1 finished with value: 0.7337789623497614 and parameters: {'learning_rate': 0.06426993791282166, 'n_estimators': 484, 'min_child_samples': 117, 'reg_lambda': 13.722409402426845, 'max_depth': 13}. Best is trial 1 with value: 0.7337789623497614.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026466 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6356
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 302
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6372
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 301
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025247 s

[I 2025-11-19 22:24:16,282] Trial 2 finished with value: 0.7042176097068277 and parameters: {'learning_rate': 0.05917627730303833, 'n_estimators': 462, 'min_child_samples': 176, 'reg_lambda': 4.830774843101786, 'max_depth': 14}. Best is trial 1 with value: 0.7337789623497614.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.675520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 15428
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3021
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.284593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15618
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3050
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 22:31:59,076] Trial 3 finished with value: 0.8110401366735708 and parameters: {'learning_rate': 0.07610918231721008, 'n_estimators': 355, 'min_child_samples': 20, 'reg_lambda': 0.009813818639495486, 'max_depth': 5}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040135 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6852
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 413
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6870
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 409
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042194 s

[I 2025-11-19 22:32:59,847] Trial 4 finished with value: 0.6707014214305805 and parameters: {'learning_rate': 0.054312251742778345, 'n_estimators': 80, 'min_child_samples': 137, 'reg_lambda': 0.0003689868072694603, 'max_depth': 4}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083951 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10922
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1503
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11011
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1508
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1039

[I 2025-11-19 22:34:28,533] Trial 5 finished with value: 0.7500085408426793 and parameters: {'learning_rate': 0.01963766026092286, 'n_estimators': 159, 'min_child_samples': 41, 'reg_lambda': 0.027093864950836755, 'max_depth': 20}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10793
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1468
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10904
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1477
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1244

[I 2025-11-19 22:35:29,625] Trial 6 finished with value: 0.8038801170354218 and parameters: {'learning_rate': 0.06918299995507722, 'n_estimators': 203, 'min_child_samples': 42, 'reg_lambda': 0.14795570346975878, 'max_depth': 17}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6515
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 338
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038297 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6567
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 342
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.048874 s

[I 2025-11-19 22:49:47,307] Trial 7 finished with value: 0.7009443953965019 and parameters: {'learning_rate': 0.014590887892621765, 'n_estimators': 498, 'min_child_samples': 160, 'reg_lambda': 2.3777377263103605, 'max_depth': 20}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093721 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10793
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1468
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.077485 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10904
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1477
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.0992

[I 2025-11-19 22:50:36,243] Trial 8 finished with value: 0.7721707267649601 and parameters: {'learning_rate': 0.05499703124152114, 'n_estimators': 488, 'min_child_samples': 42, 'reg_lambda': 0.004830483707678875, 'max_depth': 3}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9951
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1223
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.091640 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9952
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1201
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098743

[I 2025-11-19 22:51:07,696] Trial 9 finished with value: 0.7827405409591891 and parameters: {'learning_rate': 0.08984773866074167, 'n_estimators': 269, 'min_child_samples': 51, 'reg_lambda': 0.016269759705809825, 'max_depth': 5}. Best is trial 3 with value: 0.8110401366735708.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.188101 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19532
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4532
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.119174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19716
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4535
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.3541

[I 2025-11-19 22:52:02,765] Trial 10 finished with value: 0.8422721743990832 and parameters: {'learning_rate': 0.09819034231187426, 'n_estimators': 361, 'min_child_samples': 11, 'reg_lambda': 0.00018275146189573424, 'max_depth': 8}. Best is trial 10 with value: 0.8422721743990832.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.471352 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19532
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4532
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.268940 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19716
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4535
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.3815

[I 2025-11-19 22:52:58,595] Trial 11 finished with value: 0.8424425901922752 and parameters: {'learning_rate': 0.09782281851443844, 'n_estimators': 365, 'min_child_samples': 11, 'reg_lambda': 0.00010343957618591541, 'max_depth': 8}. Best is trial 11 with value: 0.8424425901922752.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.157049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17484
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3776
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176979 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17652
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3792
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.1217

[I 2025-11-19 22:54:06,152] Trial 12 finished with value: 0.8455794335057488 and parameters: {'learning_rate': 0.0989980700855162, 'n_estimators': 345, 'min_child_samples': 16, 'reg_lambda': 0.00010342823432952392, 'max_depth': 10}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.052469 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8246
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 753
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8313
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 758
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054344 s

[I 2025-11-19 22:55:06,685] Trial 13 finished with value: 0.7618058997699992 and parameters: {'learning_rate': 0.08274065129294644, 'n_estimators': 322, 'min_child_samples': 81, 'reg_lambda': 0.0006794555605811907, 'max_depth': 9}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8476
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 816
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058791 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8467
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 799
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044505 s

[I 2025-11-19 22:56:11,235] Trial 14 finished with value: 0.7678067362913094 and parameters: {'learning_rate': 0.09987871850623145, 'n_estimators': 274, 'min_child_samples': 76, 'reg_lambda': 0.0014205063600070836, 'max_depth': 9}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.150480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19718
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4594
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.246751 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19911
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4604
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1991

[I 2025-11-19 22:57:09,760] Trial 15 finished with value: 0.8106993015997052 and parameters: {'learning_rate': 0.029080517746481793, 'n_estimators': 394, 'min_child_samples': 10, 'reg_lambda': 0.00012121789107981772, 'max_depth': 11}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011576 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6236
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 277
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6233
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 270
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011798 s

[I 2025-11-19 22:58:12,324] Trial 16 finished with value: 0.6905111684229904 and parameters: {'learning_rate': 0.08561571653270024, 'n_estimators': 219, 'min_child_samples': 195, 'reg_lambda': 0.09512825102957909, 'max_depth': 7}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.029419 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9015
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031799 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9022
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 946
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045289 s

[I 2025-11-19 22:59:56,883] Trial 17 finished with value: 0.7755806398954178 and parameters: {'learning_rate': 0.04082828789200908, 'n_estimators': 328, 'min_child_samples': 65, 'reg_lambda': 0.0016757720947037913, 'max_depth': 13}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031603 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7274
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 512
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7286
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 505
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031156 s

[I 2025-11-19 23:01:37,019] Trial 18 finished with value: 0.656722130041106 and parameters: {'learning_rate': 0.004219085053048764, 'n_estimators': 426, 'min_child_samples': 114, 'reg_lambda': 0.6046165693238673, 'max_depth': 11}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.155925 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13097
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2206
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.074222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13140
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2181
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:02:15,449] Trial 19 finished with value: 0.8325888501527322 and parameters: {'learning_rate': 0.07293214499964076, 'n_estimators': 303, 'min_child_samples': 28, 'reg_lambda': 0.00010799483228115442, 'max_depth': 16}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9220
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1014
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9196
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 993
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026804 

[I 2025-11-19 23:02:47,502] Trial 20 finished with value: 0.77595551279066 and parameters: {'learning_rate': 0.09089047239056923, 'n_estimators': 233, 'min_child_samples': 62, 'reg_lambda': 0.002680689963044019, 'max_depth': 7}. Best is trial 12 with value: 0.8455794335057488.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.365709 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19718
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4594
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.339099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19911
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4604
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.2814

[I 2025-11-19 23:04:22,868] Trial 21 finished with value: 0.8488527315156356 and parameters: {'learning_rate': 0.09838290201195837, 'n_estimators': 373, 'min_child_samples': 10, 'reg_lambda': 0.00023837003799007944, 'max_depth': 9}. Best is trial 21 with value: 0.8488527315156356.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.189555 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12895
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2139
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.175125 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12926
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2111
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:06:52,209] Trial 22 finished with value: 0.827201663936108 and parameters: {'learning_rate': 0.08090210992477515, 'n_estimators': 384, 'min_child_samples': 29, 'reg_lambda': 0.0004800070736278244, 'max_depth': 10}. Best is trial 21 with value: 0.8488527315156356.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058555 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7745
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 627
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093297 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7785
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 625
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040209 s

[I 2025-11-19 23:07:51,371] Trial 23 finished with value: 0.7506223271591516 and parameters: {'learning_rate': 0.09254211600363661, 'n_estimators': 428, 'min_child_samples': 96, 'reg_lambda': 0.0003222831669863974, 'max_depth': 7}. Best is trial 21 with value: 0.8488527315156356.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.272126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13264
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2264
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.110956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13380
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2260
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1670

[I 2025-11-19 23:09:06,108] Trial 24 finished with value: 0.82877009605529 and parameters: {'learning_rate': 0.09042482486096316, 'n_estimators': 353, 'min_child_samples': 27, 'reg_lambda': 0.0008832277999300281, 'max_depth': 9}. Best is trial 21 with value: 0.8488527315156356.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.355442 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19718
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4594
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.354216 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19911
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4604
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.2781

[I 2025-11-19 23:10:37,450] Trial 25 finished with value: 0.8508983801679806 and parameters: {'learning_rate': 0.09923752293865394, 'n_estimators': 297, 'min_child_samples': 10, 'reg_lambda': 0.00013052105541764322, 'max_depth': 12}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.094635 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9697
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1149
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9705
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1130
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.065778

[I 2025-11-19 23:11:33,537] Trial 26 finished with value: 0.7896961523200319 and parameters: {'learning_rate': 0.07729618060555626, 'n_estimators': 302, 'min_child_samples': 54, 'reg_lambda': 0.005478797001936625, 'max_depth': 12}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.145387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12080
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1872
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.137277 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12160
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1863
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1243

[I 2025-11-19 23:13:33,877] Trial 27 finished with value: 0.8190868589959814 and parameters: {'learning_rate': 0.06727270745444298, 'n_estimators': 247, 'min_child_samples': 33, 'reg_lambda': 0.0002804600446748048, 'max_depth': 15}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.330750 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15029
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2875
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.222039 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 15239
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2913
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:15:13,260] Trial 28 finished with value: 0.8407718876722878 and parameters: {'learning_rate': 0.08668831881305508, 'n_estimators': 321, 'min_child_samples': 21, 'reg_lambda': 0.0007804443025801506, 'max_depth': 12}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044373 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8516
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 826
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8519
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 814
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044398 s

[I 2025-11-19 23:15:48,189] Trial 29 finished with value: 0.7680794817724955 and parameters: {'learning_rate': 0.09480294723078854, 'n_estimators': 192, 'min_child_samples': 75, 'reg_lambda': 0.030805160844820646, 'max_depth': 10}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032806 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6852
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 413
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035417 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6870
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 409
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020118 s

[I 2025-11-19 23:16:40,996] Trial 30 finished with value: 0.7241297282370122 and parameters: {'learning_rate': 0.043174317770504136, 'n_estimators': 407, 'min_child_samples': 137, 'reg_lambda': 0.4470526947194866, 'max_depth': 17}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.342308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19532
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4532
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.431009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19716
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4535
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:17:46,153] Trial 31 finished with value: 0.8354188264640824 and parameters: {'learning_rate': 0.09905168973443824, 'n_estimators': 378, 'min_child_samples': 11, 'reg_lambda': 0.00013030361503640637, 'max_depth': 6}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16957
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3578
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.193798 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17096
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3590
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:18:54,099] Trial 32 finished with value: 0.8472159935799088 and parameters: {'learning_rate': 0.0833019256470714, 'n_estimators': 440, 'min_child_samples': 17, 'reg_lambda': 0.00010307020405726014, 'max_depth': 10}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11314
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1627
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.079876 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11407
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1628
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1523

[I 2025-11-19 23:20:09,405] Trial 33 finished with value: 0.7959356930136702 and parameters: {'learning_rate': 0.08559490494117214, 'n_estimators': 446, 'min_child_samples': 38, 'reg_lambda': 39.04748260114951, 'max_depth': 13}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.176417 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14713
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2763
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.129345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 14817
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2766
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:21:20,038] Trial 34 finished with value: 0.8423061354958622 and parameters: {'learning_rate': 0.0936345387129429, 'n_estimators': 457, 'min_child_samples': 22, 'reg_lambda': 0.00026328405647756305, 'max_depth': 10}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.157289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10032
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1246
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.061238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10037
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1225
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0372

[I 2025-11-19 23:22:22,564] Trial 35 finished with value: 0.7966859061267018 and parameters: {'learning_rate': 0.07989037631014506, 'n_estimators': 420, 'min_child_samples': 50, 'reg_lambda': 0.00234491543026868, 'max_depth': 14}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.125364 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16957
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3578
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17096
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3590
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1475

[I 2025-11-19 23:23:02,653] Trial 36 finished with value: 0.8343618893356771 and parameters: {'learning_rate': 0.06226740609896337, 'n_estimators': 341, 'min_child_samples': 17, 'reg_lambda': 0.000498365488339681, 'max_depth': 11}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022871 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7907
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 667
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021584 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7986
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 677
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031374 s

[I 2025-11-19 23:23:23,896] Trial 37 finished with value: 0.7339153961212842 and parameters: {'learning_rate': 0.071332720667876, 'n_estimators': 75, 'min_child_samples': 91, 'reg_lambda': 0.00024110580829199375, 'max_depth': 12}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.114979 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11902
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1816
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.065549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12007
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1814
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:24:03,344] Trial 38 finished with value: 0.8021411017239264 and parameters: {'learning_rate': 0.08717814405049973, 'n_estimators': 123, 'min_child_samples': 34, 'reg_lambda': 0.0010703387095440723, 'max_depth': 13}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.029095 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9321
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1042
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9334
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1028
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.033859

[I 2025-11-19 23:24:41,714] Trial 39 finished with value: 0.7833543726129233 and parameters: {'learning_rate': 0.0950470236928104, 'n_estimators': 290, 'min_child_samples': 60, 'reg_lambda': 0.004653039488109496, 'max_depth': 8}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040945 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10348
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1338
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10436
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1345
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choos

[I 2025-11-19 23:27:11,581] Trial 40 finished with value: 0.8016979822993285 and parameters: {'learning_rate': 0.07524561067328812, 'n_estimators': 477, 'min_child_samples': 46, 'reg_lambda': 0.0002138956567583015, 'max_depth': 14}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.103732 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15428
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3021
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114470 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15618
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3050
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1040

[I 2025-11-19 23:27:55,738] Trial 41 finished with value: 0.8378396409601322 and parameters: {'learning_rate': 0.0984365009480072, 'n_estimators': 365, 'min_child_samples': 20, 'reg_lambda': 0.00010181353360526621, 'max_depth': 8}. Best is trial 25 with value: 0.8508983801679806.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.131984 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19532
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4532
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.102543 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19716
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4535
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1679

[I 2025-11-19 23:29:44,680] Trial 42 finished with value: 0.8515803572141015 and parameters: {'learning_rate': 0.09540361594963953, 'n_estimators': 401, 'min_child_samples': 11, 'reg_lambda': 0.00010070195185693874, 'max_depth': 10}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.087953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15884
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3178
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.099411 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16073
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3214
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0936

[I 2025-11-19 23:30:33,837] Trial 43 finished with value: 0.8442155945004032 and parameters: {'learning_rate': 0.09077491081714516, 'n_estimators': 399, 'min_child_samples': 19, 'reg_lambda': 0.0005384666476469735, 'max_depth': 10}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11181
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1585
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.051440 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11242
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1577
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0570

[I 2025-11-19 23:31:20,914] Trial 44 finished with value: 0.8116880479984617 and parameters: {'learning_rate': 0.08193916009966903, 'n_estimators': 341, 'min_child_samples': 39, 'reg_lambda': 0.0001975196548787599, 'max_depth': 9}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.019530 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6786
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 398
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021675 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6810
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 395
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018870 s

[I 2025-11-19 23:31:47,542] Trial 45 finished with value: 0.7188107048501831 and parameters: {'learning_rate': 0.09434462445011975, 'n_estimators': 415, 'min_child_samples': 142, 'reg_lambda': 0.0003998989746950328, 'max_depth': 11}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.095821 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13749
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 2427
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.065946 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13885
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 2433
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0759

[I 2025-11-19 23:33:16,852] Trial 46 finished with value: 0.8312931704896999 and parameters: {'learning_rate': 0.09930922382319587, 'n_estimators': 447, 'min_child_samples': 25, 'reg_lambda': 0.00017453701793697943, 'max_depth': 6}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.187993 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19718
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 4594
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.121169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19911
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 4604
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1599

[I 2025-11-19 23:36:45,693] Trial 47 finished with value: 0.8222919139639938 and parameters: {'learning_rate': 0.033122360253088526, 'n_estimators': 473, 'min_child_samples': 10, 'reg_lambda': 0.0001017810779554058, 'max_depth': 10}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073856 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11743
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 1764
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053695 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11890
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 1776
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.0480

[I 2025-11-19 23:37:31,948] Trial 48 finished with value: 0.8085853541057787 and parameters: {'learning_rate': 0.08617097624468265, 'n_estimators': 261, 'min_child_samples': 35, 'reg_lambda': 5.906852627899193, 'max_depth': 12}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.134270 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 16957
[LightGBM] [Info] Number of data points in the train set: 19552, number of used features: 3578
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.126468 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17096
[LightGBM] [Info] Number of data points in the train set: 19553, number of used features: 3590
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.1493

[I 2025-11-19 23:39:40,361] Trial 49 finished with value: 0.8273039831616571 and parameters: {'learning_rate': 0.056266079586490436, 'n_estimators': 380, 'min_child_samples': 17, 'reg_lambda': 0.00140558761778541, 'max_depth': 9}. Best is trial 42 with value: 0.8515803572141015.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.215394 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 21633
[LightGBM] [Info] Number of data points in the train set: 29329, number of used features: 4702
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Test accuracy: 0.8748124914768853
{'-1': {'precision': 0.8114856429463171, 'recall': 0.7893139040680024, 'f1-score': 0.8002462296091105, 'support': 1647.0}, '0': {'precision': 0.8597173144876326, 'recall': 0.9693227091633466, 'f1-score': 0.9112359550561798, 'support': 2510.0}, '1': {'precision': 0.9245087900723888, 'recall': 0.8444584382871536, 'f1-score': 0.8826723712358071, 'support': 3176.0}, 'accuracy': 0.8748124914768853, 'macro avg': {'precision': 0.8652372491687794, 'recall': 0.8676983505061675, 'f1-score': 0.8647181853003657, 'support': 7333.0}, 'weighted avg': {'precision': 0.8769463017273216, 'recall': 0.8748124914768853, 'f1-score': 0.873936347797919, 'support': 7333.0}}
🏃 View run Custom_feature_reddit at: http://ec2-13-62-226-249.eu-north-1.compute.amazonaws.com:5000/#/experiments/14/runs/762a2aac8509498fa4de847a8de99a75
🧪 View experiment at: http://ec2-13-62-226-249.eu-north-1.compute.amazonaws.com:5000/#/experiments/14


In [19]:
#extract the best hyperparameter
best_params = study.best_params
best_params

{'learning_rate': 0.09540361594963953,
 'n_estimators': 401,
 'min_child_samples': 11,
 'reg_lambda': 0.00010070195185693874,
 'max_depth': 10}