In [1]:
import mlflow 

In [2]:
mlflow.set_tracking_uri("http://ec2-13-61-146-35.eu-north-1.compute.amazonaws.com:5000/")

In [3]:
mlflow.set_experiment("Boosting")

<Experiment: artifact_location='s3://my-mlflow-bucket-123/581382709887216997', creation_time=1755545838660, experiment_id='581382709887216997', last_update_time=1755545838660, lifecycle_stage='active', name='Boosting', tags={}>

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv("dataset.csv")

In [6]:
df.shape

(39574, 5)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
import optuna
import mlflow.sklearn
from lightgbm import LGBMClassifier


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
ngram_range = (1, 1)
max_features = 8000

x_train, x_test, y_train, y_test = train_test_split(df['text'], df['label'], random_state=42, test_size=0.2)

vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

def log_mlflow(model_name, model, x_train, x_test, y_train, y_test):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_TF-IDF")
        mlflow.set_tag("experiment_type", "algo_comparison")

        mlflow.log_param("algo_name", model_name)

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        mlflow.sklearn.log_model(model, f"{model_name}")

def objective_xgboost(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int("max_depth", 3, 10)

    model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth)
    return accuracy_score(y_test, model.fit(x_train_vec, y_train).predict(x_test_vec))

def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_xgboost, n_trials=30)

    best_params = study.best_params
    best_model = XGBClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    log_mlflow("XGBoost", best_model, x_train_vec, x_test_vec, y_train, y_test)

run_optuna_experiment() 

[I 2025-08-20 22:50:18,352] A new study created in memory with name: no-name-76130bea-69d1-4227-ae14-0119433b20db
[I 2025-08-20 22:51:27,147] Trial 0 finished with value: 0.7075173720783323 and parameters: {'n_estimators': 112, 'learning_rate': 0.0006010005859381069, 'max_depth': 8}. Best is trial 0 with value: 0.7075173720783323.
[I 2025-08-20 22:51:46,335] Trial 1 finished with value: 0.6670878079595705 and parameters: {'n_estimators': 138, 'learning_rate': 0.00022822397381522356, 'max_depth': 4}. Best is trial 0 with value: 0.7075173720783323.
[I 2025-08-20 22:52:11,420] Trial 2 finished with value: 0.6673404927353127 and parameters: {'n_estimators': 175, 'learning_rate': 0.0002723254950620502, 'max_depth': 4}. Best is trial 0 with value: 0.7075173720783323.
[I 2025-08-20 22:53:32,154] Trial 3 finished with value: 0.7005685407454201 and parameters: {'n_estimators': 169, 'learning_rate': 0.000850852477543774, 'max_depth': 6}. Best is trial 0 with value: 0.7075173720783323.
[I 2025-08

🏃 View run XGBoost_TF-IDF at: http://ec2-13-61-146-35.eu-north-1.compute.amazonaws.com:5000/#/experiments/581382709887216997/runs/284aae54f2624f5d9f016eeaeeafff52
🧪 View experiment at: http://ec2-13-61-146-35.eu-north-1.compute.amazonaws.com:5000/#/experiments/581382709887216997


In [9]:
def objective_lightgbm(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int("max_depth", 3, 10)

    model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth)
    return accuracy_score(y_test, model.fit(x_train_vec, y_train).predict(x_test_vec))

def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=30)

    best_params = study.best_params
    best_model = LGBMClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'], max_depth=best_params['max_depth'], random_state=42)

    log_mlflow("LightGBM", best_model, x_train_vec, x_test_vec, y_train, y_test)

run_optuna_experiment()

[I 2025-08-20 23:53:24,751] A new study created in memory with name: no-name-2171d51e-86d6-4079-a806-c3787120108b


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.351937 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:53:38,977] Trial 0 finished with value: 0.6937460518003791 and parameters: {'n_estimators': 119, 'learning_rate': 0.00560854756723986, 'max_depth': 4}. Best is trial 0 with value: 0.6937460518003791.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.351914 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:53:46,354] Trial 1 finished with value: 0.8181933038534428 and parameters: {'n_estimators': 83, 'learning_rate': 0.07245309169628412, 'max_depth': 8}. Best is trial 1 with value: 0.8181933038534428.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.347800 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:53:58,594] Trial 2 finished with value: 0.7187618445988629 and parameters: {'n_estimators': 121, 'learning_rate': 0.0010612698086443083, 'max_depth': 8}. Best is trial 1 with value: 0.8181933038534428.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.380583 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:54:25,122] Trial 3 finished with value: 0.7267214150347442 and parameters: {'n_estimators': 271, 'learning_rate': 0.00044663210287193284, 'max_depth': 9}. Best is trial 1 with value: 0.8181933038534428.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.355418 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:54:44,644] Trial 4 finished with value: 0.8327226784586229 and parameters: {'n_estimators': 299, 'learning_rate': 0.037697766051524674, 'max_depth': 6}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.369106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:54:53,274] Trial 5 finished with value: 0.685912823752369 and parameters: {'n_estimators': 202, 'learning_rate': 0.0028812097668849666, 'max_depth': 3}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.370040 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:55:16,697] Trial 6 finished with value: 0.7756159191408718 and parameters: {'n_estimators': 300, 'learning_rate': 0.008039675874420404, 'max_depth': 7}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.352592 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:55:22,084] Trial 7 finished with value: 0.7622236260265319 and parameters: {'n_estimators': 67, 'learning_rate': 0.03555057859453074, 'max_depth': 5}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.341446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:55:30,960] Trial 8 finished with value: 0.8307012002526848 and parameters: {'n_estimators': 110, 'learning_rate': 0.08146925489817448, 'max_depth': 8}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.349481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:55:47,991] Trial 9 finished with value: 0.7319014529374606 and parameters: {'n_estimators': 144, 'learning_rate': 0.001265137217685654, 'max_depth': 10}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.350437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:56:07,736] Trial 10 finished with value: 0.69943145925458 and parameters: {'n_estimators': 237, 'learning_rate': 0.00010465880152429817, 'max_depth': 6}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.359219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:56:22,191] Trial 11 finished with value: 0.7998736576121289 and parameters: {'n_estimators': 208, 'learning_rate': 0.02358175124498402, 'max_depth': 6}. Best is trial 4 with value: 0.8327226784586229.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.365864 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:56:33,584] Trial 12 finished with value: 0.8419456727732154 and parameters: {'n_estimators': 168, 'learning_rate': 0.08128834124220007, 'max_depth': 7}. Best is trial 12 with value: 0.8419456727732154.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.326021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:56:43,435] Trial 13 finished with value: 0.7595704358812382 and parameters: {'n_estimators': 165, 'learning_rate': 0.013121887627431397, 'max_depth': 5}. Best is trial 12 with value: 0.8419456727732154.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.313744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:56:57,206] Trial 14 finished with value: 0.8501579279848389 and parameters: {'n_estimators': 199, 'learning_rate': 0.09908254273115005, 'max_depth': 7}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.347894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:57:10,368] Trial 15 finished with value: 0.8472520530638029 and parameters: {'n_estimators': 190, 'learning_rate': 0.09835030195465774, 'max_depth': 7}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.355430 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:57:31,213] Trial 16 finished with value: 0.8006317119393557 and parameters: {'n_estimators': 209, 'learning_rate': 0.01522840523498253, 'max_depth': 10}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.374405 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:57:51,208] Trial 17 finished with value: 0.8324699936828807 and parameters: {'n_estimators': 242, 'learning_rate': 0.0368858789235924, 'max_depth': 9}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.327705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:58:09,057] Trial 18 finished with value: 0.7273531269740998 and parameters: {'n_estimators': 191, 'learning_rate': 0.0025008257684786123, 'max_depth': 7}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.355292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:58:20,990] Trial 19 finished with value: 0.8458622867972204 and parameters: {'n_estimators': 233, 'learning_rate': 0.09629452369729304, 'max_depth': 5}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.359302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:58:36,923] Trial 20 finished with value: 0.7154769425142136 and parameters: {'n_estimators': 147, 'learning_rate': 0.00032952378957390606, 'max_depth': 9}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.401103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:58:50,984] Trial 21 finished with value: 0.8440934933670247 and parameters: {'n_estimators': 235, 'learning_rate': 0.08755106742894828, 'max_depth': 5}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.667808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:59:16,571] Trial 22 finished with value: 0.8185723310170562 and parameters: {'n_estimators': 254, 'learning_rate': 0.04709026058381021, 'max_depth': 4}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.326307 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:59:31,575] Trial 23 finished with value: 0.7906506632975363 and parameters: {'n_estimators': 190, 'learning_rate': 0.019149737293184133, 'max_depth': 6}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.304422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-20 23:59:40,085] Trial 24 finished with value: 0.8179406190777005 and parameters: {'n_estimators': 221, 'learning_rate': 0.050541566716173894, 'max_depth': 4}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.358709 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-21 00:00:06,530] Trial 25 finished with value: 0.7739734680985471 and parameters: {'n_estimators': 262, 'learning_rate': 0.008809397999638707, 'max_depth': 7}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 2.098320 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-21 00:00:27,385] Trial 26 finished with value: 0.8406822488945042 and parameters: {'n_estimators': 184, 'learning_rate': 0.09602959809939605, 'max_depth': 5}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.246010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-21 00:01:04,419] Trial 27 finished with value: 0.8109917877447884 and parameters: {'n_estimators': 221, 'learning_rate': 0.02222597241393536, 'max_depth': 8}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.451642 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-21 00:01:10,832] Trial 28 finished with value: 0.7944409349336703 and parameters: {'n_estimators': 152, 'learning_rate': 0.05354008711211398, 'max_depth': 3}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.397892 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633


[I 2025-08-21 00:01:18,035] Trial 29 finished with value: 0.6969046114971573 and parameters: {'n_estimators': 120, 'learning_rate': 0.006597483173243173, 'max_depth': 4}. Best is trial 14 with value: 0.8501579279848389.


[LightGBM] [Info] Number of positive: 15882, number of negative: 15777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.395063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 553231
[LightGBM] [Info] Number of data points in the train set: 31659, number of used features: 7956
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501658 -> initscore=0.006633
[LightGBM] [Info] Start training from score 0.006633




🏃 View run LightGBM_TF-IDF at: http://ec2-13-61-146-35.eu-north-1.compute.amazonaws.com:5000/#/experiments/581382709887216997/runs/2f68f20a75df48139e1d949b378baad5
🧪 View experiment at: http://ec2-13-61-146-35.eu-north-1.compute.amazonaws.com:5000/#/experiments/581382709887216997
