In [1]:
import numpy as np
import pandas as pd
import mlflow

import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
# from lightgbm import LGBMClassifier
import lightgbm as lgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sudhirjoon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/sudhirjoon/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
mlflow.set_tracking_uri("http://13.60.79.0:5000")
mlflow.set_experiment("Exp 5 - ML Algos with HP Tuning")

<Experiment: artifact_location='s3://capstone-yt-mlflow-bucket/542391321365143307', creation_time=1735485482149, experiment_id='542391321365143307', last_update_time=1735485482149, lifecycle_stage='active', name='Exp 5 - ML Algos with HP Tuning', tags={}>

In [3]:
df = pd.read_csv('reddit_preprocessing.csv').dropna(subset=['clean_comment'])
display(df.head(2))
display(df.shape)

Unnamed: 0,clean_comment,category
0,family mormon never tried explain still stare ...,1
1,buddhism much lot compatible christianity espe...,1


(36662, 2)

In [4]:
# Define TF-IDF vectorization parameters
ngram_range = (1, 2)  # Bigram setting
max_features = 1000  # Set max_features to 1000 for TF-IDF

# Train-test split before vectorization and resampling
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_comment'], df['category'], test_size=0.2, random_state=42, stratify=df['category']
)

# Vectorization using TF-IDF
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_vec = vectorizer.fit_transform(X_train)  # Fit on training data
X_test_vec = vectorizer.transform(X_test)  # Transform test data

# Handle imbalance using ADASYN
adasyn = ADASYN(random_state=42)
X_train_vec, y_train = adasyn.fit_resample(X_train_vec, y_train)


# Function to log model and metrics in MLFlow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model name and experiment type
        mlflow.set_tag("mlflow.runName", f"{model_name}_ADASYN_TFIDF_Bigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log model algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model and evaluate
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log detailed classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model itself
        mlflow.sklearn.log_model(model, f"{model_name}_model")


# Optuna objective function for LightGBM
def objective_lgbm(trial):
    # Suggest hyperparameters for LGBM
    num_leaves = trial.suggest_int("num_leaves", 10, 150)  # Number of leaves in one tree
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate
    n_estimators = trial.suggest_int("n_estimators", 50, 500)  # Number of boosting iterations
    max_depth = trial.suggest_int("max_depth", 3, 30)  # Maximum depth of tree

    # Initialize the LGBMClassifier
    model = lgb.LGBMClassifier(
        num_leaves=num_leaves,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42
    )

    # Train and evaluate the model
    model.fit(X_train_vec, y_train)
    y_pred = model.predict(X_test_vec)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

# Function to log model and metrics in MLFlow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model name and experiment type
        mlflow.set_tag("mlflow.runName", f"{model_name}_ADASYN_TFIDF_Bigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log model algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model and evaluate
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log detailed classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model itself
        mlflow.sklearn.log_model(model, f"{model_name}_model")

# Run Optuna to optimize LightGBM hyperparameters
def run_optuna_experiment():
    # Create an Optuna study
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lgbm, n_trials=50)

    # Get the best hyperparameters and retrain the best model
    best_params = study.best_params
    best_model = lgb.LGBMClassifier(
        num_leaves=best_params['num_leaves'],
        learning_rate=best_params['learning_rate'],
        n_estimators=best_params['n_estimators'],
        max_depth=best_params['max_depth'],
        random_state=42
    )

    # Log the best model with MLFlow
    log_mlflow("LGBM", best_model, X_train_vec, X_test_vec, y_train, y_test)

# Run the Optuna experiment for LGBM
run_optuna_experiment()

[I 2024-12-30 11:31:26,786] A new study created in memory with name: no-name-25e81a66-73f7-44e2-af27-7c5f98199364
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:31:49,861] Trial 0 finished with value: 0.6838947224873858 and parameters: {'num_leaves': 78, 'learning_rate': 0.0024060157986524272, 'n_estimators': 231, 'max_depth': 17}. Best is trial 0 with value: 0.6838947224873858.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.105911 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:31:54,396] Trial 1 finished with value: 0.4301104595663439 and parameters: {'num_leaves': 14, 'learning_rate': 1.055581250532087e-05, 'n_estimators': 126, 'max_depth': 14}. Best is trial 0 with value: 0.6838947224873858.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.115039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:32:42,638] Trial 2 finished with value: 0.774580662757398 and parameters: {'num_leaves': 105, 'learning_rate': 0.014707434208999897, 'n_estimators': 485, 'max_depth': 14}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112664 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:32:55,231] Trial 3 finished with value: 0.4301104595663439 and parameters: {'num_leaves': 76, 'learning_rate': 3.955152632881135e-05, 'n_estimators': 134, 'max_depth': 13}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085631 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:32:57,963] Trial 4 finished with value: 0.45670257738988135 and parameters: {'num_leaves': 28, 'learning_rate': 0.00030862220646530626, 'n_estimators': 117, 'max_depth': 4}. Best is trial 2 with value: 0.774580662757398.




  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106657 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:33:00,065] Trial 5 finished with value: 0.45083867448520387 and parameters: {'num_leaves': 55, 'learning_rate': 0.0005339326799622786, 'n_estimators': 129, 'max_depth': 3}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:33:14,371] Trial 6 finished with value: 0.7558979953634256 and parameters: {'num_leaves': 113, 'learning_rate': 0.026290503469359627, 'n_estimators': 95, 'max_depth': 23}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108238 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:33:30,909] Trial 7 finished with value: 0.6758489022228283 and parameters: {'num_leaves': 139, 'learning_rate': 0.005863219733741898, 'n_estimators': 396, 'max_depth': 6}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091820 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:33:51,895] Trial 8 finished with value: 0.4747033956088913 and parameters: {'num_leaves': 25, 'learning_rate': 5.719667178011692e-05, 'n_estimators': 458, 'max_depth': 8}. Best is trial 2 with value: 0.774580662757398.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.153830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:34:27,225] Trial 9 finished with value: 0.787126687576708 and parameters: {'num_leaves': 114, 'learning_rate': 0.045441648086304576, 'n_estimators': 238, 'max_depth': 23}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.112526 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:35:08,689] Trial 10 finished with value: 0.787126687576708 and parameters: {'num_leaves': 130, 'learning_rate': 0.05141410310215522, 'n_estimators': 280, 'max_depth': 30}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.104251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:35:44,081] Trial 11 finished with value: 0.7835810718669031 and parameters: {'num_leaves': 148, 'learning_rate': 0.08882746518892558, 'n_estimators': 263, 'max_depth': 30}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091532 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:36:27,800] Trial 12 finished with value: 0.784399290876858 and parameters: {'num_leaves': 117, 'learning_rate': 0.08123779607330485, 'n_estimators': 343, 'max_depth': 30}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.138107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:36:59,962] Trial 13 finished with value: 0.7497613527887631 and parameters: {'num_leaves': 124, 'learning_rate': 0.009976293222299612, 'n_estimators': 208, 'max_depth': 24}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:37:43,076] Trial 14 finished with value: 0.7239874539751807 and parameters: {'num_leaves': 97, 'learning_rate': 0.0031496974517498224, 'n_estimators': 328, 'max_depth': 25}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.114220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:38:11,208] Trial 15 finished with value: 0.782490113186963 and parameters: {'num_leaves': 135, 'learning_rate': 0.04077518366640047, 'n_estimators': 206, 'max_depth': 20}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092618 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:38:49,932] Trial 16 finished with value: 0.787126687576708 and parameters: {'num_leaves': 88, 'learning_rate': 0.0277325493037883, 'n_estimators': 326, 'max_depth': 27}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113733 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:39:34,312] Trial 17 finished with value: 0.6892131460520933 and parameters: {'num_leaves': 128, 'learning_rate': 0.0011440463214721444, 'n_estimators': 276, 'max_depth': 20}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098942 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:39:53,903] Trial 18 finished with value: 0.7353061502795581 and parameters: {'num_leaves': 58, 'learning_rate': 0.00652257490913405, 'n_estimators': 177, 'max_depth': 27}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092144 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:40:05,069] Trial 19 finished with value: 0.4307923087413064 and parameters: {'num_leaves': 150, 'learning_rate': 0.00018882360357848642, 'n_estimators': 61, 'max_depth': 21}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.112721 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:40:55,500] Trial 20 finished with value: 0.7869903177417156 and parameters: {'num_leaves': 102, 'learning_rate': 0.0387601606223295, 'n_estimators': 385, 'max_depth': 27}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.138922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:41:36,989] Trial 21 finished with value: 0.7804445656620755 and parameters: {'num_leaves': 92, 'learning_rate': 0.0172634043212782, 'n_estimators': 289, 'max_depth': 27}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.136318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:42:13,479] Trial 22 finished with value: 0.7845356607118505 and parameters: {'num_leaves': 88, 'learning_rate': 0.09031859135247387, 'n_estimators': 319, 'max_depth': 30}. Best is trial 9 with value: 0.787126687576708.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.107755 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:47:45,256] Trial 23 finished with value: 0.7906723032865131 and parameters: {'num_leaves': 64, 'learning_rate': 0.04595510570911088, 'n_estimators': 388, 'max_depth': 26}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098541 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:53:17,849] Trial 24 finished with value: 0.787399427246693 and parameters: {'num_leaves': 61, 'learning_rate': 0.052938248612671227, 'n_estimators': 417, 'max_depth': 23}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.090583 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:53:53,480] Trial 25 finished with value: 0.7066684849311332 and parameters: {'num_leaves': 61, 'learning_rate': 0.002475691692112946, 'n_estimators': 421, 'max_depth': 18}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093634 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:59:17,813] Trial 26 finished with value: 0.7794899768171281 and parameters: {'num_leaves': 40, 'learning_rate': 0.01387624909507032, 'n_estimators': 373, 'max_depth': 22}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093527 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 11:59:58,237] Trial 27 finished with value: 0.7500340924587481 and parameters: {'num_leaves': 68, 'learning_rate': 0.004322518603361943, 'n_estimators': 435, 'max_depth': 25}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.097249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:00:26,216] Trial 28 finished with value: 0.787944906586663 and parameters: {'num_leaves': 44, 'learning_rate': 0.02282259955533073, 'n_estimators': 474, 'max_depth': 19}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.113317 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:06:01,917] Trial 29 finished with value: 0.6856675303422883 and parameters: {'num_leaves': 46, 'learning_rate': 0.0013076676182875238, 'n_estimators': 495, 'max_depth': 17}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.104916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:06:31,205] Trial 30 finished with value: 0.7588981317332606 and parameters: {'num_leaves': 44, 'learning_rate': 0.007702168442821659, 'n_estimators': 430, 'max_depth': 19}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:07:09,410] Trial 31 finished with value: 0.7891722351015955 and parameters: {'num_leaves': 72, 'learning_rate': 0.02533931424501495, 'n_estimators': 471, 'max_depth': 22}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.086895 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:07:48,052] Trial 32 finished with value: 0.783035592526933 and parameters: {'num_leaves': 71, 'learning_rate': 0.016628503815574087, 'n_estimators': 463, 'max_depth': 21}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.105316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:13:26,088] Trial 33 finished with value: 0.784399290876858 and parameters: {'num_leaves': 79, 'learning_rate': 0.023311966808900433, 'n_estimators': 471, 'max_depth': 15}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.085089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:13:49,216] Trial 34 finished with value: 0.7894449747715805 and parameters: {'num_leaves': 50, 'learning_rate': 0.051972314532625234, 'n_estimators': 415, 'max_depth': 15}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.083723 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:14:11,603] Trial 35 finished with value: 0.7710350470475931 and parameters: {'num_leaves': 34, 'learning_rate': 0.01469910115982746, 'n_estimators': 499, 'max_depth': 12}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.103664 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:14:33,485] Trial 36 finished with value: 0.7812627846720306 and parameters: {'num_leaves': 51, 'learning_rate': 0.02755278381330274, 'n_estimators': 442, 'max_depth': 10}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.084889 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:14:42,219] Trial 37 finished with value: 0.4301104595663439 and parameters: {'num_leaves': 10, 'learning_rate': 1.293140779778553e-05, 'n_estimators': 370, 'max_depth': 16}. Best is trial 23 with value: 0.7906723032865131.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083354 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:14:55,194] Trial 38 finished with value: 0.792581480976408 and parameters: {'num_leaves': 19, 'learning_rate': 0.06114206644802956, 'n_estimators': 402, 'max_depth': 12}. Best is trial 38 with value: 0.792581480976408.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085357 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:15:07,802] Trial 39 finished with value: 0.7920360016364381 and parameters: {'num_leaves': 21, 'learning_rate': 0.06616559543682184, 'n_estimators': 398, 'max_depth': 12}. Best is trial 38 with value: 0.792581480976408.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.080084 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:15:18,593] Trial 40 finished with value: 0.7929905904813855 and parameters: {'num_leaves': 20, 'learning_rate': 0.0664406458370887, 'n_estimators': 354, 'max_depth': 11}. Best is trial 40 with value: 0.7929905904813855.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.103049 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:20:32,587] Trial 41 finished with value: 0.7933996999863631 and parameters: {'num_leaves': 19, 'learning_rate': 0.06448745195087456, 'n_estimators': 400, 'max_depth': 12}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.086973 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:20:43,400] Trial 42 finished with value: 0.7933996999863631 and parameters: {'num_leaves': 19, 'learning_rate': 0.09899086892495712, 'n_estimators': 358, 'max_depth': 11}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106914 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:20:53,938] Trial 43 finished with value: 0.7905359334515205 and parameters: {'num_leaves': 20, 'learning_rate': 0.09359613751424903, 'n_estimators': 349, 'max_depth': 11}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.106248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:03,299] Trial 44 finished with value: 0.790945042956498 and parameters: {'num_leaves': 17, 'learning_rate': 0.0634135905783543, 'n_estimators': 357, 'max_depth': 9}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098735 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:12,167] Trial 45 finished with value: 0.7848084003818355 and parameters: {'num_leaves': 27, 'learning_rate': 0.06767698685545949, 'n_estimators': 305, 'max_depth': 7}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:29,834] Trial 46 finished with value: 0.7880812764216555 and parameters: {'num_leaves': 33, 'learning_rate': 0.03369927017375072, 'n_estimators': 404, 'max_depth': 13}. Best is trial 41 with value: 0.7933996999863631.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.088099 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:39,933] Trial 47 finished with value: 0.4695213418791763 and parameters: {'num_leaves': 22, 'learning_rate': 0.0003697069314655248, 'n_estimators': 366, 'max_depth': 5}. Best is trial 41 with value: 0.7933996999863631.




  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:49,568] Trial 48 finished with value: 0.797490795036138 and parameters: {'num_leaves': 11, 'learning_rate': 0.09485317120820064, 'n_estimators': 444, 'max_depth': 13}. Best is trial 48 with value: 0.797490795036138.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 0.1)  # Learning rate


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.087916 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880


[I 2024-12-30 12:21:58,456] Trial 49 finished with value: 0.7951725078412655 and parameters: {'num_leaves': 10, 'learning_rate': 0.09832002186955217, 'n_estimators': 450, 'max_depth': 9}. Best is trial 48 with value: 0.797490795036138.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.078662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 100027
[LightGBM] [Info] Number of data points in the train set: 35868, number of used features: 991
[LightGBM] [Info] Start training from score -1.071711
[LightGBM] [Info] Start training from score -1.184681
[LightGBM] [Info] Start training from score -1.044880




🏃 View run LGBM_ADASYN_TFIDF_Bigrams at: http://13.60.79.0:5000/#/experiments/542391321365143307/runs/26a0841c651c4c9292db54030dd26cc8
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/542391321365143307
