In [1]:
!pip install mlflow dagshub optuna



In [1]:
import dagshub
# Initialise DagsHub for the project repository with the mlflow flag switched on.
dagshub.init(
    repo_owner='dakshvandanarathi',
    repo_name='YT-Sentiment-Analyser',
    mlflow=True,
)

In [2]:
import optuna
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
import xgboost as xgb
import lightgbm as lgb
from imblearn.over_sampling import ADASYN
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [21]:
# Select (or create) the MLflow experiment used by this notebook.
experiment_name = "Exp 4 - ML Algos with HP Tuning"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/498f75c0643c4aa2b0e490aca486b20a', creation_time=1729654450594, experiment_id='5', last_update_time=1729654450594, lifecycle_stage='active', name='Exp 4 - ML Algos with HP Tuning', tags={}>

In [3]:
# Load the preprocessed dataset and drop every row that contains a missing value.
data_path = '/content/preprocessed_data.csv'
df = pd.read_csv(data_path).dropna()
df.shape

(36662, 5)

In [4]:
# Remap the class labels from [-1, 0, 1] to [0, 1, 2].
# Series.map turns every value that is missing from the mapping into NaN, so running
# this cell a second time on already-remapped labels would corrupt them
# (0 -> 1, 1 -> 2, 2 -> NaN). Remap only while the original -1 label is still
# present, which makes the cell safe to re-run.
label_mapping = {-1: 0, 0: 1, 1: 2}
if (df['category'] == -1).any():
    df['category'] = df['category'].map(label_mapping)

In [5]:
# Define a function to vectorize the data using TF-IDF
def vectorize_data(X_train, X_val, X_test, max_features, ngram_range):
    """Build dense feature matrices for the train, validation and test splits.

    A TF-IDF vectorizer is fitted on the ``comment`` column of ``X_train`` only and
    then applied to the other two splits. Each dense TF-IDF matrix is extended on
    the right with the ``word_count``, ``char_count`` and ``avg_word_length``
    columns of the same split.

    Args:
        X_train, X_val, X_test: Frames providing ``comment`` plus the three
            numeric columns listed above.
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``.
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``.

    Returns:
        Tuple ``(train, val, test)`` of combined arrays.
    """
    extra_columns = ['word_count', 'char_count', 'avg_word_length']
    tfidf = TfidfVectorizer(max_features=max_features, ngram_range=ngram_range)

    def _combine(frame, tfidf_matrix):
        # Densify the TF-IDF block and append the numeric columns of the same split
        return np.hstack([tfidf_matrix.toarray(), frame[extra_columns].values])

    # The vocabulary is learned from the training comments only
    train_tfidf = tfidf.fit_transform(X_train['comment'])
    val_tfidf = tfidf.transform(X_val['comment'])
    test_tfidf = tfidf.transform(X_test['comment'])

    return (
        _combine(X_train, train_tfidf),
        _combine(X_val, val_tfidf),
        _combine(X_test, test_tfidf),
    )

In [6]:
# Define the function that evaluates the model on validation data
def evaluate_model(model, X_val, y_val):
    """Score a fitted model on the validation data.

    Args:
        model: Object exposing ``predict``.
        X_val: Validation features passed to ``model.predict``.
        y_val: True validation labels.

    Returns:
        Tuple ``(f1, accuracy)`` where ``f1`` is the macro-averaged F1 score.
    """
    predictions = model.predict(X_val)
    return (
        f1_score(y_val, predictions, average='macro'),
        accuracy_score(y_val, predictions),
    )

In [7]:
# TF-IDF settings passed to vectorize_data further down.
max_features = 1006
ngram_range = (1, 2)

# Model inputs (comment text plus three numeric columns) and the target labels.
feature_columns = ['comment', 'word_count', 'char_count', 'avg_word_length']
X = df[feature_columns]
y = df['category']

# Two stratified splits: 25% of the rows are held out first, then 40% of that
# hold-out becomes the test set and the remaining 60% the validation set.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.4, stratify=y_temp, random_state=42
)

### Vectorization and Resampling

In [None]:
# Turn the three splits into combined feature matrices (TF-IDF + numeric columns)
X_train_combined, X_val_combined, X_test_combined = vectorize_data(
    X_train,
    X_val,
    X_test,
    max_features=max_features,
    ngram_range=ngram_range,
)

In [None]:
# Oversample the training matrix with ADASYN; only the training split is passed in
adasyn_sampler = ADASYN(random_state=42)
X_resampled, y_resampled = adasyn_sampler.fit_resample(X_train_combined, y_train)

### XGBoost

In [None]:
# Define the Optuna objective function
def objective(trial):
    """Optuna objective: fit an XGBoost classifier and score it on the validation split.

    Args:
        trial: Optuna trial used to sample ``n_estimators``, ``max_depth`` and
            ``learning_rate`` (the latter on a log scale).

    Returns:
        Tuple ``(accuracy, f1)`` computed by ``evaluate_model`` on
        ``X_val_combined`` / ``y_val``.
    """
    # Search space (sampled in this order: n_estimators, max_depth, learning_rate)
    sampled_params = {
        'n_estimators': trial.suggest_int("n_estimators", 50, 500, step=10),
        'max_depth': trial.suggest_int("max_depth", 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
    }

    # Settings shared by every trial: 3-class softmax, histogram tree method, CUDA device
    fixed_params = {
        'objective': 'multi:softmax',
        'num_class': 3,
        'tree_method': 'hist',
        'device': 'cuda',
        'random_state': 42,
    }

    # Train on the resampled training data
    classifier = xgb.XGBClassifier(**sampled_params, **fixed_params)
    classifier.fit(X_resampled, y_resampled)

    # evaluate_model returns (f1, accuracy); the study receives accuracy first
    val_f1, val_accuracy = evaluate_model(classifier, X_val_combined, y_val)
    return val_accuracy, val_f1

In [None]:
# Run the Optuna search: 100 trials, maximising both values returned by
# `objective` (accuracy first, macro F1 second).
study_xgb = optuna.create_study(
    study_name="XGBoost_Optimization",
    directions=["maximize", "maximize"],
)
study_xgb.optimize(objective, n_trials=100)

[I 2024-10-23 07:32:58,321] A new study created in memory with name: XGBoost_Optimization
[I 2024-10-23 07:33:30,353] Trial 0 finished with values: [0.6193853427895981, 0.5696893450889243] and parameters: {'n_estimators': 420, 'max_depth': 6, 'learning_rate': 0.000981683522955723}.
[I 2024-10-23 07:35:32,469] Trial 1 finished with values: [0.7281323877068558, 0.6980806692126299] and parameters: {'n_estimators': 440, 'max_depth': 12, 'learning_rate': 0.006895094713133171}.
[I 2024-10-23 07:35:45,325] Trial 2 finished with values: [0.6253864338970722, 0.5822589971231921] and parameters: {'n_estimators': 210, 'max_depth': 5, 'learning_rate': 0.004823390227911081}.
[I 2024-10-23 07:36:34,716] Trial 3 finished with values: [0.6104746317512275, 0.5769254802345279] and parameters: {'n_estimators': 350, 'max_depth': 8, 'learning_rate': 0.00011252298249138059}.
[I 2024-10-23 07:37:53,359] Trial 4 finished with values: [0.6477541371158393, 0.6076811272388154] and parameters: {'n_estimators': 270

In [None]:
# Pick the trial to log: among the study's best trials, take the one with the
# highest first objective value (the accuracy returned first by `objective`).
# max() returns the earliest such trial when several share the top value.
best_trial = max(study_xgb.best_trials, key=lambda t: t.values[0])

# Single source for the tuned hyperparameters: logged to MLflow and used to
# build the final model.
best_params = best_trial.params

with mlflow.start_run() as run:
    # Run metadata
    mlflow.set_tag("mlflow.runName", "XGBoost")
    mlflow.set_tag("resampling_technique", "Adasyn")
    mlflow.set_tag("vectorizer_type", "TF-IDF")

    # Log best trial parameters and the algorithm name
    mlflow.log_params(best_params)
    mlflow.log_param("algo_name", "XGBoost")

    # Rebuild the classifier with the same fixed settings used during tuning
    model = xgb.XGBClassifier(
        random_state=42,
        objective='multi:softmax',
        num_class=3,  # Number of classes
        tree_method='hist',
        device='cuda',
        **best_params,
    )

    # Train the model on the resampled training data
    model.fit(X_resampled, y_resampled)

    # Predictions on the held-out test set
    y_test_pred = model.predict(X_test_combined)

    # Compute and log classification metrics
    classification_rep = classification_report(y_test, y_test_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_test_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log every per-label / per-average metric; report entries that are not
    # dicts are skipped.
    for label, metrics in classification_rep.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric}", value)

    # Generate, save and log the confusion matrix plot
    conf_matrix = confusion_matrix(y_test, y_test_pred)
    confusion_matrix_filename = "confusion_matrix.png"
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - XGBoost")
        fig.savefig(confusion_matrix_filename)
        mlflow.log_artifact(confusion_matrix_filename)
    finally:
        # Always release the figure, even if saving or logging fails
        plt.close(fig)

2024/10/23 10:22:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBoost at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/5/runs/ff3e2bc6b85c4886a6d4ed976fdff306.
2024/10/23 10:22:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/5.


### LightGBM

In [None]:
import lightgbm as lgb

# Define the Optuna objective function for LightGBM
def objective_lgbm(trial):
    """Optuna objective: fit a LightGBM classifier and score it on the validation split.

    Args:
        trial: Optuna trial used to sample ``n_estimators``, ``max_depth`` and
            ``learning_rate`` (the latter on a log scale).

    Returns:
        Tuple ``(accuracy, f1)`` computed by ``evaluate_model`` on
        ``X_val_combined`` / ``y_val``.
    """
    # Search space (sampled in this order: n_estimators, max_depth, learning_rate)
    sampled_params = {
        'n_estimators': trial.suggest_int("n_estimators", 100, 450, step=10),
        'max_depth': trial.suggest_int("max_depth", 5, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
    }

    # Settings shared by every trial. No device is specified here: the
    # `device='gpu'` option was left disabled in this notebook.
    fixed_params = {
        'objective': 'multiclass',
        'num_class': 3,
        'random_state': 42,
    }

    # Train on the resampled training data
    classifier = lgb.LGBMClassifier(**sampled_params, **fixed_params)
    classifier.fit(X_resampled, y_resampled)

    # evaluate_model returns (f1, accuracy); the study receives accuracy first
    val_f1, val_accuracy = evaluate_model(classifier, X_val_combined, y_val)
    return val_accuracy, val_f1

In [None]:
# Run the Optuna search: 100 trials, maximising both values returned by
# `objective_lgbm` (accuracy first, macro F1 second).
study_lgbm = optuna.create_study(
    study_name="LightGBM_Optimization",
    directions=["maximize", "maximize"],
)
study_lgbm.optimize(objective_lgbm, n_trials=100)

[I 2024-10-23 11:11:35,863] A new study created in memory with name: LightGBM_Optimization


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.383046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:12:09,725] Trial 0 finished with values: [0.7326786688488817, 0.7028695362028694] and parameters: {'n_estimators': 440, 'max_depth': 5, 'learning_rate': 0.014217094410984728}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.236639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:12:25,738] Trial 1 finished with values: [0.6212038552464084, 0.5688100946895994] and parameters: {'n_estimators': 130, 'max_depth': 6, 'learning_rate': 0.0018749897417486567}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226778 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:12:55,244] Trial 2 finished with values: [0.6755773777050372, 0.6382236292918357] and parameters: {'n_estimators': 170, 'max_depth': 14, 'learning_rate': 0.0011759999206392196}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:13:47,847] Trial 3 finished with values: [0.8081469358065103, 0.7862541761727099] and parameters: {'n_estimators': 380, 'max_depth': 14, 'learning_rate': 0.061795130706726105}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.238814 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:14:52,652] Trial 4 finished with values: [0.7006728496090199, 0.6668228090400093] and parameters: {'n_estimators': 400, 'max_depth': 9, 'learning_rate': 0.0043374928878069045}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.231566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:15:15,128] Trial 5 finished with values: [0.6661211129296236, 0.6260332176256238] and parameters: {'n_estimators': 130, 'max_depth': 12, 'learning_rate': 0.00142857318345936}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.230574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:15:41,001] Trial 6 finished with values: [0.7132205855610111, 0.6831780428854314] and parameters: {'n_estimators': 140, 'max_depth': 15, 'learning_rate': 0.008059976879901357}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.234690 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:16:25,169] Trial 7 finished with values: [0.6833969812693217, 0.6454431782330242] and parameters: {'n_estimators': 290, 'max_depth': 10, 'learning_rate': 0.0037415351672244316}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.255941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:16:54,058] Trial 8 finished with values: [0.6566648481542099, 0.6102089114138051] and parameters: {'n_estimators': 200, 'max_depth': 8, 'learning_rate': 0.0033001571895060343}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.371968 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:17:49,699] Trial 9 finished with values: [0.6521185670121841, 0.6065705980420164] and parameters: {'n_estimators': 360, 'max_depth': 8, 'learning_rate': 0.0014564557557891213}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.378185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:18:11,030] Trial 10 finished with values: [0.7312238588834333, 0.7020998529238875] and parameters: {'n_estimators': 180, 'max_depth': 7, 'learning_rate': 0.02272335170964598}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224699 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:19:09,990] Trial 11 finished with values: [0.7957810511002, 0.7750485530399085] and parameters: {'n_estimators': 380, 'max_depth': 15, 'learning_rate': 0.019171383411752643}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:20:01,427] Trial 12 finished with values: [0.8030551009274414, 0.7808400626042517] and parameters: {'n_estimators': 410, 'max_depth': 12, 'learning_rate': 0.03730924722895519}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.248256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:20:23,372] Trial 13 finished with values: [0.6666666666666666, 0.6232630362826899] and parameters: {'n_estimators': 180, 'max_depth': 7, 'learning_rate': 0.007007879996270877}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.365926 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:21:21,228] Trial 14 finished with values: [0.6717585015457356, 0.6312213862884863] and parameters: {'n_estimators': 350, 'max_depth': 13, 'learning_rate': 0.00034921790371083915}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226657 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:21:32,888] Trial 15 finished with values: [0.6879432624113475, 0.6516617252892681] and parameters: {'n_estimators': 130, 'max_depth': 5, 'learning_rate': 0.020367296392913264}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.395397 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:21:57,980] Trial 16 finished with values: [0.5904709947263139, 0.5307038802245316] and parameters: {'n_estimators': 190, 'max_depth': 7, 'learning_rate': 0.00032076171102456385}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:22:44,304] Trial 17 finished with values: [0.6332060374613566, 0.5863697435639438] and parameters: {'n_estimators': 360, 'max_depth': 7, 'learning_rate': 0.0008785960811915349}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.235546 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:23:14,250] Trial 18 finished with values: [0.6173849790871068, 0.5601577577602063] and parameters: {'n_estimators': 320, 'max_depth': 5, 'learning_rate': 0.0009081505672589648}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:24:04,215] Trial 19 finished with values: [0.680123658847063, 0.6414801687078954] and parameters: {'n_estimators': 340, 'max_depth': 9, 'learning_rate': 0.0035022573123594885}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:24:22,881] Trial 20 finished with values: [0.7108565193671577, 0.6800370023162067] and parameters: {'n_estimators': 110, 'max_depth': 11, 'learning_rate': 0.015222078426298117}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:25:10,943] Trial 21 finished with values: [0.7659574468085106, 0.7431281549921556] and parameters: {'n_estimators': 410, 'max_depth': 9, 'learning_rate': 0.014890182608853405}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:25:51,195] Trial 22 finished with values: [0.8046917621385706, 0.7830213532107729] and parameters: {'n_estimators': 300, 'max_depth': 13, 'learning_rate': 0.04859813159579442}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.240570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:27:00,541] Trial 23 finished with values: [0.6561192944171668, 0.6054392767438943] and parameters: {'n_estimators': 450, 'max_depth': 10, 'learning_rate': 0.00017212623152044736}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:27:26,550] Trial 24 finished with values: [0.7506819421713039, 0.7248474486865121] and parameters: {'n_estimators': 220, 'max_depth': 8, 'learning_rate': 0.024876946733966055}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:27:46,030] Trial 25 finished with values: [0.47044917257683216, 0.32669347956074435] and parameters: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.00019401820922463654}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.415075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:28:20,073] Trial 26 finished with values: [0.7975995635570103, 0.7763002550781478] and parameters: {'n_estimators': 430, 'max_depth': 6, 'learning_rate': 0.04353016230420353}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.235286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:28:43,167] Trial 27 finished with values: [0.6983087834151664, 0.6639836606120176] and parameters: {'n_estimators': 130, 'max_depth': 15, 'learning_rate': 0.004261049500215826}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.285689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:29:19,081] Trial 28 finished with values: [0.6113838879796326, 0.5650940627583365] and parameters: {'n_estimators': 210, 'max_depth': 12, 'learning_rate': 0.000243423187986129}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222684 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:29:45,222] Trial 29 finished with values: [0.695762865975632, 0.6610448503484393] and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.0084508175852593}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223315 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:30:50,399] Trial 30 finished with values: [0.6721222040370977, 0.6311144456005612] and parameters: {'n_estimators': 410, 'max_depth': 12, 'learning_rate': 0.0010432827937793454}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223706 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:31:12,317] Trial 31 finished with values: [0.7081287506819421, 0.6761869065424971] and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.012345710751106876}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.224781 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:31:53,412] Trial 32 finished with values: [0.6817603200581924, 0.6355832893817422] and parameters: {'n_estimators': 240, 'max_depth': 14, 'learning_rate': 0.00027790546363645844}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.242639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:32:19,719] Trial 33 finished with values: [0.7203127841425714, 0.6903929313574214] and parameters: {'n_estimators': 260, 'max_depth': 6, 'learning_rate': 0.014524797804171359}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226791 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:33:01,461] Trial 34 finished with values: [0.7154028005091835, 0.6860133417404412] and parameters: {'n_estimators': 240, 'max_depth': 15, 'learning_rate': 0.004956846826058407}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.230321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:33:22,917] Trial 35 finished with values: [0.6846699399890889, 0.6464126368799111] and parameters: {'n_estimators': 120, 'max_depth': 12, 'learning_rate': 0.006350728966203915}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:34:33,641] Trial 36 finished with values: [0.6590289143480633, 0.6081350895833032] and parameters: {'n_estimators': 450, 'max_depth': 10, 'learning_rate': 0.0002001804843596527}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.225675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:34:59,492] Trial 37 finished with values: [0.4780869248954355, 0.33864462825772623] and parameters: {'n_estimators': 140, 'max_depth': 15, 'learning_rate': 0.00010306325819530455}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224657 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:35:17,605] Trial 38 finished with values: [0.47881432987815964, 0.34026189192233375] and parameters: {'n_estimators': 100, 'max_depth': 13, 'learning_rate': 0.00015481896416379335}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225336 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:35:59,743] Trial 39 finished with values: [0.6775777414075287, 0.6310061773350087] and parameters: {'n_estimators': 250, 'max_depth': 13, 'learning_rate': 0.00033644378626458747}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.235233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:36:43,231] Trial 40 finished with values: [0.7035824695399163, 0.669167351802395] and parameters: {'n_estimators': 370, 'max_depth': 7, 'learning_rate': 0.006534139232093688}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:37:06,256] Trial 41 finished with values: [0.4735406437534097, 0.32996587273461825] and parameters: {'n_estimators': 140, 'max_depth': 10, 'learning_rate': 0.00011130846778792828}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:37:20,380] Trial 42 finished with values: [0.5904709947263139, 0.5307038802245316] and parameters: {'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.0006018639007939116}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.223647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:37:56,562] Trial 43 finished with values: [0.7266775777414075, 0.6966211124835735] and parameters: {'n_estimators': 320, 'max_depth': 7, 'learning_rate': 0.011652413243255045}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:38:19,181] Trial 44 finished with values: [0.7723222404073468, 0.7507613366756253] and parameters: {'n_estimators': 330, 'max_depth': 5, 'learning_rate': 0.037419868731393496}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.229872 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:38:58,526] Trial 45 finished with values: [0.630114566284779, 0.5836629018147312] and parameters: {'n_estimators': 260, 'max_depth': 8, 'learning_rate': 0.0004195291580549655}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.224492 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:39:21,649] Trial 46 finished with values: [0.6146572104018913, 0.5714895439577345] and parameters: {'n_estimators': 130, 'max_depth': 14, 'learning_rate': 0.0002677152204237476}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:39:51,779] Trial 47 finished with values: [0.7783233315148209, 0.75736068634118] and parameters: {'n_estimators': 190, 'max_depth': 15, 'learning_rate': 0.026677945624448245}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.224362 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:40:15,252] Trial 48 finished with values: [0.790143662484088, 0.7682519780405782] and parameters: {'n_estimators': 370, 'max_depth': 5, 'learning_rate': 0.04857427209581946}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.375988 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:41:02,242] Trial 49 finished with values: [0.6652118567012184, 0.622850605875234] and parameters: {'n_estimators': 290, 'max_depth': 12, 'learning_rate': 0.0004642683685813127}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.221376 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:41:55,480] Trial 50 finished with values: [0.7117657755955629, 0.6807728050407006] and parameters: {'n_estimators': 340, 'max_depth': 12, 'learning_rate': 0.004414153811608333}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225800 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:42:31,306] Trial 51 finished with values: [0.8041462084015275, 0.7822569639572224] and parameters: {'n_estimators': 310, 'max_depth': 11, 'learning_rate': 0.04857427209581946}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.366922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:42:54,324] Trial 52 finished with values: [0.7137661392980542, 0.6816311382712676] and parameters: {'n_estimators': 170, 'max_depth': 8, 'learning_rate': 0.014890182608853405}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:44:05,522] Trial 53 finished with values: [0.6764866339334424, 0.6295065398659209] and parameters: {'n_estimators': 450, 'max_depth': 12, 'learning_rate': 0.0002001804843596527}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.221640 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:44:27,729] Trial 54 finished with values: [0.8034188034188035, 0.7815317002928831] and parameters: {'n_estimators': 220, 'max_depth': 8, 'learning_rate': 0.09846426142855014}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222236 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:44:57,265] Trial 55 finished with values: [0.7290416439352609, 0.6980402664294486] and parameters: {'n_estimators': 320, 'max_depth': 6, 'learning_rate': 0.014524797804171359}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.231030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:45:39,666] Trial 56 finished with values: [0.7035824695399163, 0.669167351802395] and parameters: {'n_estimators': 370, 'max_depth': 7, 'learning_rate': 0.006534139232093688}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.229096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:46:29,842] Trial 57 finished with values: [0.8030551009274414, 0.7808400626042517] and parameters: {'n_estimators': 410, 'max_depth': 12, 'learning_rate': 0.03730924722895519}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.239652 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:47:04,667] Trial 58 finished with values: [0.6210220040007274, 0.5686596121402382] and parameters: {'n_estimators': 320, 'max_depth': 6, 'learning_rate': 0.0008125486273686764}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226780 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:47:38,744] Trial 59 finished with values: [0.5770140025459174, 0.5136263240581515] and parameters: {'n_estimators': 370, 'max_depth': 5, 'learning_rate': 0.00017212623152044736}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.238294 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:48:30,281] Trial 60 finished with values: [0.7308601563920712, 0.7030843376727597] and parameters: {'n_estimators': 350, 'max_depth': 12, 'learning_rate': 0.006350728966203915}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.378173 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:48:58,067] Trial 61 finished with values: [0.6641207492271322, 0.6199556708518742] and parameters: {'n_estimators': 190, 'max_depth': 8, 'learning_rate': 0.0043374928878069045}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.369407 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:49:50,231] Trial 62 finished with values: [0.7750500090925623, 0.7529000941901222] and parameters: {'n_estimators': 430, 'max_depth': 10, 'learning_rate': 0.015222078426298117}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220638 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:50:32,339] Trial 63 finished with values: [0.7043098745226405, 0.6703948241192635] and parameters: {'n_estimators': 430, 'max_depth': 6, 'learning_rate': 0.006827374635586789}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226248 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:50:57,974] Trial 64 finished with values: [0.7203127841425714, 0.6903929313574214] and parameters: {'n_estimators': 260, 'max_depth': 6, 'learning_rate': 0.014524797804171359}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222638 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:51:47,693] Trial 65 finished with values: [0.8090561920349154, 0.7873905938893732] and parameters: {'n_estimators': 390, 'max_depth': 14, 'learning_rate': 0.061795130706726105}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:52:33,868] Trial 66 finished with values: [0.8059647208583379, 0.7840958268766256] and parameters: {'n_estimators': 410, 'max_depth': 11, 'learning_rate': 0.04864830736411999}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.223394 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:53:25,739] Trial 67 finished with values: [0.7945080923804329, 0.773496532755456] and parameters: {'n_estimators': 370, 'max_depth': 14, 'learning_rate': 0.020367296392913264}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224691 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:53:47,005] Trial 68 finished with values: [0.6593926168394254, 0.6155991457212172] and parameters: {'n_estimators': 130, 'max_depth': 10, 'learning_rate': 0.003097333411170963}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.221593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:54:32,838] Trial 69 finished with values: [0.685397344971813, 0.648604747025498] and parameters: {'n_estimators': 360, 'max_depth': 8, 'learning_rate': 0.004207915116612608}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.357145 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:54:54,475] Trial 70 finished with values: [0.712675031823968, 0.6810893451572954] and parameters: {'n_estimators': 270, 'max_depth': 5, 'learning_rate': 0.015222078426298117}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:55:32,144] Trial 71 finished with values: [0.6944899072558647, 0.658707236995697] and parameters: {'n_estimators': 220, 'max_depth': 15, 'learning_rate': 0.0018824428650518596}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.216378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:55:59,488] Trial 72 finished with values: [0.7992362247681397, 0.7783997194726652] and parameters: {'n_estimators': 200, 'max_depth': 13, 'learning_rate': 0.04859813159579442}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:56:33,245] Trial 73 finished with values: [0.7274049827241317, 0.697229073219659] and parameters: {'n_estimators': 320, 'max_depth': 7, 'learning_rate': 0.012345710751106876}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:56:49,430] Trial 74 finished with values: [0.6210220040007274, 0.5686596121402382] and parameters: {'n_estimators': 140, 'max_depth': 6, 'learning_rate': 0.0018749897417486567}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.348555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:57:07,418] Trial 75 finished with values: [0.7108565193671577, 0.6800370023162067] and parameters: {'n_estimators': 110, 'max_depth': 11, 'learning_rate': 0.015222078426298117}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.219749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:58:01,481] Trial 76 finished with values: [0.7697763229678123, 0.7484727703162201] and parameters: {'n_estimators': 360, 'max_depth': 15, 'learning_rate': 0.011652413243255045}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.234870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:58:22,040] Trial 77 finished with values: [0.6813966175668303, 0.6435020861340472] and parameters: {'n_estimators': 120, 'max_depth': 15, 'learning_rate': 0.0010432827937793454}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220402 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:58:50,083] Trial 78 finished with values: [0.6737588652482269, 0.6326265192057197] and parameters: {'n_estimators': 220, 'max_depth': 7, 'learning_rate': 0.006534139232093688}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.215819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:59:16,567] Trial 79 finished with values: [0.6386615748317876, 0.5922480668533594] and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.0007014703053448728}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.219366 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 11:59:29,411] Trial 80 finished with values: [0.6092016730314602, 0.5567366089047577] and parameters: {'n_estimators': 130, 'max_depth': 5, 'learning_rate': 0.0015593654304371215}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224290 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:00:01,937] Trial 81 finished with values: [0.735224586288416, 0.7074112306875796] and parameters: {'n_estimators': 260, 'max_depth': 9, 'learning_rate': 0.014217094410984728}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.378146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:00:23,916] Trial 82 finished with values: [0.5941080196399345, 0.5401908016199481] and parameters: {'n_estimators': 150, 'max_depth': 8, 'learning_rate': 0.0004195291580549655}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.221908 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:01:25,990] Trial 83 finished with values: [0.6886706673940717, 0.6508171519027588] and parameters: {'n_estimators': 390, 'max_depth': 14, 'learning_rate': 0.0011759999206392196}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.287695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:02:02,582] Trial 84 finished with values: [0.5770140025459174, 0.5138654354802722] and parameters: {'n_estimators': 410, 'max_depth': 5, 'learning_rate': 0.0001641606110572901}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231701 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:02:32,070] Trial 85 finished with values: [0.6781232951445717, 0.6309504395659205] and parameters: {'n_estimators': 180, 'max_depth': 13, 'learning_rate': 0.00033644378626458747}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.352504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:02:49,966] Trial 86 finished with values: [0.6424804509910893, 0.5973435683164383] and parameters: {'n_estimators': 130, 'max_depth': 7, 'learning_rate': 0.004261049500215826}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.236007 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:03:41,258] Trial 87 finished with values: [0.6948536097472268, 0.6592707346134264] and parameters: {'n_estimators': 340, 'max_depth': 11, 'learning_rate': 0.0035022573123594885}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.357385 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:04:06,202] Trial 88 finished with values: [0.7481360247317694, 0.7215682110404783] and parameters: {'n_estimators': 240, 'max_depth': 7, 'learning_rate': 0.02470275417522579}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.232563 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:05:10,099] Trial 89 finished with values: [0.6461174759047099, 0.5948276815337404] and parameters: {'n_estimators': 450, 'max_depth': 9, 'learning_rate': 0.0002001804843596527}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:05:36,707] Trial 90 finished with values: [0.657392253136934, 0.6112880685565721] and parameters: {'n_estimators': 180, 'max_depth': 9, 'learning_rate': 0.0033001571895060343}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.236110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:06:22,340] Trial 91 finished with values: [0.8048736133842517, 0.7825337715237867] and parameters: {'n_estimators': 400, 'max_depth': 12, 'learning_rate': 0.061795130706726105}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.227729 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:06:51,224] Trial 92 finished with values: [0.6173849790871068, 0.5601577577602063] and parameters: {'n_estimators': 320, 'max_depth': 5, 'learning_rate': 0.0009081505672589648}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.220921 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:07:18,446] Trial 93 finished with values: [0.7957810511002, 0.7738324353398452] and parameters: {'n_estimators': 440, 'max_depth': 5, 'learning_rate': 0.04736499582408949}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228332 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:07:43,659] Trial 94 finished with values: [0.7055828332424077, 0.6720820424171018] and parameters: {'n_estimators': 310, 'max_depth': 5, 'learning_rate': 0.011652413243255045}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225785 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:08:02,193] Trial 95 finished with values: [0.762865975631933, 0.7395540785025726] and parameters: {'n_estimators': 270, 'max_depth': 5, 'learning_rate': 0.037419868731393496}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.226998 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:08:48,066] Trial 96 finished with values: [0.7595926532096745, 0.7356251169739161] and parameters: {'n_estimators': 410, 'max_depth': 8, 'learning_rate': 0.015222078426298117}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.378120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:09:29,081] Trial 97 finished with values: [0.8059647208583379, 0.7838448781902585] and parameters: {'n_estimators': 330, 'max_depth': 13, 'learning_rate': 0.04859813159579442}.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.376943 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:09:51,941] Trial 98 finished with values: [0.7497726859428987, 0.7229006664160093] and parameters: {'n_estimators': 320, 'max_depth': 5, 'learning_rate': 0.026677945624448245}.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.222219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


[I 2024-10-23 12:10:19,632] Trial 99 finished with values: [0.7650481905801054, 0.7421132561662986] and parameters: {'n_estimators': 220, 'max_depth': 10, 'learning_rate': 0.024876946733966055}.


In [None]:
# Of the Pareto-optimal trials, keep the one with the highest first objective
# value. max() returns the same trial as sorting in descending order and taking
# the head, without sorting the whole list.
# NOTE(review): the first objective looks like accuracy — confirm against the
# study's objective function.
best_trial = max(study_lgbm.best_trials, key=lambda t: t.values[0])

# Hyperparameters of the selected trial, reused for logging and model creation
best_params = best_trial.params

with mlflow.start_run() as run:
    mlflow.set_tag("mlflow.runName", "LightGBM")
    mlflow.set_tag("resampling_technique", "Adasyn")
    mlflow.set_tag("vectorizer_type", "TF-IDF")

    # Log best trial parameters
    mlflow.log_params(best_params)

    # Log algorithm name as a parameter
    mlflow.log_param("algo_name", "LightGBM")

    # Initialize the model using the best trial parameters with unpacking (**)
    model = lgb.LGBMClassifier(
        objective='multiclass',   # Multi-class classification objective
        num_class=3,              # Number of classes
        random_state=42,
        **best_params,
    )

    # Train the model on the resampled training data
    model.fit(X_resampled, y_resampled)

    # Predictions on the test set
    y_test_pred = model.predict(X_test_combined)

    # Compute classification metrics
    classification_rep = classification_report(y_test, y_test_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_test_pred)

    # Log accuracy
    mlflow.log_metric("accuracy", accuracy)

    # Log each per-class / averaged metric from the classification report.
    # Scalar entries (the overall "accuracy" value) are skipped because they
    # are not dicts; accuracy is already logged above.
    for label, metrics in classification_rep.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric}", value)

    # Generate and log confusion matrix
    conf_matrix = confusion_matrix(y_test, y_test_pred)
    confusion_matrix_filename = "confusion_matrix.png"
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - LightGBM")

        # Save and log the confusion matrix plot
        fig.savefig(confusion_matrix_filename)
        mlflow.log_artifact(confusion_matrix_filename)
    finally:
        # Always release the figure, even if saving or logging fails
        plt.close(fig)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.234523 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 111332
[LightGBM] [Info] Number of data points in the train set: 34692, number of used features: 1000
[LightGBM] [Info] Start training from score -1.096971
[LightGBM] [Info] Start training from score -1.123301
[LightGBM] [Info] Start training from score -1.076124


2024/10/23 12:13:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run LightGBM at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/5/runs/37e81da10a4c436092a2ef115f5bd6a3.
2024/10/23 12:13:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/5.


#### Without resampling, using class weights

In [11]:
import lightgbm as lgb

# Initialize a class-weighted LightGBM model. No `device` override is passed,
# so GPU training is not enabled here.
# NOTE(review): the hyperparameters are hard-coded — presumably copied from the
# LightGBM Optuna study above; confirm they match the intended trial.
model = lgb.LGBMClassifier(
    n_estimators=390,
    max_depth=14,
    learning_rate=0.061795130706726105,
    objective='multiclass',   # Multi-class classification objective
    num_class=3,              # Number of classes
    random_state=42,
    class_weight='balanced'   # Handle class imbalance via weights, not oversampling
)

# Fit the model on the original (non-resampled) training data
model.fit(X_train_combined, y_train)

# Evaluate the model on the validation set
f1, accuracy = evaluate_model(model, X_val_combined, y_val)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.318602 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 78953
[LightGBM] [Info] Number of data points in the train set: 27496, number of used features: 998
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


In [12]:
# Show the validation scores returned by evaluate_model for the class-weighted model
(f1, accuracy)

(0.7818568603797807, 0.8014184397163121)

#### Tuning the vectorization technique with LightGBM as the base learner

In [8]:
# Make this the active MLflow experiment (created if it does not exist yet)
mlflow.set_experiment(experiment_name="Exp 2 - BoW vs TfIdf vs Word2vec")

<Experiment: artifact_location='mlflow-artifacts:/9a79f5eeed8641cb9572a547b3fbc09d', creation_time=1729440551320, experiment_id='2', last_update_time=1729440551320, lifecycle_stage='active', name='Exp 2 - BoW vs TfIdf vs Word2vec', tags={'mlflow.sharedViewState.00829c284b45d57939b76fd6febe9d7c1fcc3bbfb87f3c7ecf80063f898df0be': '{"searchFilter":"","orderByKey":"attributes.start_time","orderByAsc":false,"startTime":"ALL","lifecycleFilter":"Active","datasetsFilter":[],"modelVersionFilter":"All '
                                                                                            'Runs","selectedColumns":["metrics.`accuracy`","attributes.`Source`","metrics.`-1_recall`","metrics.`-1_precision`","metrics.`0_recall`","metrics.`0_precision`","metrics.`1_recall`","metrics.`1_precision`"],"runsExpanded":{},"runsPinned":["21250fa2e78e4669b7ee15c4fe8d19d4"],"runsHidden":[],"runsHiddenMode":"FIRST_10_RUNS","viewMaximized":false,"runListHidden":false,"isAccordionReordered":false,"groupBy":"",

In [9]:
# Objective function for Optuna
def objective(trial):
    """Score one TF-IDF configuration with a fixed LightGBM classifier.

    Samples ``max_features`` and ``ngram_range`` for a ``TfidfVectorizer``,
    fits it on the ``comment`` column of the notebook-level ``X_train``,
    appends the ``word_count``, ``char_count`` and ``avg_word_length``
    columns, trains a class-weighted LightGBM model on ``y_train`` and
    evaluates it on ``X_val`` / ``y_val``. The sampled parameters, accuracy
    and classification-report metrics are logged to an MLflow run opened
    with ``nested=True``.

    Args:
        trial: Optuna trial used to sample the vectorizer hyperparameters.

    Returns:
        tuple: ``(accuracy, f1)`` on the validation set, where ``f1`` is the
        macro-averaged F1 score. The order matches the study's two
        ``maximize`` directions.
    """
    # Local import keeps this cell self-contained
    import ast

    # Hyperparameters for TF-IDF Vectorizer
    max_features = trial.suggest_int("max_features", 1000, 10000, step=10)
    # Categorical choices are stored as strings; parse the chosen one back into
    # a tuple with literal_eval, which only accepts Python literals (unlike eval).
    ngram_range = ast.literal_eval(
        trial.suggest_categorical("ngram_range", ["(1, 2)", "(1, 3)"])
    )

    vectorizer = TfidfVectorizer(max_features=max_features, ngram_range=ngram_range)
    X_train_vec = vectorizer.fit_transform(X_train['comment']).toarray()
    X_val_vec = vectorizer.transform(X_val['comment']).toarray()

    # Combine additional hand-crafted features with the TF-IDF matrix
    extra_feature_cols = ['word_count', 'char_count', 'avg_word_length']
    X_train_combined = np.hstack([X_train_vec, X_train[extra_feature_cols].values])
    X_val_combined = np.hstack([X_val_vec, X_val[extra_feature_cols].values])

    # Initialize the LightGBM model (fixed hyperparameters; only the
    # vectorizer is being tuned in this study)
    model = lgb.LGBMClassifier(
        n_estimators=390,
        max_depth=14,
        learning_rate=0.061795130706726105,
        objective='multiclass',   # Multi-class classification objective
        num_class=3,              # Number of classes
        random_state=42,
        class_weight='balanced'
    )
    model.fit(X_train_combined, y_train)

    y_val_pred = model.predict(X_val_combined)  # Predict on validation set
    f1 = f1_score(y_val, y_val_pred, average='macro')  # Calculate F1 (macro)
    accuracy = accuracy_score(y_val, y_val_pred)  # Calculate accuracy

    # Start a child MLflow run
    trial_name = f"{ngram_range}_{max_features}"
    with mlflow.start_run(nested=True, run_name=trial_name):
        # Log hyperparameters
        mlflow.log_params({
            "ngram_range": ngram_range,
            "max_features": max_features
        })

        mlflow.log_param("vectorizer_type", "TF-IDF")

        # Log model metrics
        mlflow.log_metric("accuracy", accuracy)

        # Log the classification report; scalar entries are skipped because
        # only dict entries hold precision / recall / f1-score / support.
        classification_rep = classification_report(y_val, y_val_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

    return accuracy, f1

In [None]:
# Run Optuna optimization.
# Both objectives (accuracy, macro F1) are maximized. The sampler is created
# explicitly with a fixed seed so the sequence of sampled configurations is
# reproducible across kernel restarts.
study = optuna.create_study(
    directions=["maximize", "maximize"],
    study_name="TFIDF_LightGBM_Optimization",
    sampler=optuna.samplers.NSGAIISampler(seed=42),
)
study.optimize(objective, n_trials=70)

In [19]:
import ast

# Close any run that is still active so the parent run can be resumed below
mlflow.end_run()

# NOTE(review): hard-coded ID of an already existing MLflow run (the log shows
# it as "TFIDF_LightGBM_Optimization"). This only works against the same
# tracking server — replace it with the ID of your own parent run.
PARENT_RUN_ID = 'e73d97e3ac1c41048b1f68d9e06f1d18'

# Log the parent MLflow run for the whole study
with mlflow.start_run(run_id=PARENT_RUN_ID):

    # Of the Pareto-optimal trials, keep the one with the highest first
    # objective value (accuracy, per the objective's return order).
    best_trial = max(study.best_trials, key=lambda t: t.values[0])

    # Extract parameters from the best trial
    best_params = best_trial.params

    # Log best trial parameters
    mlflow.log_params(best_params)

    # Log algorithm name as a parameter
    mlflow.log_param("algo_name", "LightGBM")

    # Rebuild the vectorizer with the best settings; ngram_range is stored as a
    # string such as "(1, 2)", so parse it safely instead of using eval().
    vectorizer = TfidfVectorizer(
        ngram_range=ast.literal_eval(best_params['ngram_range']),
        max_features=best_params['max_features'],
    )
    X_train_vec = vectorizer.fit_transform(X_train['comment']).toarray()
    X_test_vec = vectorizer.transform(X_test['comment']).toarray()

    # Combine additional features
    extra_feature_cols = ['word_count', 'char_count', 'avg_word_length']
    X_train_combined = np.hstack([X_train_vec, X_train[extra_feature_cols].values])
    X_test_combined = np.hstack([X_test_vec, X_test[extra_feature_cols].values])

    # Initialize the LightGBM model
    model = lgb.LGBMClassifier(
        n_estimators=390,
        max_depth=14,
        learning_rate=0.061795130706726105,
        objective='multiclass',   # Multi-class classification objective
        num_class=3,              # Number of classes
        random_state=42,
        class_weight='balanced'
    )

    model.fit(X_train_combined, y_train)

    # Predictions on the test set
    y_test_pred = model.predict(X_test_combined)

    # Compute classification metrics
    classification_rep = classification_report(y_test, y_test_pred, output_dict=True)
    accuracy = accuracy_score(y_test, y_test_pred)

    # Log accuracy
    mlflow.log_metric("accuracy", accuracy)

    # Log each per-class / averaged metric from the classification report;
    # scalar entries are skipped because they are not dicts.
    for label, metrics in classification_rep.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric}", value)

    # Generate and log confusion matrix
    conf_matrix = confusion_matrix(y_test, y_test_pred)
    confusion_matrix_filename = "confusion_matrix.png"
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - LightGBM")

        # Save and log the confusion matrix plot
        fig.savefig(confusion_matrix_filename)
        mlflow.log_artifact(confusion_matrix_filename)
    finally:
        # Always release the figure, even if saving or logging fails
        plt.close(fig)

2024/10/24 15:16:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run popular-sow-61 at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/2/runs/3247cbec947b454e9b0363b1b04d321b.
2024/10/24 15:16:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/2.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.833098 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 123708
[LightGBM] [Info] Number of data points in the train set: 27496, number of used features: 4170
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/24 15:17:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run TFIDF_LightGBM_Optimization at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/2/runs/e73d97e3ac1c41048b1f68d9e06f1d18.
2024/10/24 15:17:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/dakshvandanarathi/YT-Sentiment-Analyser.mlflow/#/experiments/2.


In [16]:
import ast

# Parse the stored "(min_n, max_n)" string into a tuple; literal_eval only
# accepts Python literals, so it cannot execute arbitrary code like eval().
ast.literal_eval(best_trial.params['ngram_range'])

(1, 2)

In [20]:
# Rank the Pareto-optimal trials by their first objective value, best first,
# and display the top one
ranked_trials = sorted(
    study.best_trials,
    key=lambda trial: trial.values[0],
    reverse=True,
)
ranked_trials[0]

FrozenTrial(number=68, state=TrialState.COMPLETE, values=[0.8597926895799236, 0.8475817525784809], datetime_start=datetime.datetime(2024, 10, 24, 14, 35, 2, 159732), datetime_complete=datetime.datetime(2024, 10, 24, 14, 36, 42, 287595), params={'max_features': 9410, 'ngram_range': '(1, 2)'}, user_attrs={}, system_attrs={'nsga2:generation': 1}, intermediate_values={}, distributions={'max_features': IntDistribution(high=10000, log=False, low=1000, step=10), 'ngram_range': CategoricalDistribution(choices=('(1, 2)', '(1, 3)'))}, trial_id=68, value=None)