In [28]:
!pip install mlflow boto3 awscli optuna imbalanced-learn lightgbm



In [29]:
import mlflow
# Step 2: Set up the MLflow tracking server
mlflow.set_tracking_uri("http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/")

In [30]:
# Set or create an experiment
mlflow.set_experiment("LightGBM HP Tuning")

<Experiment: artifact_location='s3://mlflow-s3-bucket-25/384915840249192182', creation_time=1766406761843, experiment_id='384915840249192182', last_update_time=1766406761843, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [31]:
import pandas as pd

df = pd.read_csv('reddit_preprocessing.csv').dropna()
df.shape

(36662, 2)

In [32]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

In [33]:
# Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

In [34]:
# TF-IDF vectorizer setup
ngram_range = (1, 3)  # Trigram
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X = vectorizer.fit_transform(df['clean_comment'])
y = df['category']

# Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [35]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

In [36]:
# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    with mlflow.start_run():
        # Log model type and trial number
        mlflow.set_tag("mlflow.runName", f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Log hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)
        
        # Print trial progress
        print(f"Trial {trial_number}: Accuracy = {accuracy:.4f}")

        return accuracy

In [37]:
# Optuna objective function for LightGBM
def objective_lightgbm(trial):
    # Hyperparameter space to explore
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    num_leaves = trial.suggest_int('num_leaves', 20, 150)
    min_child_samples = trial.suggest_int('min_child_samples', 10, 100)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    reg_alpha = trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True)  # L1 regularization
    reg_lambda = trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True)  # L2 regularization

    # Log trial parameters
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'num_leaves': num_leaves,
        'min_child_samples': min_child_samples,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda
    }

    # Create LightGBM model
    model = LGBMClassifier(n_estimators=n_estimators,
                           learning_rate=learning_rate,
                           max_depth=max_depth,
                           num_leaves=num_leaves,
                           min_child_samples=min_child_samples,
                           colsample_bytree=colsample_bytree,
                           subsample=subsample,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda,
                           random_state=42)

    # Log each trial as a separate run in MLflow
    accuracy = log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)

    return accuracy

In [38]:
# Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment():
    print("Starting LightGBM hyperparameter tuning with Optuna...")
    print("Testing 100 different parameter combinations...")
    print("=" * 60)
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=100, show_progress_bar=True)

    # Get the best parameters
    best_params = study.best_params
    print("\n" + "=" * 60)
    print("Best parameters found:")
    for param, value in best_params.items():
        if isinstance(value, float):
            print(f"  {param}: {value:.6f}")
        else:
            print(f"  {param}: {value}")
    print(f"  Best accuracy: {study.best_value:.4f}")
    print("=" * 60)
    
    # Create the best model with optimal parameters
    best_model = LGBMClassifier(
        n_estimators=best_params['n_estimators'],
        learning_rate=best_params['learning_rate'],
        max_depth=best_params['max_depth'],
        num_leaves=best_params['num_leaves'],
        min_child_samples=best_params['min_child_samples'],
        colsample_bytree=best_params['colsample_bytree'],
        subsample=best_params['subsample'],
        reg_alpha=best_params['reg_alpha'],
        reg_lambda=best_params['reg_lambda'],
        random_state=42
    )
    
    # Log the best model with MLflow
    log_mlflow("LightGBM_Best", best_model, X_train, X_test, y_train, y_test, best_params, "Best")
    
    print("\nYou can view parameter importance in the MLflow UI.")
    print("\nNote: Optuna visualizations are skipped to avoid display issues.")
    print(f"✓ View results: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/")
    print("\n✓ Experiment completed!")


In [None]:
# Run the experiment for LightGBM
run_optuna_experiment()

[I 2025-12-22 18:21:03,615] A new study created in memory with name: no-name-03a00232-1bf8-4ead-96b6-d6855954e235


Starting LightGBM hyperparameter tuning with Optuna...
Testing 100 different parameter combinations...




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.037896 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 0: Accuracy = 0.5621


2025/12/22 18:21:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/57836d2b2c0b47e6ae5c3c8323be61a1.
2025/12/22 18:21:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 0. Best value: 0.562143:   1%|          | 1/100 [00:31<52:30, 31.82s/it]

[I 2025-12-22 18:21:35,481] Trial 0 finished with value: 0.562143310082435 and parameters: {'n_estimators': 657, 'learning_rate': 0.0001910598449368873, 'max_depth': 3, 'num_leaves': 63, 'min_child_samples': 78, 'colsample_bytree': 0.7353538664430626, 'subsample': 0.7384794658509695, 'reg_alpha': 0.0028509466191572602, 'reg_lambda': 0.04643866427811101}. Best is trial 0 with value: 0.562143310082435.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.038549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98863
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 1: Accuracy = 0.7016


2025/12/22 18:22:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/413362e5b9244cd287d081c6fa62f013.
2025/12/22 18:22:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 1. Best value: 0.701649:   2%|▏         | 2/100 [01:16<1:04:12, 39.31s/it]

[I 2025-12-22 18:22:20,029] Trial 1 finished with value: 0.7016487000634115 and parameters: {'n_estimators': 616, 'learning_rate': 0.00019785999037914435, 'max_depth': 14, 'num_leaves': 105, 'min_child_samples': 58, 'colsample_bytree': 0.6728405350140272, 'subsample': 0.8351967781744285, 'reg_alpha': 1.7755757977609086, 'reg_lambda': 3.802959903557808}. Best is trial 1 with value: 0.7016487000634115.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041819 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98594
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 950
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 2: Accuracy = 0.7452


2025/12/22 18:23:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/8e2994a7602a43a996b3a1d6905d7705.
2025/12/22 18:23:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 2. Best value: 0.745191:   3%|▎         | 3/100 [02:11<1:15:22, 46.62s/it]

[I 2025-12-22 18:23:15,345] Trial 2 finished with value: 0.7451912914817164 and parameters: {'n_estimators': 759, 'learning_rate': 0.0038373270923729563, 'max_depth': 13, 'num_leaves': 145, 'min_child_samples': 93, 'colsample_bytree': 0.9869865209606576, 'subsample': 0.9166220080912468, 'reg_alpha': 0.1893342206364093, 'reg_lambda': 0.0020832783118245406}. Best is trial 2 with value: 0.7451912914817164.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.041683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 3: Accuracy = 0.6987


2025/12/22 18:24:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/a2f28c2e32c24b9f8d64318120095fd4.
2025/12/22 18:24:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 2. Best value: 0.745191:   4%|▍         | 4/100 [03:16<1:26:06, 53.82s/it]

[I 2025-12-22 18:24:20,216] Trial 3 finished with value: 0.6986894948213909 and parameters: {'n_estimators': 957, 'learning_rate': 0.0006087870707645105, 'max_depth': 12, 'num_leaves': 62, 'min_child_samples': 67, 'colsample_bytree': 0.6395523321173324, 'subsample': 0.5597792349721378, 'reg_alpha': 0.0005394131787992287, 'reg_lambda': 0.00011194730749356057}. Best is trial 2 with value: 0.7451912914817164.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054640 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 4: Accuracy = 0.8147


2025/12/22 18:25:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/9d7a8360e98e46689dec1ce7fa24e484.
2025/12/22 18:25:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 4. Best value: 0.814733:   5%|▌         | 5/100 [04:07<1:23:27, 52.71s/it]

[I 2025-12-22 18:25:10,929] Trial 4 finished with value: 0.8147326146692031 and parameters: {'n_estimators': 981, 'learning_rate': 0.04971520263087063, 'max_depth': 15, 'num_leaves': 45, 'min_child_samples': 50, 'colsample_bytree': 0.694110802554443, 'subsample': 0.9074582538690549, 'reg_alpha': 5.944443409199739, 'reg_lambda': 0.05023345626106832}. Best is trial 4 with value: 0.8147326146692031.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049410 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 5: Accuracy = 0.8204


2025/12/22 18:25:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/1cee69afebb64d63999196e4e094daff.
2025/12/22 18:25:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:   6%|▌         | 6/100 [04:56<1:20:45, 51.54s/it] 

[I 2025-12-22 18:26:00,215] Trial 5 finished with value: 0.820439653350243 and parameters: {'n_estimators': 783, 'learning_rate': 0.05407987769329037, 'max_depth': 13, 'num_leaves': 44, 'min_child_samples': 19, 'colsample_bytree': 0.894998491308453, 'subsample': 0.6536613006957335, 'reg_alpha': 0.003855469327141668, 'reg_lambda': 0.16096686608510705}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99100
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 985
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 6: Accuracy = 0.6550


2025/12/22 18:26:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/ce0d30ab452843bb8ab1a24950852905.
2025/12/22 18:26:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:   7%|▋         | 7/100 [05:55<1:23:41, 54.00s/it]

[I 2025-12-22 18:26:59,277] Trial 6 finished with value: 0.6550412175015853 and parameters: {'n_estimators': 746, 'learning_rate': 0.00019761044471938277, 'max_depth': 8, 'num_leaves': 136, 'min_child_samples': 11, 'colsample_bytree': 0.7163122654084766, 'subsample': 0.5536136539629677, 'reg_alpha': 0.13237137371528698, 'reg_lambda': 0.1891807261256738}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032399 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98718
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 7: Accuracy = 0.7089


2025/12/22 18:27:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/0ccff6a673594feeaec6337b9d79d85e.
2025/12/22 18:27:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:   8%|▊         | 8/100 [06:29<1:12:46, 47.46s/it]

[I 2025-12-22 18:27:32,724] Trial 7 finished with value: 0.7089410272669626 and parameters: {'n_estimators': 296, 'learning_rate': 0.0018098563286893482, 'max_depth': 12, 'num_leaves': 65, 'min_child_samples': 86, 'colsample_bytree': 0.5359433563558313, 'subsample': 0.9909002165240713, 'reg_alpha': 0.001328325260095826, 'reg_lambda': 0.005330896531936336}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.034604 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98774
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 8: Accuracy = 0.7714


2025/12/22 18:28:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/47a50741f6ee47a8987c2dc0bd05233b.
2025/12/22 18:28:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:   9%|▉         | 9/100 [07:28<1:17:33, 51.14s/it]

[I 2025-12-22 18:28:31,964] Trial 8 finished with value: 0.7714013950538998 and parameters: {'n_estimators': 944, 'learning_rate': 0.00474157790643825, 'max_depth': 13, 'num_leaves': 66, 'min_child_samples': 72, 'colsample_bytree': 0.9097929064198256, 'subsample': 0.8567407577024089, 'reg_alpha': 0.2414060580821473, 'reg_lambda': 0.14065627387308344}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031732 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98863
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 9: Accuracy = 0.8081


2025/12/22 18:29:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/f791d204ae2347fc9cbdd5081d581bb3.
2025/12/22 18:29:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  10%|█         | 10/100 [07:59<1:07:15, 44.84s/it]

[I 2025-12-22 18:29:02,681] Trial 9 finished with value: 0.8080744028746565 and parameters: {'n_estimators': 465, 'learning_rate': 0.05501074295969039, 'max_depth': 9, 'num_leaves': 69, 'min_child_samples': 57, 'colsample_bytree': 0.6009970646067702, 'subsample': 0.7717568845435887, 'reg_alpha': 2.8541820190632663, 'reg_lambda': 1.3210142634007906}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 10: Accuracy = 0.6702


2025/12/22 18:29:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/102a0b995419493eb7ecb59fef455865.
2025/12/22 18:29:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  11%|█         | 11/100 [08:22<56:59, 38.42s/it]  

[I 2025-12-22 18:29:26,559] Trial 10 finished with value: 0.670154301416191 and parameters: {'n_estimators': 113, 'learning_rate': 0.01585652596670931, 'max_depth': 6, 'num_leaves': 20, 'min_child_samples': 10, 'colsample_bytree': 0.8388095243674694, 'subsample': 0.671502938581837, 'reg_alpha': 0.0001216564083396764, 'reg_lambda': 0.6499167699268189}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036828 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 11: Accuracy = 0.8161


2025/12/22 18:30:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/c3d0b84c590f491bb87b331d6369fe20.
2025/12/22 18:30:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  12%|█▏        | 12/100 [09:10<1:00:18, 41.12s/it]

[I 2025-12-22 18:30:13,867] Trial 11 finished with value: 0.8161065313887127 and parameters: {'n_estimators': 852, 'learning_rate': 0.09306949055828886, 'max_depth': 15, 'num_leaves': 28, 'min_child_samples': 34, 'colsample_bytree': 0.8254065405146427, 'subsample': 0.6438883401820816, 'reg_alpha': 0.015326437841063408, 'reg_lambda': 0.011144525398515357}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042344 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 12: Accuracy = 0.8190


2025/12/22 18:30:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/f7ca6dc5094e44cbb12c08331b51d8ff.
2025/12/22 18:30:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  13%|█▎        | 13/100 [09:53<1:00:24, 41.66s/it]

[I 2025-12-22 18:30:56,753] Trial 12 finished with value: 0.8189600507292327 and parameters: {'n_estimators': 808, 'learning_rate': 0.09404309884635743, 'max_depth': 11, 'num_leaves': 23, 'min_child_samples': 32, 'colsample_bytree': 0.8296325273652116, 'subsample': 0.6329797317064599, 'reg_alpha': 0.01644206079243229, 'reg_lambda': 0.005483793577557147}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038792 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 13: Accuracy = 0.7934


2025/12/22 18:31:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/d44d8ff1fc434ea1a15c3a458603dfba.
2025/12/22 18:31:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  14%|█▍        | 14/100 [10:36<1:00:19, 42.09s/it]

[I 2025-12-22 18:31:39,839] Trial 13 finished with value: 0.7933840625660536 and parameters: {'n_estimators': 486, 'learning_rate': 0.01857542332725463, 'max_depth': 10, 'num_leaves': 40, 'min_child_samples': 30, 'colsample_bytree': 0.8179318917828732, 'subsample': 0.6534475673794413, 'reg_alpha': 0.008476650306256488, 'reg_lambda': 0.001080075218364671}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.041279 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 14: Accuracy = 0.8061


2025/12/22 18:32:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/4e89378fc7dc42ffab7eb5efc0d02de0.
2025/12/22 18:32:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  15%|█▌        | 15/100 [11:28<1:04:10, 45.29s/it]

[I 2025-12-22 18:32:32,558] Trial 14 finished with value: 0.8060663707461425 and parameters: {'n_estimators': 798, 'learning_rate': 0.018840020329418606, 'max_depth': 11, 'num_leaves': 96, 'min_child_samples': 28, 'colsample_bytree': 0.9103219324646542, 'subsample': 0.5926097056267848, 'reg_alpha': 0.0384970921505397, 'reg_lambda': 0.00039343389596565887}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 15: Accuracy = 0.8047


2025/12/22 18:33:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/c222027d1e4f448ab83bc471ee1c48bc.
2025/12/22 18:33:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  16%|█▌        | 16/100 [12:09<1:01:27, 43.89s/it]

[I 2025-12-22 18:33:13,218] Trial 15 finished with value: 0.8046924540266328 and parameters: {'n_estimators': 652, 'learning_rate': 0.03515053415598545, 'max_depth': 7, 'num_leaves': 39, 'min_child_samples': 41, 'colsample_bytree': 0.9968115180254279, 'subsample': 0.5048500178879989, 'reg_alpha': 0.005557557185986074, 'reg_lambda': 0.01218638594483398}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035326 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99010
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 970
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 16: Accuracy = 0.7729


2025/12/22 18:34:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/544af23733e54459ae920d857621843e.
2025/12/22 18:34:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  17%|█▋        | 17/100 [13:03<1:05:03, 47.03s/it]

[I 2025-12-22 18:34:07,543] Trial 16 finished with value: 0.7728809976749101 and parameters: {'n_estimators': 859, 'learning_rate': 0.008559822540828, 'max_depth': 10, 'num_leaves': 47, 'min_child_samples': 21, 'colsample_bytree': 0.8891683264917007, 'subsample': 0.7098874730094564, 'reg_alpha': 0.05354357069966312, 'reg_lambda': 7.768011144680495}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 17: Accuracy = 0.8069


2025/12/22 18:34:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/7be7faddfef7466f987ffe511806d70b.
2025/12/22 18:34:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  18%|█▊        | 18/100 [13:34<57:23, 42.00s/it]  

[I 2025-12-22 18:34:37,817] Trial 17 finished with value: 0.8069118579581483 and parameters: {'n_estimators': 373, 'learning_rate': 0.0878370646528255, 'max_depth': 5, 'num_leaves': 25, 'min_child_samples': 43, 'colsample_bytree': 0.7987351224986475, 'subsample': 0.616438543243805, 'reg_alpha': 0.0012896435037956934, 'reg_lambda': 0.21814132069051556}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039613 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99010
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 970
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 18: Accuracy = 0.8068


2025/12/22 18:35:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/d1e6c2ac47e14b1faf4b555ec5cd2b36.
2025/12/22 18:35:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  19%|█▉        | 19/100 [14:19<58:00, 42.97s/it]

[I 2025-12-22 18:35:23,033] Trial 18 finished with value: 0.8068061720566476 and parameters: {'n_estimators': 565, 'learning_rate': 0.027513756203685624, 'max_depth': 11, 'num_leaves': 85, 'min_child_samples': 21, 'colsample_bytree': 0.77504815022056, 'subsample': 0.783205843041275, 'reg_alpha': 0.00010071888312675715, 'reg_lambda': 0.02102223322587532}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99018
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 19: Accuracy = 0.7008


2025/12/22 18:36:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/d793b69a1d3d44e7bc5f99d9fcae7fe7.
2025/12/22 18:36:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  20%|██        | 20/100 [15:33<1:09:53, 52.42s/it]

[I 2025-12-22 18:36:37,516] Trial 19 finished with value: 0.7008032128514057 and parameters: {'n_estimators': 708, 'learning_rate': 0.001545184056872108, 'max_depth': 13, 'num_leaves': 123, 'min_child_samples': 20, 'colsample_bytree': 0.8734023429983481, 'subsample': 0.7004608708639974, 'reg_alpha': 0.6607311448159247, 'reg_lambda': 0.0030757260801470947}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 20: Accuracy = 0.7812


2025/12/22 18:37:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/12e7fbb81a454f8596630395c206afa4.
2025/12/22 18:37:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  21%|██        | 21/100 [16:32<1:11:32, 54.34s/it]

[I 2025-12-22 18:37:36,290] Trial 20 finished with value: 0.7812301838934687 and parameters: {'n_estimators': 863, 'learning_rate': 0.009315442073992474, 'max_depth': 9, 'num_leaves': 84, 'min_child_samples': 39, 'colsample_bytree': 0.9485164723886167, 'subsample': 0.5993873004397318, 'reg_alpha': 0.021108626224732054, 'reg_lambda': 0.8797101417151936}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046565 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 21: Accuracy = 0.8168


2025/12/22 18:38:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/e3fd2eba769f45528e726dca211cb2c2.
2025/12/22 18:38:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  22%|██▏       | 22/100 [17:22<1:08:51, 52.97s/it]

[I 2025-12-22 18:38:26,100] Trial 21 finished with value: 0.8168463326992179 and parameters: {'n_estimators': 860, 'learning_rate': 0.09462570123847365, 'max_depth': 15, 'num_leaves': 27, 'min_child_samples': 32, 'colsample_bytree': 0.8481480010038815, 'subsample': 0.6496994920388826, 'reg_alpha': 0.011104714452851413, 'reg_lambda': 0.009091917520524344}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039898 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 22: Accuracy = 0.8170


2025/12/22 18:39:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/18071689839e49e988e232a0b3a3cc2d.
2025/12/22 18:39:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 5. Best value: 0.82044:  23%|██▎       | 23/100 [18:13<1:07:09, 52.33s/it]

[I 2025-12-22 18:39:16,923] Trial 22 finished with value: 0.8169520186007186 and parameters: {'n_estimators': 864, 'learning_rate': 0.09670112018038628, 'max_depth': 14, 'num_leaves': 34, 'min_child_samples': 25, 'colsample_bytree': 0.7601207541642713, 'subsample': 0.6893818486177696, 'reg_alpha': 0.004308164631063072, 'reg_lambda': 0.0007521643506574851}. Best is trial 5 with value: 0.820439653350243.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 23: Accuracy = 0.8209


2025/12/22 18:40:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/d50197e718244ddaa1c340ed7a567d60.
2025/12/22 18:40:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  24%|██▍       | 24/100 [19:05<1:06:18, 52.36s/it]

[I 2025-12-22 18:40:09,342] Trial 23 finished with value: 0.8208623969562461 and parameters: {'n_estimators': 795, 'learning_rate': 0.05167129417647259, 'max_depth': 14, 'num_leaves': 52, 'min_child_samples': 18, 'colsample_bytree': 0.7501784843895845, 'subsample': 0.7052601797648755, 'reg_alpha': 0.0028066749350571, 'reg_lambda': 0.0004971472166462539}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99064
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 24: Accuracy = 0.8178


2025/12/22 18:41:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/871b782d8ddc41fea555f7e6089b3167.
2025/12/22 18:41:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  25%|██▌       | 25/100 [19:58<1:05:35, 52.47s/it]

[I 2025-12-22 18:41:02,091] Trial 24 finished with value: 0.8177975058127246 and parameters: {'n_estimators': 685, 'learning_rate': 0.04746058729538909, 'max_depth': 12, 'num_leaves': 53, 'min_child_samples': 15, 'colsample_bytree': 0.9389535295177098, 'subsample': 0.7271692831197107, 'reg_alpha': 0.00037935828851988593, 'reg_lambda': 0.00017972752211726877}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053902 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 25: Accuracy = 0.8107


2025/12/22 18:41:51 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/f9516c2ac0e54d9db8b2057b5c1a0049.
2025/12/22 18:41:51 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  26%|██▌       | 26/100 [20:48<1:03:56, 51.84s/it]

[I 2025-12-22 18:41:52,450] Trial 25 finished with value: 0.810716550412175 and parameters: {'n_estimators': 783, 'learning_rate': 0.027817355699075976, 'max_depth': 11, 'num_leaves': 54, 'min_child_samples': 47, 'colsample_bytree': 0.7774216948655891, 'subsample': 0.7766208370661002, 'reg_alpha': 0.0016163585585907928, 'reg_lambda': 0.0005436728369095416}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042611 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99053
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 26: Accuracy = 0.7910


2025/12/22 18:42:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/f86b00ff8a354cf68634f2e2996cf1ab.
2025/12/22 18:42:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  27%|██▋       | 27/100 [21:46<1:05:14, 53.62s/it]

[I 2025-12-22 18:42:50,221] Trial 26 finished with value: 0.7909532868315367 and parameters: {'n_estimators': 580, 'learning_rate': 0.009767582076197974, 'max_depth': 14, 'num_leaves': 76, 'min_child_samples': 16, 'colsample_bytree': 0.8648962743570652, 'subsample': 0.5126946042116381, 'reg_alpha': 0.0004869798014979192, 'reg_lambda': 0.0018075297344938193}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036662 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98971
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 27: Accuracy = 0.8160


2025/12/22 18:43:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/ff867b43c8d54fcd9d93c1e83270e783.
2025/12/22 18:43:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  28%|██▊       | 28/100 [22:25<58:55, 49.11s/it]  

[I 2025-12-22 18:43:28,801] Trial 27 finished with value: 0.816000845487212 and parameters: {'n_estimators': 496, 'learning_rate': 0.05472870171803512, 'max_depth': 13, 'num_leaves': 34, 'min_child_samples': 36, 'colsample_bytree': 0.9472546496414966, 'subsample': 0.6232580820000146, 'reg_alpha': 0.08540093300762645, 'reg_lambda': 0.09272684161772154}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 28: Accuracy = 0.8106


2025/12/22 18:44:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/e4d87d41b476478ea527391beda66187.
2025/12/22 18:44:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  29%|██▉       | 29/100 [23:11<57:05, 48.25s/it]

[I 2025-12-22 18:44:15,053] Trial 28 finished with value: 0.8106108645106743 and parameters: {'n_estimators': 733, 'learning_rate': 0.030223541666598363, 'max_depth': 10, 'num_leaves': 52, 'min_child_samples': 24, 'colsample_bytree': 0.7426051594571226, 'subsample': 0.5732347755048127, 'reg_alpha': 0.0028717811797730063, 'reg_lambda': 0.3767211465153097}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99053
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 976
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 29: Accuracy = 0.5799


2025/12/22 18:44:54 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/cee19f5522184c75b91f86cbf839a65a.
2025/12/22 18:44:54 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  30%|███       | 30/100 [23:51<53:30, 45.86s/it]

[I 2025-12-22 18:44:55,320] Trial 29 finished with value: 0.5798985415345593 and parameters: {'n_estimators': 924, 'learning_rate': 0.00044298088844530023, 'max_depth': 3, 'num_leaves': 58, 'min_child_samples': 16, 'colsample_bytree': 0.802557304187295, 'subsample': 0.743885240345538, 'reg_alpha': 0.03481486006308964, 'reg_lambda': 0.02976808839521896}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.046050 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98984
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 30: Accuracy = 0.7650


2025/12/22 18:45:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/1e4455568ca541289ab1cbaf313ed1bc.
2025/12/22 18:45:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  31%|███       | 31/100 [24:35<52:04, 45.28s/it]

[I 2025-12-22 18:45:39,273] Trial 30 finished with value: 0.7649545550623547 and parameters: {'n_estimators': 656, 'learning_rate': 0.0066228367443106635, 'max_depth': 12, 'num_leaves': 21, 'min_child_samples': 29, 'colsample_bytree': 0.7041242834285423, 'subsample': 0.8199059905154288, 'reg_alpha': 0.006775667588611189, 'reg_lambda': 0.060850131849950945}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058138 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99064
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 31: Accuracy = 0.8181


2025/12/22 18:46:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/4a15f63142544d4c8b3811ae2b70c66b.
2025/12/22 18:46:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 23. Best value: 0.820862:  32%|███▏      | 32/100 [25:25<53:01, 46.78s/it]

[I 2025-12-22 18:46:29,547] Trial 31 finished with value: 0.8181145635172268 and parameters: {'n_estimators': 691, 'learning_rate': 0.052582391623733064, 'max_depth': 12, 'num_leaves': 46, 'min_child_samples': 15, 'colsample_bytree': 0.9410380909253283, 'subsample': 0.7185908378117059, 'reg_alpha': 0.00031189524940855844, 'reg_lambda': 0.00016849178245164563}. Best is trial 23 with value: 0.8208623969562461.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047995 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 32: Accuracy = 0.8222


2025/12/22 18:47:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_32_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/06745b4f11e2432fb0989e4c9f913aae.
2025/12/22 18:47:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  33%|███▎      | 33/100 [26:22<55:24, 49.63s/it]

[I 2025-12-22 18:47:25,802] Trial 32 finished with value: 0.8222363136757557 and parameters: {'n_estimators': 796, 'learning_rate': 0.058379004933481624, 'max_depth': 14, 'num_leaves': 44, 'min_child_samples': 10, 'colsample_bytree': 0.9122986590610579, 'subsample': 0.6881367675687549, 'reg_alpha': 0.00023865948861254957, 'reg_lambda': 0.00042680865241492025}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049000 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 33: Accuracy = 0.8125


2025/12/22 18:48:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_33_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/ff3f20a6752b4a7a996cea7b9842814d.
2025/12/22 18:48:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  34%|███▍      | 34/100 [27:24<58:37, 53.30s/it]

[I 2025-12-22 18:48:27,679] Trial 33 finished with value: 0.8125132107376876 and parameters: {'n_estimators': 913, 'learning_rate': 0.016803992344134152, 'max_depth': 14, 'num_leaves': 36, 'min_child_samples': 10, 'colsample_bytree': 0.8936907679067396, 'subsample': 0.6875790084691593, 'reg_alpha': 0.002125115982512382, 'reg_lambda': 0.000301079038329729}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 34: Accuracy = 0.8195


2025/12/22 18:49:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_34_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/5a02be9a807b43be8b846d6ec776ad6a.
2025/12/22 18:49:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  35%|███▌      | 35/100 [28:20<58:50, 54.32s/it]

[I 2025-12-22 18:49:24,376] Trial 34 finished with value: 0.8194884802367364 and parameters: {'n_estimators': 802, 'learning_rate': 0.07330590393664824, 'max_depth': 14, 'num_leaves': 76, 'min_child_samples': 24, 'colsample_bytree': 0.8581286469852557, 'subsample': 0.667271099258467, 'reg_alpha': 0.0002089025172902271, 'reg_lambda': 0.0012741194342420778}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.072088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 35: Accuracy = 0.8191


2025/12/22 18:50:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_35_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/945764564fd14b3eb789b1ea908882a0.
2025/12/22 18:50:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  36%|███▌      | 36/100 [29:28<1:02:19, 58.43s/it]

[I 2025-12-22 18:50:32,405] Trial 35 finished with value: 0.8190657366307335 and parameters: {'n_estimators': 807, 'learning_rate': 0.04011651341100609, 'max_depth': 15, 'num_leaves': 80, 'min_child_samples': 19, 'colsample_bytree': 0.9742939595850935, 'subsample': 0.7432420146501967, 'reg_alpha': 0.0002283724682444244, 'reg_lambda': 0.0012641929012053188}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073666 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 36: Accuracy = 0.8179


2025/12/22 18:51:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_36_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/bdd0973a214a4de88ecd23bc6c7c7637.
2025/12/22 18:51:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  37%|███▋      | 37/100 [30:34<1:03:44, 60.70s/it]

[I 2025-12-22 18:51:38,388] Trial 36 finished with value: 0.8179031917142253 and parameters: {'n_estimators': 990, 'learning_rate': 0.06492711005791957, 'max_depth': 14, 'num_leaves': 96, 'min_child_samples': 24, 'colsample_bytree': 0.6541978581878243, 'subsample': 0.6702127549590466, 'reg_alpha': 0.0007698311746266665, 'reg_lambda': 0.00027724765937589167}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.085283 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98821
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 37: Accuracy = 0.7089


2025/12/22 18:52:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_37_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/857a3464e5184b45b28b7b6dc1bb5270.
2025/12/22 18:52:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  38%|███▊      | 38/100 [31:33<1:02:09, 60.16s/it]

[I 2025-12-22 18:52:37,300] Trial 37 finished with value: 0.7089410272669626 and parameters: {'n_estimators': 601, 'learning_rate': 0.002420442037112207, 'max_depth': 13, 'num_leaves': 71, 'min_child_samples': 65, 'colsample_bytree': 0.9174789704763359, 'subsample': 0.6758343028058468, 'reg_alpha': 0.00085969539753545, 'reg_lambda': 0.003980035485166599}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.116080 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99070
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 979
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 38: Accuracy = 0.6921


2025/12/22 18:53:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_38_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/8b1059f11a184cd085d19cea842b2539.
2025/12/22 18:53:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  39%|███▉      | 39/100 [32:55<1:07:50, 66.73s/it]

[I 2025-12-22 18:53:59,346] Trial 38 finished with value: 0.692136968928345 and parameters: {'n_estimators': 632, 'learning_rate': 0.0008678245426355292, 'max_depth': 14, 'num_leaves': 96, 'min_child_samples': 13, 'colsample_bytree': 0.8650562750149036, 'subsample': 0.8083874298335787, 'reg_alpha': 0.0001392185171440319, 'reg_lambda': 0.0007351848355138518}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.071486 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98331
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 942
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 39: Accuracy = 0.7181


2025/12/22 18:54:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_39_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/418f80583b9445b5b9737efa9391184a.
2025/12/22 18:54:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  40%|████      | 40/100 [33:56<1:05:02, 65.05s/it]

[I 2025-12-22 18:55:00,471] Trial 39 finished with value: 0.718135700697527 and parameters: {'n_estimators': 775, 'learning_rate': 0.00012360098344509226, 'max_depth': 15, 'num_leaves': 60, 'min_child_samples': 100, 'colsample_bytree': 0.5647816487426979, 'subsample': 0.5422666922791465, 'reg_alpha': 0.00021682179678270757, 'reg_lambda': 0.00010870025172738956}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98881
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 961
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 40: Accuracy = 0.8018


2025/12/22 18:56:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_40_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/4ee2d587e641493bb711e860159edd1b.
2025/12/22 18:56:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  41%|████      | 41/100 [35:06<1:05:21, 66.47s/it]

[I 2025-12-22 18:56:10,263] Trial 40 finished with value: 0.8018389346861129 and parameters: {'n_estimators': 735, 'learning_rate': 0.013433834788297091, 'max_depth': 13, 'num_leaves': 109, 'min_child_samples': 53, 'colsample_bytree': 0.7222049069542412, 'subsample': 0.5934084364156879, 'reg_alpha': 0.0007858661587605507, 'reg_lambda': 0.0019755184348892816}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.091222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99039
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 974
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 41: Accuracy = 0.8190


2025/12/22 18:57:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_41_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/b9b7f130826a4433a4e30c4705347374.
2025/12/22 18:57:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  42%|████▏     | 42/100 [36:14<1:04:38, 66.86s/it]

[I 2025-12-22 18:57:17,911] Trial 41 finished with value: 0.8189600507292327 and parameters: {'n_estimators': 818, 'learning_rate': 0.03921027590661579, 'max_depth': 15, 'num_leaves': 79, 'min_child_samples': 19, 'colsample_bytree': 0.9669519157062972, 'subsample': 0.7374752890068053, 'reg_alpha': 0.00020464789670869565, 'reg_lambda': 0.001214664794549796}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.233616 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98994
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 42: Accuracy = 0.8134


2025/12/22 18:58:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_42_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/93bb46f6ffc442209d5411ab93db5523.
2025/12/22 18:58:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  43%|████▎     | 43/100 [37:35<1:07:36, 71.17s/it]

[I 2025-12-22 18:58:39,224] Trial 42 finished with value: 0.8133586979496935 and parameters: {'n_estimators': 750, 'learning_rate': 0.02348913977427554, 'max_depth': 15, 'num_leaves': 88, 'min_child_samples': 26, 'colsample_bytree': 0.971540295150137, 'subsample': 0.7504332767218433, 'reg_alpha': 0.00023416605395973087, 'reg_lambda': 0.0012727611986376897}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.335686 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 43: Accuracy = 0.8221


2025/12/22 19:00:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_43_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/318e95183ec440c5b580443aaf11dc80.
2025/12/22 19:00:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  44%|████▍     | 44/100 [39:13<1:13:48, 79.09s/it]

[I 2025-12-22 19:00:16,818] Trial 43 finished with value: 0.8221306277742549 and parameters: {'n_estimators': 903, 'learning_rate': 0.06305948635392422, 'max_depth': 14, 'num_leaves': 75, 'min_child_samples': 10, 'colsample_bytree': 0.9220227068063019, 'subsample': 0.888694749434396, 'reg_alpha': 0.0033293077068896784, 'reg_lambda': 2.3030729605983034}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.290425 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 44: Accuracy = 0.8221


2025/12/22 19:01:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_44_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/c9fc45d6129f4b1497139fc96d454b88.
2025/12/22 19:01:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  45%|████▌     | 45/100 [40:28<1:11:27, 77.96s/it]

[I 2025-12-22 19:01:32,095] Trial 44 finished with value: 0.8221306277742549 and parameters: {'n_estimators': 906, 'learning_rate': 0.07410414009760831, 'max_depth': 14, 'num_leaves': 70, 'min_child_samples': 10, 'colsample_bytree': 0.6754221747625841, 'subsample': 0.9843218357046993, 'reg_alpha': 0.0030875510758319675, 'reg_lambda': 2.531524420677435}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99064
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 978
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612




Trial 45: Accuracy = 0.8204


2025/12/22 19:02:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_45_LightGBM_SMOTE_TFIDF_Trigrams at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182/runs/c590993c44b341b383e60c50ce5271ac.
2025/12/22 19:02:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://ec2-98-89-47-151.compute-1.amazonaws.com:5000/#/experiments/384915840249192182.
Best trial: 32. Best value: 0.822236:  46%|████▌     | 46/100 [41:45<1:10:00, 77.78s/it]

[I 2025-12-22 19:02:49,378] Trial 45 finished with value: 0.820439653350243 and parameters: {'n_estimators': 908, 'learning_rate': 0.06062583199902504, 'max_depth': 13, 'num_leaves': 67, 'min_child_samples': 14, 'colsample_bytree': 0.6107741669875595, 'subsample': 0.9700529373664364, 'reg_alpha': 0.0036371671409054476, 'reg_lambda': 2.0262790284797583}. Best is trial 32 with value: 0.8222363136757557.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097827 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 99112
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 988
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


