In [None]:
!pip install mlflow boto3 awscli optuna lightgbm imbalanced-learn

Collecting mlflow
  Downloading mlflow-2.17.0-py3-none-any.whl.metadata (29 kB)
Collecting boto3
  Downloading boto3-1.35.45-py3-none-any.whl.metadata (6.7 kB)
Collecting awscli
  Downloading awscli-1.35.11-py3-none-any.whl.metadata (11 kB)
Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting mlflow-skinny==2.17.0 (from mlflow)
  Downloading mlflow_skinny-2.17.0-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.17.0->mlflow)
  Downloading databricks_sdk-0.35.0-py3-none-any.whl.metadata (38 kB)
Coll

In [None]:
from google.colab import userdata
# Fetch the DagsHub personal access token stored under the Colab user-data key
# 'DAGSHUB_PAT'; it is used below as the MLflow tracking credential.
token=userdata.get('DAGSHUB_PAT')

In [None]:
import os
import mlflow


# MLflow reads its HTTP basic-auth credentials from these environment variables.
# The same access token is supplied for both fields.
os.environ["MLFLOW_TRACKING_USERNAME"] = token
os.environ["MLFLOW_TRACKING_PASSWORD"] = token

# Single source of truth for the remote tracking server location.
# repo_name previously held a stale value ("Mini-Mlops-Project") that did not match
# the repository actually used in the tracking URI.
dagshub_url = "https://dagshub.com"
repo_owner = "Sharad-18"
repo_name = "Youtube-comment-analysis-Plugin"

# Set up MLflow tracking URI
# Resolves to https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow
mlflow.set_tracking_uri(f"{dagshub_url}/{repo_owner}/{repo_name}.mlflow")



In [None]:
# Select the experiment that all subsequent runs are recorded under.
# MLflow creates it on the tracking server if no experiment with this name exists yet.
mlflow.set_experiment("LightGBM HP Tuning")

2024/10/22 11:02:03 INFO mlflow.tracking.fluent: Experiment with name 'LightGBM HP Tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/845b91aa4aca448e973401640379d503', creation_time=1729594923726, experiment_id='5', last_update_time=1729594923726, lifecycle_stage='active', name='LightGBM HP Tuning', tags={}>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
import mlflow
import mlflow.sklearn
import optuna
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [None]:
# Load the preprocessed comments and discard every row that has a missing value.
raw_comments = pd.read_csv('/content/reddit_preprocessing (1).csv')
df = raw_comments.dropna()
df.shape

(36662, 2)

In [None]:
# Re-encode the sentiment labels so that all classes are non-negative integers:
# -1 becomes 2, while 0 and 1 are kept as they are.
label_remap = {-1: 2, 0: 0, 1: 1}
df['category'] = df['category'].map(label_remap)

# Any label outside the mapping turns into NaN during .map(); drop those rows.
df = df.dropna(subset=['category'])
df.shape

(36662, 2)

In [None]:
# Step 3: Train-test split on the raw comments.
# The split is done BEFORE fitting TF-IDF and BEFORE SMOTE. Fitting either one on the
# full dataset leaks information from the held-out rows into training (vocabulary/IDF
# weights, and synthetic samples interpolated from test points), which inflates the
# reported test accuracy.
# NOTE: outputs recorded further down in this notebook were produced with the earlier
# resample-then-split pipeline and will differ after a re-run.
y = df['category']
text_train, text_test, y_train_raw, y_test = train_test_split(
    df['clean_comment'], y, test_size=0.2, random_state=42, stratify=y
)

# Step 4: TF-IDF vectorizer setup (fitted on the training comments only)
ngram_range = (1, 3)  # Unigrams up to trigrams
max_features = 1000  # Set max_features to 1000
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)  # transform only: reuse the training vocabulary

# Step 5: Apply SMOTE to handle class imbalance -- on the training split only, so the
# test set keeps the real class distribution and contains no synthetic samples.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_tfidf, y_train_raw)
X_train, y_train = X_resampled, y_resampled

In [None]:
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test, params, trial_number):
    """Fit ``model``, evaluate it on the test split, and record everything in one MLflow run.

    Args:
        model_name: Label for the algorithm; used in the run name, the ``algo_name``
            param and the logged model's artifact path.
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        X_train, y_train: Data the model is fitted on.
        X_test, y_test: Data the model is scored on.
        params: Mapping of hyperparameter name to value, logged as MLflow params.
        trial_number: Identifier embedded in the run name (a trial index or a label).

    Returns:
        The accuracy of the fitted model on ``(X_test, y_test)``.
    """
    run_label = f"Trial_{trial_number}_{model_name}_SMOTE_TFIDF_Trigrams"

    with mlflow.start_run():
        # Run name and experiment type are stored as tags.
        mlflow.set_tags({
            "mlflow.runName": run_label,
            "experiment_type": "algorithm_comparison",
        })

        # Algorithm name first, then the hyperparameters in a single batch call.
        mlflow.log_param("algo_name", model_name)
        mlflow.log_params(params)

        # Fit and predict.
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        # Headline metric.
        accuracy = accuracy_score(y_test, predictions)
        mlflow.log_metric("accuracy", accuracy)

        # Flatten the classification report into "<label>_<metric>" entries.
        # Non-dict entries of the report (the scalar accuracy) are skipped.
        report = classification_report(y_test, predictions, output_dict=True)
        flattened = {
            f"{label}_{metric}": value
            for label, metrics in report.items()
            if isinstance(metrics, dict)
            for metric, value in metrics.items()
        }
        mlflow.log_metrics(flattened)

        # Persist the fitted estimator as a run artifact.
        mlflow.sklearn.log_model(model, f"{model_name}_model")

        return accuracy





In [None]:
# Step 6: Optuna objective function for LightGBM
def objective_lightgbm(trial):
    """Optuna objective: sample one LightGBM configuration and return its test accuracy.

    Each call trains a fresh ``LGBMClassifier`` on the module-level training split,
    scores it on the module-level test split, and logs the whole trial as its own
    MLflow run through ``log_mlflow``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the test split; the study maximizes this value.
    """
    # Dict values are evaluated top to bottom, so the suggest_* calls happen in the
    # order the keys are listed here.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        # NOTE(review): LightGBM appears to apply row subsampling only when
        # subsample_freq > 0, which is not set here -- confirm this dimension has an effect.
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),  # L1 regularization
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),  # L2 regularization
    }

    # Fixed seed so that a configuration always yields the same model.
    model = LGBMClassifier(random_state=42, **params)

    # One MLflow run per trial; its accuracy is the objective value.
    return log_mlflow("LightGBM", model, X_train, X_test, y_train, y_test, params, trial.number)




In [None]:
# Step 7: Run Optuna for LightGBM, log the best model, and plot the importance of each parameter
def run_optuna_experiment(n_trials=100):
    """Tune LightGBM with Optuna, re-log the best configuration, and plot diagnostics.

    Args:
        n_trials: Number of Optuna trials to run. Defaults to 100, the value that
            was previously hard-coded, so existing calls behave as before; pass a
            small number for a quick smoke run.

    Returns:
        None. Results are recorded in MLflow and shown as Optuna plots.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_lightgbm, n_trials=n_trials)

    # Rebuild the winning configuration. best_params holds exactly the hyperparameters
    # sampled in objective_lightgbm, so it can be passed straight through.
    best_params = study.best_params
    best_model = LGBMClassifier(random_state=42, **best_params)

    # Log the best model as its own MLflow run named "Trial_Best_...". The
    # classification report is logged as metrics by log_mlflow; nothing is printed.
    log_mlflow("LightGBM", best_model, X_train, X_test, y_train, y_test, best_params, "Best")

    # Plot parameter importance
    optuna.visualization.plot_param_importances(study).show()

    # Plot optimization history
    optuna.visualization.plot_optimization_history(study).show()

In [None]:
# Run the experiment for LightGBM: the full Optuna study, the "Best" re-log run,
# and the two Optuna plots. Each trial is logged to MLflow as a separate run.
run_optuna_experiment()

[I 2024-10-22 11:11:45,535] A new study created in memory with name: no-name-e39747e6-9224-409d-ab20-5bbd5d336bee


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.243680 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98709
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:13:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_0_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/af3475c9f5c04c7999585fb72487592c.
2024/10/22 11:13:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:13:46,357] Trial 0 finished with value: 0.8105051786091735 and parameters: {'n_estimators': 750, 'learning_rate': 0.032826761959339916, 'max_depth': 13, 'num_leaves': 70, 'min_child_samples': 75, 'colsample_bytree': 0.5543328772300984, 'subsample': 0.6641038748682453, 'reg_alpha': 2.2186844948700615, 'reg_lambda': 1.9126328973909692}. Best is trial 0 with value: 0.8105051786091735.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.388767 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98919
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 967
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:14:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_1_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/fdd47fd56a9a46f6ac61685488bfc519.
2024/10/22 11:14:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:14:25,222] Trial 1 finished with value: 0.7079898541534559 and parameters: {'n_estimators': 201, 'learning_rate': 0.015456652320026862, 'max_depth': 7, 'num_leaves': 114, 'min_child_samples': 29, 'colsample_bytree': 0.5887029817576184, 'subsample': 0.7109986527848671, 'reg_alpha': 0.008920133443183058, 'reg_lambda': 5.4767551002980595}. Best is trial 0 with value: 0.8105051786091735.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98816
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 961
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:16:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_2_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/ea27ce4b0f054d05a4a6e3d3fa74726a.
2024/10/22 11:16:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:16:07,618] Trial 2 finished with value: 0.8022616782921158 and parameters: {'n_estimators': 722, 'learning_rate': 0.03336623121673192, 'max_depth': 11, 'num_leaves': 98, 'min_child_samples': 53, 'colsample_bytree': 0.5532076443035435, 'subsample': 0.7432375187096172, 'reg_alpha': 9.734447799373768, 'reg_lambda': 0.7338584478677368}. Best is trial 0 with value: 0.8105051786091735.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.228543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98798
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:17:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_3_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/db2a0deabe50435b924e5bdd8941fb73.
2024/10/22 11:17:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:17:46,837] Trial 3 finished with value: 0.8174804481082224 and parameters: {'n_estimators': 547, 'learning_rate': 0.07505535801730019, 'max_depth': 14, 'num_leaves': 79, 'min_child_samples': 56, 'colsample_bytree': 0.5734008391170997, 'subsample': 0.7838918812404524, 'reg_alpha': 0.00010978711305434743, 'reg_lambda': 0.19034744149688373}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.222046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98682
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 955
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:19:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_4_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/0e114fbdfda6483eb1951a76d75068a7.
2024/10/22 11:19:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:19:46,559] Trial 4 finished with value: 0.777848235045445 and parameters: {'n_estimators': 617, 'learning_rate': 0.007647465508225788, 'max_depth': 15, 'num_leaves': 58, 'min_child_samples': 84, 'colsample_bytree': 0.5806268105146142, 'subsample': 0.7901381404131058, 'reg_alpha': 0.14141843344512076, 'reg_lambda': 1.6006064685379597}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.236228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98929
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:21:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_5_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/d2344683b4ae49599db0ec8c5c55bf8d.
2024/10/22 11:21:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:21:56,579] Trial 5 finished with value: 0.6057915874022406 and parameters: {'n_estimators': 725, 'learning_rate': 0.0001174613136126467, 'max_depth': 7, 'num_leaves': 126, 'min_child_samples': 27, 'colsample_bytree': 0.9649192310462262, 'subsample': 0.9950473131434101, 'reg_alpha': 0.15447107356061535, 'reg_lambda': 0.11249475077542499}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227982 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98929
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 968
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:23:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_6_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/65d71f340c424a7b90495ef27d54afd8.
2024/10/22 11:23:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:23:10,644] Trial 6 finished with value: 0.6867469879518072 and parameters: {'n_estimators': 433, 'learning_rate': 0.0015131857069809308, 'max_depth': 11, 'num_leaves': 67, 'min_child_samples': 28, 'colsample_bytree': 0.6428254656957613, 'subsample': 0.9230940451058183, 'reg_alpha': 5.429509793155333, 'reg_lambda': 0.48481695356262944}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.221315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:28:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_7_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/8aeedd39edc043d3a6342ae8a3b77970.
2024/10/22 11:28:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:28:09,315] Trial 7 finished with value: 0.6907630522088354 and parameters: {'n_estimators': 982, 'learning_rate': 0.00013305080321039924, 'max_depth': 15, 'num_leaves': 112, 'min_child_samples': 38, 'colsample_bytree': 0.7989591654480681, 'subsample': 0.994761824136281, 'reg_alpha': 0.018917553019344266, 'reg_lambda': 0.044275890108904106}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231237 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98709
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:28:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_8_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/4159f46c8be44d8bba25df66b7d29e5e.
2024/10/22 11:28:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:28:49,159] Trial 8 finished with value: 0.593426336926654 and parameters: {'n_estimators': 343, 'learning_rate': 0.00019792066554578526, 'max_depth': 3, 'num_leaves': 142, 'min_child_samples': 78, 'colsample_bytree': 0.5119109338170331, 'subsample': 0.9159326285477216, 'reg_alpha': 0.01575401866285617, 'reg_lambda': 0.0016329535165821476}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.294659 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98953
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 971
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:31:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_9_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/437362bb180f4774b2bae49a195f460c.
2024/10/22 11:31:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:31:25,017] Trial 9 finished with value: 0.7075671105474529 and parameters: {'n_estimators': 601, 'learning_rate': 0.0023436397459271298, 'max_depth': 12, 'num_leaves': 66, 'min_child_samples': 20, 'colsample_bytree': 0.8516636918166096, 'subsample': 0.5489498248002522, 'reg_alpha': 0.08371965291656443, 'reg_lambda': 0.5154071116717374}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.233979 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98300
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 943
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:31:51 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_10_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/4da9f6ca5d10482d99c6335929514eb4.
2024/10/22 11:31:51 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:31:51,912] Trial 10 finished with value: 0.7983512999365885 and parameters: {'n_estimators': 160, 'learning_rate': 0.09749953522132912, 'max_depth': 9, 'num_leaves': 24, 'min_child_samples': 99, 'colsample_bytree': 0.6784148691426505, 'subsample': 0.8329801465403852, 'reg_alpha': 0.00010383682814162627, 'reg_lambda': 0.002626988486040622}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.235225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98756
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:34:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_11_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/13ff7dcba0154935a56b9ba1db630faa.
2024/10/22 11:34:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:34:28,241] Trial 11 finished with value: 0.8150496723737054 and parameters: {'n_estimators': 893, 'learning_rate': 0.08956062961521026, 'max_depth': 13, 'num_leaves': 39, 'min_child_samples': 64, 'colsample_bytree': 0.7080139548983893, 'subsample': 0.6317868406394864, 'reg_alpha': 0.00060185745585623, 'reg_lambda': 0.011764015161570822}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.227267 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98816
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 961
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:40:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_12_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/66deab9f8839468dae8774ba52ffa74a.
2024/10/22 11:40:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:40:44,091] Trial 12 finished with value: 0.8140984992601987 and parameters: {'n_estimators': 976, 'learning_rate': 0.09930147728980455, 'max_depth': 14, 'num_leaves': 30, 'min_child_samples': 54, 'colsample_bytree': 0.7259685438750993, 'subsample': 0.5974954714318003, 'reg_alpha': 0.00010636946477621968, 'reg_lambda': 0.011141591267014927}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.292276 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98756
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:42:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_13_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/0ec6259a2dc94c239ba4741042baaa36.
2024/10/22 11:42:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:42:22,010] Trial 13 finished with value: 0.6545127879940816 and parameters: {'n_estimators': 440, 'learning_rate': 0.0006768848406828988, 'max_depth': 9, 'num_leaves': 40, 'min_child_samples': 65, 'colsample_bytree': 0.7629499289772879, 'subsample': 0.6365682110707899, 'reg_alpha': 0.0008202028021306179, 'reg_lambda': 0.00020326402318293676}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231606 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:45:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_14_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/8105d85393064892adf634837f0f4ffe.
2024/10/22 11:45:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:45:04,289] Trial 14 finished with value: 0.7948636651870641 and parameters: {'n_estimators': 843, 'learning_rate': 0.008608084100998354, 'max_depth': 13, 'num_leaves': 48, 'min_child_samples': 50, 'colsample_bytree': 0.6757686158588684, 'subsample': 0.5293157704528352, 'reg_alpha': 0.0009567042702187231, 'reg_lambda': 0.008805826531905977}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.237322 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98756
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 958
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:46:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_15_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/e79c9d0ad77149b4addb6ffcf0b94bf2.
2024/10/22 11:46:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:46:28,645] Trial 15 finished with value: 0.8080744028746565 and parameters: {'n_estimators': 512, 'learning_rate': 0.038457721116952226, 'max_depth': 11, 'num_leaves': 88, 'min_child_samples': 65, 'colsample_bytree': 0.8733945331804378, 'subsample': 0.8128311092729773, 'reg_alpha': 0.000803194431902124, 'reg_lambda': 0.11908630122055827}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.243856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:47:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_16_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/4dfc98f648094e0dbfc621ef2f71fda4.
2024/10/22 11:47:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:47:16,047] Trial 16 finished with value: 0.8017332487846122 and parameters: {'n_estimators': 848, 'learning_rate': 0.05484112975565924, 'max_depth': 3, 'num_leaves': 80, 'min_child_samples': 42, 'colsample_bytree': 0.6338177357638769, 'subsample': 0.694056509447847, 'reg_alpha': 0.0033416759027423974, 'reg_lambda': 0.0003371435171972726}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.224587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98732
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 957
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:48:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_17_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/810e99bd8d4f4cf0b9cc2b0084aebb30.
2024/10/22 11:48:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:48:29,729] Trial 17 finished with value: 0.7730923694779116 and parameters: {'n_estimators': 271, 'learning_rate': 0.01657321873843553, 'max_depth': 13, 'num_leaves': 45, 'min_child_samples': 68, 'colsample_bytree': 0.7214862383882035, 'subsample': 0.5836670866040669, 'reg_alpha': 0.00028982073238297554, 'reg_lambda': 0.0284779210227178}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.432786 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98653
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 954
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:49:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_18_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/54ff3bb2fb154ebfac7e8efac34cdf69.
2024/10/22 11:49:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:49:54,073] Trial 18 finished with value: 0.7678080744028747 and parameters: {'n_estimators': 521, 'learning_rate': 0.007641515748449216, 'max_depth': 15, 'num_leaves': 20, 'min_child_samples': 88, 'colsample_bytree': 0.9742993873333452, 'subsample': 0.8471393894528219, 'reg_alpha': 0.0030468681538881183, 'reg_lambda': 0.0015065811488789635}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.231543 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 99035
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 985
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:54:05 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_19_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/90bee2c2c2444cb0abd2acb3b4188a8d.
2024/10/22 11:54:05 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:54:05,169] Trial 19 finished with value: 0.6581061086451068 and parameters: {'n_estimators': 652, 'learning_rate': 0.0005246453810402703, 'max_depth': 10, 'num_leaves': 93, 'min_child_samples': 11, 'colsample_bytree': 0.8173943510072272, 'subsample': 0.7582085292537486, 'reg_alpha': 0.000328858613525456, 'reg_lambda': 0.11693598585071902}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.252253 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:56:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_20_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/d530adffd6b14a528e536e1890fc1bcb.
2024/10/22 11:56:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:56:31,135] Trial 20 finished with value: 0.7865144789685056 and parameters: {'n_estimators': 865, 'learning_rate': 0.013129885380327274, 'max_depth': 7, 'num_leaves': 36, 'min_child_samples': 40, 'colsample_bytree': 0.5070248979626808, 'subsample': 0.8833976215520494, 'reg_alpha': 0.003522977173957798, 'reg_lambda': 0.005396557152916155}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.232233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98798
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 11:59:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_21_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/a9ee6cb724164e5991b70afa134398a9.
2024/10/22 11:59:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 11:59:13,396] Trial 21 finished with value: 0.8157894736842105 and parameters: {'n_estimators': 982, 'learning_rate': 0.0813822624413181, 'max_depth': 14, 'num_leaves': 30, 'min_child_samples': 58, 'colsample_bytree': 0.7376615327686076, 'subsample': 0.6168588736722702, 'reg_alpha': 0.00012706534108108332, 'reg_lambda': 0.015124246162189955}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.224815 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98778
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:01:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_22_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/1de44289af1f4a9d9516502db7e0e558.
2024/10/22 12:01:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:01:30,947] Trial 22 finished with value: 0.816423589093215 and parameters: {'n_estimators': 903, 'learning_rate': 0.05919505846829604, 'max_depth': 14, 'num_leaves': 56, 'min_child_samples': 61, 'colsample_bytree': 0.7552086205007132, 'subsample': 0.6315568628929592, 'reg_alpha': 0.0002476178844811542, 'reg_lambda': 0.029009408369489367}. Best is trial 3 with value: 0.8174804481082224.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.433562 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:04:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_23_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/cdaf3d077c794210a04a0f6c159bdf8b.
2024/10/22 12:04:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:04:22,198] Trial 23 finished with value: 0.8191714225322342 and parameters: {'n_estimators': 929, 'learning_rate': 0.051530589446129725, 'max_depth': 15, 'num_leaves': 52, 'min_child_samples': 47, 'colsample_bytree': 0.8828048266741535, 'subsample': 0.6986547669586318, 'reg_alpha': 0.00018857392522993616, 'reg_lambda': 0.04673944325563521}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.228187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:06:54 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_24_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/66a5893f338d4627975484fa90086256.
2024/10/22 12:06:54 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:06:54,521] Trial 24 finished with value: 0.8114563517226802 and parameters: {'n_estimators': 788, 'learning_rate': 0.024392936539739285, 'max_depth': 14, 'num_leaves': 56, 'min_child_samples': 47, 'colsample_bytree': 0.8856282005632461, 'subsample': 0.6949707362145368, 'reg_alpha': 0.00029729345532986644, 'reg_lambda': 0.04731327836867059}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.422063 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98778
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:08:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_25_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/33bfd59f38ba4cb1a12dc6091dbad1ef.
2024/10/22 12:08:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:08:49,398] Trial 25 finished with value: 0.8171633904037201 and parameters: {'n_estimators': 666, 'learning_rate': 0.05487006146348431, 'max_depth': 12, 'num_leaves': 78, 'min_child_samples': 59, 'colsample_bytree': 0.9170626876971897, 'subsample': 0.7532046780432341, 'reg_alpha': 0.5131778128665597, 'reg_lambda': 0.14470583500500603}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.225993 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98709
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:10:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_26_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/3359a2b4b8ca44018c38bf750119f3b0.
2024/10/22 12:10:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:10:26,668] Trial 26 finished with value: 0.7239484252800676 and parameters: {'n_estimators': 444, 'learning_rate': 0.005058983525487912, 'max_depth': 12, 'num_leaves': 78, 'min_child_samples': 73, 'colsample_bytree': 0.9196798737269295, 'subsample': 0.7617979334783106, 'reg_alpha': 0.8076315957543614, 'reg_lambda': 0.2318479053091433}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.240530 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:12:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_27_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/c0fa88a88cf94467a81b3dd1a39c51eb.
2024/10/22 12:12:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:12:11,665] Trial 27 finished with value: 0.8147326146692031 and parameters: {'n_estimators': 571, 'learning_rate': 0.05046322189805284, 'max_depth': 12, 'num_leaves': 104, 'min_child_samples': 46, 'colsample_bytree': 0.9329750350205127, 'subsample': 0.7188237236494903, 'reg_alpha': 0.5987552817962856, 'reg_lambda': 0.23376090452984616}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.248015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:14:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_28_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/712f8db421a842158d333941b56d3fec.
2024/10/22 12:14:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:14:32,558] Trial 28 finished with value: 0.808708518283661 and parameters: {'n_estimators': 673, 'learning_rate': 0.0234753277683869, 'max_depth': 15, 'num_leaves': 76, 'min_child_samples': 35, 'colsample_bytree': 0.8276460216965509, 'subsample': 0.7891819008574185, 'reg_alpha': 0.04177236661123706, 'reg_lambda': 8.896674584351345}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.226536 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98709
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 956
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:15:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_29_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/2df26519915b4ac4aeca4c4245a9cee5.
2024/10/22 12:15:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:15:12,454] Trial 29 finished with value: 0.7491016698372437 and parameters: {'n_estimators': 350, 'learning_rate': 0.02442963711758247, 'max_depth': 5, 'num_leaves': 71, 'min_child_samples': 74, 'colsample_bytree': 0.9275475358061254, 'subsample': 0.669969063648642, 'reg_alpha': 0.9684606057715454, 'reg_lambda': 2.2544216024723838}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.398770 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 98798
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 960
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:17:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_30_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/66029fe848c748a3bda8ee9f422d468e.
2024/10/22 12:17:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:17:07,412] Trial 30 finished with value: 0.8146269287677024 and parameters: {'n_estimators': 807, 'learning_rate': 0.046881581474798886, 'max_depth': 10, 'num_leaves': 89, 'min_child_samples': 57, 'colsample_bytree': 0.8847479227230458, 'subsample': 0.8609251300492214, 'reg_alpha': 0.40368133890416946, 'reg_lambda': 1.1108498220886374}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.238773 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98778
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 959
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


2024/10/22 12:19:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run Trial_31_LightGBM_SMOTE_TFIDF_Trigrams at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5/runs/aecac74543414481a0edd12d912e1d47.
2024/10/22 12:19:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Sharad-18/Youtube-comment-analysis-Plugin.mlflow/#/experiments/5.
[I 2024-10-22 12:19:35,212] Trial 31 finished with value: 0.8182202494187275 and parameters: {'n_estimators': 911, 'learning_rate': 0.05856963779850545, 'max_depth': 14, 'num_leaves': 57, 'min_child_samples': 60, 'colsample_bytree': 0.7649055766666117, 'subsample': 0.6638750761753872, 'reg_alpha': 2.08940387359133, 'reg_lambda': 0.029749418513427318}. Best is trial 23 with value: 0.8191714225322342.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.407205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 98906
[LightGBM] [Info] Number of data points in the train set: 37848, number of used features: 966
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612


Exception ignored on calling ctypes callback function: <function _log_callback at 0x7fb721d96170>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/lightgbm/basic.py", line 257, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


No further splits with positive gain, best gain: -inf
