In [1]:
import numpy as np
import pandas as pd
import mlflow

import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sudhirjoon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/sudhirjoon/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [5]:
mlflow.set_tracking_uri("http://13.60.79.0:5000")
mlflow.set_experiment("Exp 6 - Detailed HPT of XGboost")

2024/12/30 15:13:12 INFO mlflow.tracking.fluent: Experiment with name 'Exp 6 - Detailed HPT of XGboost' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://capstone-yt-mlflow-bucket/717286515719630891', creation_time=1735567992877, experiment_id='717286515719630891', last_update_time=1735567992877, lifecycle_stage='active', name='Exp 6 - Detailed HPT of XGboost', tags={}>

In [6]:
df = pd.read_csv('reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df.shape

(36662, 2)

In [9]:
# Step 1: Remap the class labels from [-1, 0, 1] to [2, 0, 1]
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

# Step 2: Remove rows where the target labels (category) are NaN
df = df.dropna(subset=['category'])

ngram_range = (1, 2)  # Bigram setting
max_features = 1000  # Set max_features to 1000 for TF-IDF

# Step 4: Train-test split before vectorization and resampling
X_train, X_test, y_train, y_test = train_test_split(df['clean_comment'], df['category'], test_size=0.2, random_state=42, stratify=df['category'])

# Step 2: Vectorization using TF-IDF, fit on training data only
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_vec = vectorizer.fit_transform(X_train)  # Fit on training data
X_test_vec = vectorizer.transform(X_test)  # Transform test data

# Step 3: Imbalance dataset using ADASYN
adasyn = ADASYN(random_state=42)
X_train_vec, y_train = adasyn.fit_resample(X_train_vec, y_train)


# Function to log results in MLflow
def log_mlflow(model_name, model, X_train, X_test, y_train, y_test):
    with mlflow.start_run():
        # Log model type
        mlflow.set_tag("mlflow.runName", f"{model_name}_ADASYN_TFIDF_Bigrams")
        mlflow.set_tag("experiment_type", "algorithm_comparison")

        # Log algorithm name as a parameter
        mlflow.log_param("algo_name", model_name)

        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log accuracy
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        # Log classification report
        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Log the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")


# Step 6: Optuna objective function for XGBoost
def objective_xgboost(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.2),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-8, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0),
        'random_state': 42
    }

    model = XGBClassifier(**params)
     
    # Log trial in MLflow
    with mlflow.start_run(nested=True):  # Nested allows Optuna trials to appear under a single experiment
        # Log trial number in MLflow
        mlflow.set_tag("mlflow.runName", f"XGB_Trial_{trial.number}")
        # Log trial parameters
        for param_name, param_value in params.items():
            mlflow.log_param(param_name, param_value)

        # Train and evaluate the model
        model.fit(X_train_vec, y_train)
        y_pred = model.predict(X_test_vec)
        accuracy = accuracy_score(y_test, y_pred)

        # Log trial metrics
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("trial_number", trial.number)

    # Return the metric for Optuna optimization
    return accuracy


# Step 7: Run Optuna for XGBoost, log the best model only
def run_optuna_experiment():
    study = optuna.create_study(direction="maximize")
    study.optimize(objective_xgboost, n_trials=50)

    # Get the best parameters and log only the best model
    best_params = study.best_params
    best_model = XGBClassifier(
        n_estimators=best_params['n_estimators'],
        max_depth=best_params['max_depth'],
        learning_rate=best_params['learning_rate'],
        min_child_weight=best_params['min_child_weight'],
        subsample=best_params['subsample'],
        colsample_bytree=best_params['colsample_bytree'],
        gamma=best_params['gamma'],
        reg_alpha=best_params['reg_alpha'],
        reg_lambda=best_params['reg_lambda'],
        random_state=42
    )

    # Log the best model with MLflow, passing the algo_name as "xgboost"
    log_mlflow("XGBoost", best_model, X_train_vec, X_test_vec, y_train, y_test)

# Run the experiment for XGBoost
run_optuna_experiment()


[I 2024-12-30 17:12:31,230] A new study created in memory with name: no-name-86bf33de-5d7c-45fa-8155-c7ba7cf5da87
[I 2024-12-30 17:12:48,536] Trial 0 finished with value: 0.8861516804504663 and parameters: {'n_estimators': 459, 'max_depth': 8, 'learning_rate': 0.03473579516692396, 'min_child_weight': 5, 'subsample': 0.8854880480680802, 'colsample_bytree': 0.915293421182102, 'gamma': 0.12087953413191507, 'reg_alpha': 0.9679521809590507, 'reg_lambda': 0.4678693768186719}. Best is trial 0 with value: 0.8861516804504663.


🏃 View run XGB_Trial_0 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/f731451bf7c54a889abe0512c1497c39
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:12:58,706] Trial 1 finished with value: 0.8989970086222066 and parameters: {'n_estimators': 230, 'max_depth': 7, 'learning_rate': 0.15029411109105711, 'min_child_weight': 4, 'subsample': 0.8048954024341375, 'colsample_bytree': 0.9337650718012495, 'gamma': 0.1476344158522324, 'reg_alpha': 0.09035227834822597, 'reg_lambda': 0.7137065673027699}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_1 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/bd9eb0e06e3c41bc86883e2953a16b8f
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:06,863] Trial 2 finished with value: 0.8402252331515044 and parameters: {'n_estimators': 315, 'max_depth': 5, 'learning_rate': 0.028454343945158184, 'min_child_weight': 5, 'subsample': 0.8813301257352621, 'colsample_bytree': 0.8677784863384903, 'gamma': 0.5856571323061787, 'reg_alpha': 0.3855980492923203, 'reg_lambda': 0.6601427001742962}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_2 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/c56286ac3c124961a050ef11c9a942cb
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:11,360] Trial 3 finished with value: 0.8789371810663382 and parameters: {'n_estimators': 128, 'max_depth': 7, 'learning_rate': 0.1337626176736154, 'min_child_weight': 7, 'subsample': 0.7747625673074064, 'colsample_bytree': 0.6413378427661388, 'gamma': 0.9457138778315456, 'reg_alpha': 0.7238995161226665, 'reg_lambda': 0.26914601609864974}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_3 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/bd90d4a9f1e3412581f610778ea739da
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:16,492] Trial 4 finished with value: 0.8646841456976949 and parameters: {'n_estimators': 112, 'max_depth': 4, 'learning_rate': 0.17691910548950282, 'min_child_weight': 2, 'subsample': 0.6970508408280347, 'colsample_bytree': 0.9388023285735021, 'gamma': 0.5343238037944831, 'reg_alpha': 0.2746581129742974, 'reg_lambda': 0.8067012116815028}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_4 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/9ec7971dd4594992922a7fb08063a2fa
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:26,724] Trial 5 finished with value: 0.877705437269048 and parameters: {'n_estimators': 411, 'max_depth': 10, 'learning_rate': 0.026783273489925916, 'min_child_weight': 5, 'subsample': 0.7100366723046353, 'colsample_bytree': 0.7133958168714646, 'gamma': 0.561616109266707, 'reg_alpha': 0.4357730486350516, 'reg_lambda': 0.9736590020272176}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_5 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/356ba59a032f4f48a76a35f10ef54311
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:34,094] Trial 6 finished with value: 0.8632764385007918 and parameters: {'n_estimators': 327, 'max_depth': 6, 'learning_rate': 0.03678512293662114, 'min_child_weight': 2, 'subsample': 0.7893975326562523, 'colsample_bytree': 0.8170169668262078, 'gamma': 0.6231408860381361, 'reg_alpha': 0.361281810489193, 'reg_lambda': 0.5854720202838366}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_6 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/d86e18cd05c641d888b0073261ba3b2b
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:37,126] Trial 7 finished with value: 0.8780573640682738 and parameters: {'n_estimators': 110, 'max_depth': 6, 'learning_rate': 0.19005439060040696, 'min_child_weight': 7, 'subsample': 0.7889999916447461, 'colsample_bytree': 0.8237506663520804, 'gamma': 0.40580958723777893, 'reg_alpha': 0.7163996022790813, 'reg_lambda': 0.497696600435742}. Best is trial 1 with value: 0.8989970086222066.


🏃 View run XGB_Trial_7 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/77fe98f631324ff19196a896452576f6
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:48,216] Trial 8 finished with value: 0.9021643498152384 and parameters: {'n_estimators': 397, 'max_depth': 10, 'learning_rate': 0.13197483329001283, 'min_child_weight': 3, 'subsample': 0.8440107623236079, 'colsample_bytree': 0.6695750447966359, 'gamma': 0.8847512076838875, 'reg_alpha': 0.6215249718576572, 'reg_lambda': 0.35165204118345816}. Best is trial 8 with value: 0.9021643498152384.


🏃 View run XGB_Trial_8 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/3054e5e773bd4c3091ce77dba2a8a8de
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:13:50,671] Trial 9 finished with value: 0.8477916593348583 and parameters: {'n_estimators': 94, 'max_depth': 3, 'learning_rate': 0.18344356568167633, 'min_child_weight': 1, 'subsample': 0.8225650649610503, 'colsample_bytree': 0.7739779384861692, 'gamma': 0.8776570371076085, 'reg_alpha': 0.5895051067421218, 'reg_lambda': 0.7519972043766987}. Best is trial 8 with value: 0.9021643498152384.


🏃 View run XGB_Trial_9 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/c963b99ca77c4f25ae288e1ce6d358cc
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:14:03,365] Trial 10 finished with value: 0.905859581207109 and parameters: {'n_estimators': 500, 'max_depth': 10, 'learning_rate': 0.08755777748298074, 'min_child_weight': 3, 'subsample': 0.9608584197987479, 'colsample_bytree': 0.6136857297538265, 'gamma': 0.7600248036126764, 'reg_alpha': 0.8772585618325919, 'reg_lambda': 0.06760120554429616}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_10 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/34fed6ee8c0948a79224fdb0ab191175
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:14:20,395] Trial 11 finished with value: 0.904275910610593 and parameters: {'n_estimators': 480, 'max_depth': 10, 'learning_rate': 0.08661639152239207, 'min_child_weight': 3, 'subsample': 0.995626137405639, 'colsample_bytree': 0.6006826520143975, 'gamma': 0.7914909936211203, 'reg_alpha': 0.946658307605323, 'reg_lambda': 0.011645716704625768}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_11 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/cad4d67e27bd438395bad3a9c3d54a9a
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:14:32,745] Trial 12 finished with value: 0.9049797642090445 and parameters: {'n_estimators': 476, 'max_depth': 9, 'learning_rate': 0.07641230215637804, 'min_child_weight': 3, 'subsample': 0.9992875519918054, 'colsample_bytree': 0.6140482386277367, 'gamma': 0.7768439549810419, 'reg_alpha': 0.999269026714292, 'reg_lambda': 0.012345900724049065}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_12 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/05937ecf920a4bb2a015744ad3bf5a14
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:14:46,993] Trial 13 finished with value: 0.9051557276086574 and parameters: {'n_estimators': 495, 'max_depth': 9, 'learning_rate': 0.07697536082636515, 'min_child_weight': 3, 'subsample': 0.9951821898400148, 'colsample_bytree': 0.734843440081378, 'gamma': 0.7388363607593368, 'reg_alpha': 0.8370707840334008, 'reg_lambda': 0.011306420537330745}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_13 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/e2b91a20bee64fa9a28b5c9b6c7bc79d
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:14:54,039] Trial 14 finished with value: 0.8812247052613057 and parameters: {'n_estimators': 212, 'max_depth': 8, 'learning_rate': 0.06514222956408562, 'min_child_weight': 1, 'subsample': 0.9404029872533594, 'colsample_bytree': 0.7300281140509696, 'gamma': 0.3653222134654255, 'reg_alpha': 0.8138324754496263, 'reg_lambda': 0.17879864699081777}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_14 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/288cbf4c8e754c9d8b9eb3e0c85c04f4
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:15:03,012] Trial 15 finished with value: 0.8914305824388528 and parameters: {'n_estimators': 405, 'max_depth': 9, 'learning_rate': 0.11175754956518634, 'min_child_weight': 4, 'subsample': 0.6287357581741646, 'colsample_bytree': 0.6874555254051439, 'gamma': 0.7182794786665349, 'reg_alpha': 0.8333572450186144, 'reg_lambda': 0.15853044610755654}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_15 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/61e4612a085d4226b06e31b485657d86
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:15:15,067] Trial 16 finished with value: 0.7846207988738343 and parameters: {'n_estimators': 336, 'max_depth': 9, 'learning_rate': 0.0035779419782839827, 'min_child_weight': 2, 'subsample': 0.9570399627516288, 'colsample_bytree': 0.7570241877144833, 'gamma': 0.9947769091809484, 'reg_alpha': 0.8337353435091444, 'reg_lambda': 0.1423675686477356}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_16 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/28a09d4126fc49fb926d63c9e95efaab
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:15:25,670] Trial 17 finished with value: 0.8872074608481436 and parameters: {'n_estimators': 499, 'max_depth': 8, 'learning_rate': 0.10395981711025926, 'min_child_weight': 6, 'subsample': 0.9293677493835129, 'colsample_bytree': 0.6641897944957889, 'gamma': 0.6862825972550762, 'reg_alpha': 0.5988500610081786, 'reg_lambda': 0.3428521763125259}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_17 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/48d50e30e1d5431c95d5a2a72a26361b
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:15:39,890] Trial 18 finished with value: 0.8997008622206581 and parameters: {'n_estimators': 435, 'max_depth': 9, 'learning_rate': 0.06116896454992544, 'min_child_weight': 4, 'subsample': 0.895738993812859, 'colsample_bytree': 0.8612513974225128, 'gamma': 0.41939228450583854, 'reg_alpha': 0.7460189250575034, 'reg_lambda': 0.09561466482952785}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_18 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/a6cced221a4d415cb008207daff528cb
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:15:53,606] Trial 19 finished with value: 0.898469118423368 and parameters: {'n_estimators': 365, 'max_depth': 10, 'learning_rate': 0.05342798779514426, 'min_child_weight': 3, 'subsample': 0.9657648984088751, 'colsample_bytree': 0.7170301114623789, 'gamma': 0.31000146006742957, 'reg_alpha': 0.8791259710925128, 'reg_lambda': 0.27539996501973607}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_19 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/d126dc1bb4d245ebaf1fb49c04bfcf79
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:16:06,195] Trial 20 finished with value: 0.9007566426183354 and parameters: {'n_estimators': 271, 'max_depth': 8, 'learning_rate': 0.08694657604424544, 'min_child_weight': 2, 'subsample': 0.910120014604113, 'colsample_bytree': 0.7875501348894215, 'gamma': 0.0006189386119706541, 'reg_alpha': 0.18879452253929785, 'reg_lambda': 0.0019678621586101154}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_20 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/fd279ed51097421a8aad6d47c497c533
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:16:19,065] Trial 21 finished with value: 0.9055076544078832 and parameters: {'n_estimators': 499, 'max_depth': 9, 'learning_rate': 0.0732521841396691, 'min_child_weight': 3, 'subsample': 0.9782798695754069, 'colsample_bytree': 0.6167676607613707, 'gamma': 0.7799873530024167, 'reg_alpha': 0.9981890760349367, 'reg_lambda': 0.06543493298945624}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_21 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/b8c43c24c360480397b8b4fd57799c9a
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:16:31,156] Trial 22 finished with value: 0.9000527890198838 and parameters: {'n_estimators': 499, 'max_depth': 9, 'learning_rate': 0.11177813462296377, 'min_child_weight': 4, 'subsample': 0.9718710233025586, 'colsample_bytree': 0.6294658601947707, 'gamma': 0.8057255548219229, 'reg_alpha': 0.9063405324967796, 'reg_lambda': 0.0799243080661593}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_22 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/399365a655294a178821462c0dac2e75
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:16:43,056] Trial 23 finished with value: 0.9037480204117544 and parameters: {'n_estimators': 446, 'max_depth': 10, 'learning_rate': 0.09088904083209468, 'min_child_weight': 3, 'subsample': 0.8574466542891264, 'colsample_bytree': 0.6509796624295606, 'gamma': 0.6814364954409574, 'reg_alpha': 0.9922479222467705, 'reg_lambda': 0.24910044415761481}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_23 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/3c518de70f9247e188b6f55272dd5e99
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:16:59,791] Trial 24 finished with value: 0.8940700334330459 and parameters: {'n_estimators': 433, 'max_depth': 7, 'learning_rate': 0.051935901371796836, 'min_child_weight': 2, 'subsample': 0.9302819091310548, 'colsample_bytree': 0.9946546956302658, 'gamma': 0.8309350409285086, 'reg_alpha': 0.7817612725716876, 'reg_lambda': 0.4136877478290198}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_24 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/1a79910157a941f7a2516649b1c257e1
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:17:19,735] Trial 25 finished with value: 0.8997008622206581 and parameters: {'n_estimators': 370, 'max_depth': 9, 'learning_rate': 0.07252852341292866, 'min_child_weight': 4, 'subsample': 0.9968107179211606, 'colsample_bytree': 0.6887721387599886, 'gamma': 0.7440816395813825, 'reg_alpha': 0.6672985287061954, 'reg_lambda': 0.2033818139949751}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_25 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/50807062b46c40049f873822a008eadb
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:17:35,950] Trial 26 finished with value: 0.9053316910082703 and parameters: {'n_estimators': 458, 'max_depth': 8, 'learning_rate': 0.12682633676387706, 'min_child_weight': 3, 'subsample': 0.9668196814317592, 'colsample_bytree': 0.749962618268646, 'gamma': 0.6590873236383814, 'reg_alpha': 0.5165633983698927, 'reg_lambda': 0.07695911848033529}. Best is trial 10 with value: 0.905859581207109.


🏃 View run XGB_Trial_26 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/b12bc1b64c7a402bb3c217a131a9517e
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:17:49,124] Trial 27 finished with value: 0.907091325004399 and parameters: {'n_estimators': 456, 'max_depth': 8, 'learning_rate': 0.12702980460179747, 'min_child_weight': 1, 'subsample': 0.9138587404020032, 'colsample_bytree': 0.6041807767936443, 'gamma': 0.4878993959536768, 'reg_alpha': 0.44440899670930956, 'reg_lambda': 0.08851478798522996}. Best is trial 27 with value: 0.907091325004399.


🏃 View run XGB_Trial_27 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/d78aa51bfdd1453b8e143f9f1ecca777
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:17:59,501] Trial 28 finished with value: 0.9072672884040119 and parameters: {'n_estimators': 387, 'max_depth': 7, 'learning_rate': 0.1605945592202114, 'min_child_weight': 1, 'subsample': 0.8646890637453818, 'colsample_bytree': 0.6030039747480666, 'gamma': 0.4774803213474028, 'reg_alpha': 0.2636197243648627, 'reg_lambda': 0.1054685053556797}. Best is trial 28 with value: 0.9072672884040119.


🏃 View run XGB_Trial_28 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/0650d9c6f1134a1f8812d190fb057b1a
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:07,389] Trial 29 finished with value: 0.9077951786028506 and parameters: {'n_estimators': 374, 'max_depth': 5, 'learning_rate': 0.16112122016753683, 'min_child_weight': 1, 'subsample': 0.8659429879770952, 'colsample_bytree': 0.602499638291215, 'gamma': 0.2781800843397411, 'reg_alpha': 0.013188172484570604, 'reg_lambda': 0.40292714052291434}. Best is trial 29 with value: 0.9077951786028506.


🏃 View run XGB_Trial_29 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/a81c8f0efb2c466a80004a4ac8a8fe27
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:17,168] Trial 30 finished with value: 0.9079711420024635 and parameters: {'n_estimators': 366, 'max_depth': 5, 'learning_rate': 0.16179881212157068, 'min_child_weight': 1, 'subsample': 0.8724454705255686, 'colsample_bytree': 0.6838637216256762, 'gamma': 0.2419444431643665, 'reg_alpha': 0.0057151380435950805, 'reg_lambda': 0.5805046546294741}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_30 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/2da360bb758e46d69cc2e98b233a486c
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:24,602] Trial 31 finished with value: 0.9060355446067218 and parameters: {'n_estimators': 360, 'max_depth': 5, 'learning_rate': 0.1649024304113437, 'min_child_weight': 1, 'subsample': 0.8661235664062792, 'colsample_bytree': 0.6411222687770959, 'gamma': 0.23157298302838364, 'reg_alpha': 0.006649534482538402, 'reg_lambda': 0.5662399786995901}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_31 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/adc73b9183f54dfcb4848e9660e91f33
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:30,472] Trial 32 finished with value: 0.9025162766144642 and parameters: {'n_estimators': 286, 'max_depth': 5, 'learning_rate': 0.15727688211486782, 'min_child_weight': 1, 'subsample': 0.8175485099799606, 'colsample_bytree': 0.6005020281557463, 'gamma': 0.23482326707581966, 'reg_alpha': 0.12111142838886951, 'reg_lambda': 0.4393236393396524}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_32 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/5686748c43c74fef80656b53b14087c7
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:37,875] Trial 33 finished with value: 0.904275910610593 and parameters: {'n_estimators': 297, 'max_depth': 6, 'learning_rate': 0.1437634166670155, 'min_child_weight': 1, 'subsample': 0.8941079261990631, 'colsample_bytree': 0.6894745489931969, 'gamma': 0.4714716355928472, 'reg_alpha': 0.0364167083459051, 'reg_lambda': 0.5659873688410495}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_33 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/611b77877b7a4f03a9713f40d32b42fa
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:44,352] Trial 34 finished with value: 0.9067393982051734 and parameters: {'n_estimators': 388, 'max_depth': 4, 'learning_rate': 0.16950421174588412, 'min_child_weight': 1, 'subsample': 0.8391383771154939, 'colsample_bytree': 0.6599198930642294, 'gamma': 0.09760968295219372, 'reg_alpha': 0.21130187715774276, 'reg_lambda': 0.6686136009719146}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_34 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/91c564b23d4d46c9baa59e8084690366
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:49,126] Trial 35 finished with value: 0.8998768256202709 and parameters: {'n_estimators': 239, 'max_depth': 4, 'learning_rate': 0.19922587221619037, 'min_child_weight': 1, 'subsample': 0.881728296358437, 'colsample_bytree': 0.633150510638555, 'gamma': 0.29205577771425484, 'reg_alpha': 0.08525411493368587, 'reg_lambda': 0.3634983879223438}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_35 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/7aa2a57562914d2a99a68758693b22be
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:18:57,327] Trial 36 finished with value: 0.9065634348055605 and parameters: {'n_estimators': 341, 'max_depth': 7, 'learning_rate': 0.15432305981022923, 'min_child_weight': 2, 'subsample': 0.7520186374896376, 'colsample_bytree': 0.6796614410957429, 'gamma': 0.4961748276330945, 'reg_alpha': 0.29619878840982605, 'reg_lambda': 0.8073639652857167}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_36 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/cc13489a515343c79eee86c91891b03c
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:05,306] Trial 37 finished with value: 0.9065634348055605 and parameters: {'n_estimators': 425, 'max_depth': 5, 'learning_rate': 0.1391868822534392, 'min_child_weight': 2, 'subsample': 0.9069777347458218, 'colsample_bytree': 0.6437663623567993, 'gamma': 0.14989545991976094, 'reg_alpha': 0.16886447857755368, 'reg_lambda': 0.9111351359311005}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_37 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/f32eecbb3ec943c499c21eb9529cc762
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:16,082] Trial 38 finished with value: 0.9040999472109801 and parameters: {'n_estimators': 382, 'max_depth': 7, 'learning_rate': 0.12198181493452162, 'min_child_weight': 1, 'subsample': 0.7562333253739542, 'colsample_bytree': 0.6261899037282107, 'gamma': 0.3449337036247022, 'reg_alpha': 0.2873318446821697, 'reg_lambda': 0.6041526933285082}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_38 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/7485ff834e2e4be8ab6e6ed3da85ac8b
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:20,632] Trial 39 finished with value: 0.8967094844272392 and parameters: {'n_estimators': 189, 'max_depth': 6, 'learning_rate': 0.14844149708467438, 'min_child_weight': 1, 'subsample': 0.8688753172174765, 'colsample_bytree': 0.7042311625440507, 'gamma': 0.4492220683862046, 'reg_alpha': 0.4487732549538655, 'reg_lambda': 0.29448145249071456}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_39 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/ae3b707d286b4b7e85b10c3655c9752f
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:24,533] Trial 40 finished with value: 0.8958296674291747 and parameters: {'n_estimators': 316, 'max_depth': 3, 'learning_rate': 0.1733228432765037, 'min_child_weight': 2, 'subsample': 0.8407816685411499, 'colsample_bytree': 0.6017959772902296, 'gamma': 0.5559750332161224, 'reg_alpha': 0.35509302418417543, 'reg_lambda': 0.20888756497008304}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_40 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/1b4d0c3805474b5dbfdea5c7b1e74b74
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:31,397] Trial 41 finished with value: 0.9049797642090445 and parameters: {'n_estimators': 393, 'max_depth': 4, 'learning_rate': 0.1653052083296023, 'min_child_weight': 1, 'subsample': 0.8435955266769698, 'colsample_bytree': 0.6594683493824274, 'gamma': 0.04975218824112218, 'reg_alpha': 0.24080403360300268, 'reg_lambda': 0.6967583300878972}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_41 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/942fda57fa664c16b894bac8746a0f8e
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:38,014] Trial 42 finished with value: 0.9062115080063347 and parameters: {'n_estimators': 417, 'max_depth': 4, 'learning_rate': 0.17429878787297987, 'min_child_weight': 1, 'subsample': 0.805197569287647, 'colsample_bytree': 0.6613247301018349, 'gamma': 0.1353081919468532, 'reg_alpha': 0.06893297717236642, 'reg_lambda': 0.5188066627506368}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_42 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/c217bb93504d4c2e8cc5a169ea1451ae
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:44,634] Trial 43 finished with value: 0.9077951786028506 and parameters: {'n_estimators': 350, 'max_depth': 5, 'learning_rate': 0.18815279225890852, 'min_child_weight': 2, 'subsample': 0.8286000303433081, 'colsample_bytree': 0.6336906162980801, 'gamma': 0.20648768126210193, 'reg_alpha': 0.1256404843984147, 'reg_lambda': 0.48935106486072744}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_43 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/0eb1be0b369a427f940af2f4a202482c
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:19:53,797] Trial 44 finished with value: 0.9062115080063347 and parameters: {'n_estimators': 359, 'max_depth': 6, 'learning_rate': 0.18536504391416894, 'min_child_weight': 2, 'subsample': 0.9149966616585039, 'colsample_bytree': 0.6298408102957238, 'gamma': 0.23081645427463154, 'reg_alpha': 0.1439433314144388, 'reg_lambda': 0.4970071079868325}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_44 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/ec1c9849da044df4934620103fb83404
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:20:01,297] Trial 45 finished with value: 0.9060355446067218 and parameters: {'n_estimators': 344, 'max_depth': 5, 'learning_rate': 0.19880250707479113, 'min_child_weight': 2, 'subsample': 0.8753592554920006, 'colsample_bytree': 0.6180485774357906, 'gamma': 0.20036413755530963, 'reg_alpha': 0.11334344880018615, 'reg_lambda': 0.4091781237475994}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_45 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/17868a1d16ea42559a5491cd0590dd28
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:20:07,179] Trial 46 finished with value: 0.9032201302129157 and parameters: {'n_estimators': 316, 'max_depth': 5, 'learning_rate': 0.15884391884226792, 'min_child_weight': 1, 'subsample': 0.8204621831664533, 'colsample_bytree': 0.636326827184229, 'gamma': 0.5918236765847515, 'reg_alpha': 0.048510483456284455, 'reg_lambda': 0.5280520569034666}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_46 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/dcdcfbfbf12e4b53a4882aeec3995140
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:20:16,641] Trial 47 finished with value: 0.9062115080063347 and parameters: {'n_estimators': 408, 'max_depth': 7, 'learning_rate': 0.18068321305534368, 'min_child_weight': 1, 'subsample': 0.7784247998084806, 'colsample_bytree': 0.6002990596962247, 'gamma': 0.29009240030451355, 'reg_alpha': 0.3293370817152124, 'reg_lambda': 0.6426336957132047}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_47 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/1d9af3607e854ee7a10deb27a4774619
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:20:26,535] Trial 48 finished with value: 0.8916065458384655 and parameters: {'n_estimators': 464, 'max_depth': 6, 'learning_rate': 0.1351850127216817, 'min_child_weight': 5, 'subsample': 0.8540666899674528, 'colsample_bytree': 0.8572570587766205, 'gamma': 0.39325071908687603, 'reg_alpha': 0.020872485792999714, 'reg_lambda': 0.7495679728652853}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_48 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/fa324fe8216b432e80c1169db76f84c5
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891


[I 2024-12-30 17:20:37,896] Trial 49 finished with value: 0.907091325004399 and parameters: {'n_estimators': 267, 'max_depth': 6, 'learning_rate': 0.18779856116920668, 'min_child_weight': 2, 'subsample': 0.7993530839476384, 'colsample_bytree': 0.6762670388056441, 'gamma': 0.5119258999221035, 'reg_alpha': 0.4307342241387505, 'reg_lambda': 0.4444037679100022}. Best is trial 30 with value: 0.9079711420024635.


🏃 View run XGB_Trial_49 at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/709436fffe0f4284999783601ee0f2a9
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891




🏃 View run XGBoost_ADASYN_TFIDF_Bigrams at: http://13.60.79.0:5000/#/experiments/717286515719630891/runs/7a97f3bd43d04846895bc80c3615f9e5
🧪 View experiment at: http://13.60.79.0:5000/#/experiments/717286515719630891
