In [33]:
%pip install -q optuna lightgbm

Note: you may need to restart the kernel to use updated packages.




This experiment compares the performance of several classification algorithms (Logistic Regression, Naive Bayes, SVM, XGBoost, LightGBM). We apply Hyperparameter Tuning (HPT) to the complex models using Optuna, while fixing the feature engineering pipeline based on previous optimal choices:

* **Vectorization:** TF-IDF (Term Frequency-Inverse Document Frequency)
* **N-gram Range:** Bigram `(1, 2)` (Unigrams and Bigrams)
* **Max Features:** 1000
* **Imbalance Handling:** Undersampling (`RandomUnderSampler`)

## 1. Setup and Dependencies

### 1.1 Import Libraries

In [34]:
import optuna
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, ADASYN
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

## 2. MLflow and Data Preparation

### 2.1 MLflow Configuration

In [35]:
# Set the remote tracking server URI
mlflow.set_tracking_uri("http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/")

# Set or create a new experiment
mlflow.set_experiment("Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5")

<Experiment: artifact_location='s3://mlfow-bucket-2025/17', creation_time=1763217601948, experiment_id='17', last_update_time=1763217601948, lifecycle_stage='active', name='Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5', tags={'mlflow.experimentKind': 'custom_model_development'}>

### 2.2 Data Loading, Remapping, and Feature Engineering

In [36]:
df = pd.read_csv('../data/reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})
df = df.dropna(subset=['category'])

print(f"Data shape: {df.shape}")
print(f"Class distribution:\n{df['category'].value_counts().sort_index()}")

# Fixed parameters
ngram_range = (1, 2)
max_features = 1000

# Split and vectorize
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_comment'], df['category'], 
    test_size=0.2, random_state=42, stratify=df['category']
)

vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

print(f"\nOriginal Training Data Shape: {X_train_vec.shape}")
print(f"Test Data Shape: {X_test_vec.shape}")

Data shape: (36662, 2)
Class distribution:
category
0    12644
1    15770
2     8248
Name: count, dtype: int64

Original Training Data Shape: (29329, 1000)
Test Data Shape: (7333, 1000)


### Resampled Data

In [37]:
print("\n" + "="*80)
print("CREATING 3 RESAMPLED DATASETS (TOP 3 IMBALANCE METHODS)")
print("="*80)

# Method 1: Undersampling
sampler_under = RandomUnderSampler(random_state=42)
X_train_under, y_train_under = sampler_under.fit_resample(X_train_vec, y_train)
print(f"1. Undersampled: {X_train_under.shape}")

# Method 2: Oversampling
sampler_over = RandomOverSampler(random_state=42)
X_train_over, y_train_over = sampler_over.fit_resample(X_train_vec, y_train)
print(f"2. Oversampled: {X_train_over.shape}")

# Method 3: ADASYN
sampler_adasyn = ADASYN(random_state=42)
X_train_adasyn, y_train_adasyn = sampler_adasyn.fit_resample(X_train_vec, y_train)
print(f"3. ADASYN: {X_train_adasyn.shape}")

# Store all datasets
datasets = {
    'undersampling': (X_train_under, y_train_under),
    'oversampling': (X_train_over, y_train_over),
    'adasyn': (X_train_adasyn, y_train_adasyn)
}


CREATING 3 RESAMPLED DATASETS (TOP 3 IMBALANCE METHODS)
1. Undersampled: (19794, 1000)
2. Oversampled: (37848, 1000)
3. ADASYN: (35909, 1000)


## 3. MLflow Logging and Evaluation Helper

In [38]:
def log_mlflow(model_name, model, params=None, imbalance_method="undersampling"):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_{imbalance_method}_TFIDF(1000)_HPT")
        mlflow.set_tag("experiment_type", "multi_algo_hpt")
        mlflow.log_param("algo_name", model_name)
        mlflow.log_param("vectorizer_type", "TF-IDF")
        mlflow.log_param("ngram_range", str(ngram_range))
        mlflow.log_param("max_features", max_features)
        mlflow.log_param("imbalance_handling", imbalance_method)
        
        if params:
            for key, value in params.items():
                mlflow.log_param(key, value)

        # Predict
        y_pred = model.predict(X_test_vec)

        # Log metrics
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Confusion matrix
        conf_matrix = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues")
        plt.title(f"Confusion Matrix: {model_name} ({imbalance_method})")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.savefig(f"conf_matrix_{model_name}_{imbalance_method}.png")
        mlflow.log_artifact(f"conf_matrix_{model_name}_{imbalance_method}.png")
        plt.close()

        mlflow.sklearn.log_model(model, f"{model_name}_model")
        
        print(f"    ‚úì Logged with Accuracy: {accuracy:.4f}")

## 4. Hyperparameter Tuning Objectives (Optuna)

In [39]:
N_TRIALS = 10

def tune_logistic_regression(X_train, y_train):
    def objective(trial):
        C = trial.suggest_float('C', 1e-3, 10.0, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'lbfgs'])
        model = LogisticRegression(C=C, solver=solver, random_state=42, multi_class='auto', max_iter=1000)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_linear_svc(X_train, y_train):
    def objective(trial):
        C = trial.suggest_float('C', 0.1, 10.0, log=True)
        model = LinearSVC(C=C, random_state=42, max_iter=1000, dual='auto')
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_xgboost(X_train, y_train):
    def objective(trial):
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        max_depth = trial.suggest_int('max_depth', 3, 7)
        model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth,
                             random_state=42, use_label_encoder=False, eval_metric='mlogloss', n_jobs=-1)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_lightgbm(X_train, y_train):
    def objective(trial):
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        num_leaves = trial.suggest_int('num_leaves', 10, 50)
        model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, num_leaves=num_leaves,
                              random_state=42, verbose=-1)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

## 5. Execution and MLflow Logging

In [40]:
def train_individual_models_with_hpt(model_name, tune_func):
    """
    Train separate models on each resampling method and log each one.
    This allows us to find which algorithm + sampling method combination works best.
    """
    print(f"\n{'='*80}")
    print(f"TRAINING {model_name} ON ALL 3 SAMPLING METHODS")
    print(f"{'='*80}")
    
    for method, (X_resample, y_resample) in datasets.items():
        print(f"\n  [{method.upper()}] Running HPT ({N_TRIALS} trials)...")
        
        # Tune hyperparameters
        best_params = tune_func(X_resample, y_resample)
        print(f"    Best params: {best_params}")
        
        # Train model with best params
        if model_name == 'LogisticRegression':
            model = LogisticRegression(random_state=42, multi_class='auto', max_iter=1000, **best_params)
        elif model_name == 'LinearSVC':
            model = LinearSVC(random_state=42, max_iter=1000, dual='auto', **best_params)
        elif model_name == 'XGBoost':
            model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss', n_jobs=-1, **best_params)
        elif model_name == 'LightGBM':
            model = LGBMClassifier(random_state=42, verbose=-1, **best_params)
        
        model.fit(X_resample, y_resample)
        
        # Log each model separately
        log_mlflow(f"{model_name}", model, params=best_params, imbalance_method=method)
    
    print(f"{'='*80}")

# Execute Training
# ============================================================================
print("\n" + "="*80)
print("STARTING MODEL TRAINING WITH ALL SAMPLING METHODS")
print("="*80)
print("Strategy: Train each algorithm on all 3 sampling methods separately")
print("This creates 12 models (4 algorithms √ó 3 sampling methods)")
print("MLflow will track all and identify the best combination")
print("="*80)

# Train baseline
print(f"\n{'='*80}")
print("BASELINE: MultinomialNB (No HPT)")
print(f"{'='*80}")
mnb = MultinomialNB(alpha=1.0)
mnb.fit(X_train_under, y_train_under)
log_mlflow('MultinomialNB', mnb, params={'alpha': 1.0}, imbalance_method='undersampling')

# Train all models on all sampling methods individually
models_to_tune = [
    ('LogisticRegression', tune_logistic_regression),
    ('LinearSVC', tune_linear_svc),
    ('XGBoost', tune_xgboost),
    ('LightGBM', tune_lightgbm)
]

for model_name, tune_func in models_to_tune:
    train_individual_models_with_hpt(model_name, tune_func)

print("\n" + "="*80)
print("‚úì ALL MODELS TRAINED AND LOGGED")
print("="*80)


STARTING MODEL TRAINING WITH ALL SAMPLING METHODS
Strategy: Train each algorithm on all 3 sampling methods separately
This creates 12 models (4 algorithms √ó 3 sampling methods)
MLflow will track all and identify the best combination

BASELINE: MultinomialNB (No HPT)




    ‚úì Logged with Accuracy: 0.7000
üèÉ View run MultinomialNB_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/03490333fe014eefbedad5d9db0a1bd9
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:09:07,678] A new study created in memory with name: no-name-05dce9b7-d596-414a-9c60-47b1757eb243
[I 2025-11-15 21:09:07,829] Trial 0 finished with value: 0.7809900450020455 and parameters: {'C': 2.8430254938819615, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7809900450020455.



TRAINING LogisticRegression ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:09:07,970] Trial 1 finished with value: 0.6941224601118232 and parameters: {'C': 0.017073324868662743, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7809900450020455.
[I 2025-11-15 21:09:08,061] Trial 2 finished with value: 0.7584890222282831 and parameters: {'C': 0.239696894463243, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7809900450020455.
[I 2025-11-15 21:09:08,165] Trial 3 finished with value: 0.7749897722623755 and parameters: {'C': 0.9082168404175678, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7809900450020455.
[I 2025-11-15 21:09:08,210] Trial 4 finished with value: 0.6808945861175508 and parameters: {'C': 0.011122852543296783, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.7809900450020455.
[I 2025-11-15 21:09:08,276] Trial 5 finished with value: 0.7231692349652257 and parameters: {'C': 0.06947622960784537, 'solver': 'liblinear'}. Best is trial 0 with value: 0.7809900450020455.
[I 2025-11-15 21:09:08,318] Trial 6 finished with valu

    Best params: {'C': 2.8430254938819615, 'solver': 'liblinear'}




    ‚úì Logged with Accuracy: 0.7810
üèÉ View run LogisticRegression_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/62d56a3afa6c4a86ae59634091f762bc
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:09:56,142] A new study created in memory with name: no-name-4cbd1547-ff1a-4472-ba12-c4719349083c
[I 2025-11-15 21:09:56,210] Trial 0 finished with value: 0.6653484249284058 and parameters: {'C': 0.0012769387851858896, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.6653484249284058.



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:09:56,427] Trial 1 finished with value: 0.7790808673121505 and parameters: {'C': 0.4451393088430444, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7790808673121505.
[I 2025-11-15 21:09:56,989] Trial 2 finished with value: 0.774307923087413 and parameters: {'C': 7.322040062779014, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7790808673121505.
[I 2025-11-15 21:09:57,037] Trial 3 finished with value: 0.6639847265784808 and parameters: {'C': 0.0011942528802026126, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7790808673121505.
[I 2025-11-15 21:09:57,230] Trial 4 finished with value: 0.7162143733806082 and parameters: {'C': 0.025034434885284267, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7790808673121505.
[I 2025-11-15 21:09:57,488] Trial 5 finished with value: 0.7778535387972181 and parameters: {'C': 0.284561013137554, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7790808673121505.
[I 2025-11-15 21:09:57,743] Trial 6 finished with value: 0.7

    Best params: {'C': 0.6079595364809569, 'solver': 'lbfgs'}




    ‚úì Logged with Accuracy: 0.7799
üèÉ View run LogisticRegression_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/0d8230c421384624bea20f1ccdb659ae
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:10:46,130] A new study created in memory with name: no-name-c8e3e74b-8e29-4fe5-88ca-9d2279a10221
[I 2025-11-15 21:10:46,202] Trial 0 finished with value: 0.5525705713896086 and parameters: {'C': 0.003211718299112583, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5525705713896086.



  [ADASYN] Running HPT (10 trials)...


[I 2025-11-15 21:10:46,494] Trial 1 finished with value: 0.7748534024273831 and parameters: {'C': 4.481530835385039, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7748534024273831.
[I 2025-11-15 21:10:46,725] Trial 2 finished with value: 0.7696713486976681 and parameters: {'C': 1.1589707588570284, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7748534024273831.
[I 2025-11-15 21:10:47,128] Trial 3 finished with value: 0.7703531978726306 and parameters: {'C': 1.8464319067149677, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7748534024273831.
[I 2025-11-15 21:10:47,336] Trial 4 finished with value: 0.7498977226237556 and parameters: {'C': 0.10385603105219761, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7748534024273831.
[I 2025-11-15 21:10:48,010] Trial 5 finished with value: 0.7733533342424656 and parameters: {'C': 9.494944128680755, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7748534024273831.
[I 2025-11-15 21:10:48,093] Trial 6 finished with value: 0.5243

    Best params: {'C': 4.481530835385039, 'solver': 'liblinear'}




    ‚úì Logged with Accuracy: 0.7749
üèÉ View run LogisticRegression_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/69bfe5a2947645099dc8450620926344
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:11:37,033] A new study created in memory with name: no-name-48245194-ae03-4965-b4d8-6701de2a9b86
[I 2025-11-15 21:11:37,158] Trial 0 finished with value: 0.7835810718669031 and parameters: {'C': 0.7871057111176922}. Best is trial 0 with value: 0.7835810718669031.



TRAINING LinearSVC ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:11:37,278] Trial 1 finished with value: 0.7834447020319105 and parameters: {'C': 1.6259775137305776}. Best is trial 0 with value: 0.7835810718669031.
[I 2025-11-15 21:11:37,460] Trial 2 finished with value: 0.7831719623619255 and parameters: {'C': 5.626503759367678}. Best is trial 0 with value: 0.7835810718669031.
[I 2025-11-15 21:11:37,561] Trial 3 finished with value: 0.7846720305468431 and parameters: {'C': 0.3007549948808244}. Best is trial 3 with value: 0.7846720305468431.
[I 2025-11-15 21:11:37,677] Trial 4 finished with value: 0.783035592526933 and parameters: {'C': 1.0830394120875897}. Best is trial 3 with value: 0.7846720305468431.
[I 2025-11-15 21:11:37,846] Trial 5 finished with value: 0.7834447020319105 and parameters: {'C': 4.30592685909814}. Best is trial 3 with value: 0.7846720305468431.
[I 2025-11-15 21:11:37,996] Trial 6 finished with value: 0.783853811536888 and parameters: {'C': 3.0833139661423936}. Best is trial 3 with value: 0.7846720305468431.
[I 

    Best params: {'C': 0.3007549948808244}




    ‚úì Logged with Accuracy: 0.7847
üèÉ View run LinearSVC_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/e269465b03354530bcd64f14f1112eeb
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:12:26,259] A new study created in memory with name: no-name-cd03e18e-869c-490a-ba49-4d87dc13395a



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:12:26,524] Trial 0 finished with value: 0.785217509886813 and parameters: {'C': 0.24976894947530465}. Best is trial 0 with value: 0.785217509886813.
[I 2025-11-15 21:12:26,730] Trial 1 finished with value: 0.784126551206873 and parameters: {'C': 0.11022736933756341}. Best is trial 0 with value: 0.785217509886813.
[I 2025-11-15 21:12:27,205] Trial 2 finished with value: 0.785217509886813 and parameters: {'C': 4.595128875378506}. Best is trial 0 with value: 0.785217509886813.
[I 2025-11-15 21:12:27,610] Trial 3 finished with value: 0.7856266193917906 and parameters: {'C': 3.008438504478455}. Best is trial 3 with value: 0.7856266193917906.
[I 2025-11-15 21:12:27,873] Trial 4 finished with value: 0.7846720305468431 and parameters: {'C': 0.3661013104861995}. Best is trial 3 with value: 0.7856266193917906.
[I 2025-11-15 21:12:28,291] Trial 5 finished with value: 0.7850811400518205 and parameters: {'C': 5.236249915151627}. Best is trial 3 with value: 0.7856266193917906.
[I 20

    Best params: {'C': 3.008438504478455}




    ‚úì Logged with Accuracy: 0.7856
üèÉ View run LinearSVC_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/362b9c7b8c5043b898c0c721605c1adf
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:13:17,583] A new study created in memory with name: no-name-2b3ef74e-c5cb-48ee-88cf-82551a42ea1c



  [ADASYN] Running HPT (10 trials)...


[I 2025-11-15 21:13:17,799] Trial 0 finished with value: 0.7737624437474431 and parameters: {'C': 0.11263466227519324}. Best is trial 0 with value: 0.7737624437474431.
[I 2025-11-15 21:13:18,045] Trial 1 finished with value: 0.777308059457248 and parameters: {'C': 0.3337232686716948}. Best is trial 1 with value: 0.777308059457248.
[I 2025-11-15 21:13:18,420] Trial 2 finished with value: 0.7779899086322105 and parameters: {'C': 1.6617919280262097}. Best is trial 2 with value: 0.7779899086322105.
[I 2025-11-15 21:13:18,908] Trial 3 finished with value: 0.7796263466521206 and parameters: {'C': 5.28902574562919}. Best is trial 3 with value: 0.7796263466521206.
[I 2025-11-15 21:13:19,100] Trial 4 finished with value: 0.7736260739124505 and parameters: {'C': 0.11092922989994208}. Best is trial 3 with value: 0.7796263466521206.
[I 2025-11-15 21:13:19,399] Trial 5 finished with value: 0.7781262784672031 and parameters: {'C': 0.8455269842451123}. Best is trial 3 with value: 0.7796263466521206.


    Best params: {'C': 5.28902574562919}




    ‚úì Logged with Accuracy: 0.7796
üèÉ View run LinearSVC_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/c65451a41ec44ab4a25ff9382c612ad8
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:14:08,079] A new study created in memory with name: no-name-204832ad-9303-4472-bcf1-d92f86080b9f



TRAINING XGBoost ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:14:15,056] Trial 0 finished with value: 0.585435701622801 and parameters: {'n_estimators': 129, 'learning_rate': 0.014380071313629403, 'max_depth': 3}. Best is trial 0 with value: 0.585435701622801.
[I 2025-11-15 21:14:24,866] Trial 1 finished with value: 0.5437065321150961 and parameters: {'n_estimators': 73, 'learning_rate': 0.001010916468867026, 'max_depth': 5}. Best is trial 0 with value: 0.585435701622801.
[I 2025-11-15 21:14:49,464] Trial 2 finished with value: 0.6215737078958135 and parameters: {'n_estimators': 170, 'learning_rate': 0.010543298992230167, 'max_depth': 5}. Best is trial 2 with value: 0.6215737078958135.
[I 2025-11-15 21:14:58,097] Trial 3 finished with value: 0.5663439247238511 and parameters: {'n_estimators': 54, 'learning_rate': 0.00583766727274635, 'max_depth': 5}. Best is trial 2 with value: 0.6215737078958135.
[I 2025-11-15 21:15:13,925] Trial 4 finished with value: 0.585162961952816 and parameters: {'n_estimators': 71, 'learning_rate': 0.006

    Best params: {'n_estimators': 160, 'learning_rate': 0.048172013817799286, 'max_depth': 7}




    ‚úì Logged with Accuracy: 0.7230
üèÉ View run XGBoost_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/766ce2b4779b42b2b219ecaa876d9611
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:19:33,035] A new study created in memory with name: no-name-23836a39-7dcc-47df-924c-3eb19c9bec2b



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:19:43,739] Trial 0 finished with value: 0.5782081003681986 and parameters: {'n_estimators': 121, 'learning_rate': 0.012936622996240193, 'max_depth': 3}. Best is trial 0 with value: 0.5782081003681986.
[I 2025-11-15 21:20:12,814] Trial 1 finished with value: 0.604254738851766 and parameters: {'n_estimators': 130, 'learning_rate': 0.009713238825723518, 'max_depth': 5}. Best is trial 1 with value: 0.604254738851766.
[I 2025-11-15 21:20:29,675] Trial 2 finished with value: 0.6252556934406109 and parameters: {'n_estimators': 186, 'learning_rate': 0.0164493137526255, 'max_depth': 3}. Best is trial 2 with value: 0.6252556934406109.
[I 2025-11-15 21:20:45,891] Trial 3 finished with value: 0.5716623482885586 and parameters: {'n_estimators': 171, 'learning_rate': 0.007382738116165913, 'max_depth': 3}. Best is trial 2 with value: 0.6252556934406109.
[I 2025-11-15 21:21:11,602] Trial 4 finished with value: 0.6969862266466658 and parameters: {'n_estimators': 128, 'learning_rate': 0

    Best params: {'n_estimators': 116, 'learning_rate': 0.07757267381768003, 'max_depth': 6}




    ‚úì Logged with Accuracy: 0.7277
üèÉ View run XGBoost_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/289c0a8439b04452bba0f67c230e97d7
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:26:50,356] A new study created in memory with name: no-name-0ef2a75f-5f1c-4ba7-85fe-106e3a7cc39e



  [ADASYN] Running HPT (10 trials)...


[I 2025-11-15 21:27:10,821] Trial 0 finished with value: 0.7282149188599482 and parameters: {'n_estimators': 153, 'learning_rate': 0.07973598582782566, 'max_depth': 4}. Best is trial 0 with value: 0.7282149188599482.
[I 2025-11-15 21:28:45,141] Trial 1 finished with value: 0.5930724123823811 and parameters: {'n_estimators': 171, 'learning_rate': 0.0017978501245179404, 'max_depth': 7}. Best is trial 0 with value: 0.7282149188599482.
[I 2025-11-15 21:29:19,531] Trial 2 finished with value: 0.593617891722351 and parameters: {'n_estimators': 62, 'learning_rate': 0.006715537905040577, 'max_depth': 7}. Best is trial 0 with value: 0.7282149188599482.
[I 2025-11-15 21:29:40,543] Trial 3 finished with value: 0.6817128051275058 and parameters: {'n_estimators': 199, 'learning_rate': 0.03979285289555586, 'max_depth': 3}. Best is trial 0 with value: 0.7282149188599482.
[I 2025-11-15 21:30:01,070] Trial 4 finished with value: 0.7295786172098732 and parameters: {'n_estimators': 131, 'learning_rate': 

    Best params: {'n_estimators': 139, 'learning_rate': 0.07735371272428565, 'max_depth': 7}




    ‚úì Logged with Accuracy: 0.7491
üèÉ View run XGBoost_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/10f623cdafb84cc9a00fd633c9e56a9e
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:35:08,548] A new study created in memory with name: no-name-77434f9e-2e40-4e5f-a80d-7dc62a21cb4d



TRAINING LightGBM ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:35:14,210] Trial 0 finished with value: 0.7732169644074731 and parameters: {'n_estimators': 118, 'learning_rate': 0.02593033621470207, 'num_leaves': 43}. Best is trial 0 with value: 0.7732169644074731.
[I 2025-11-15 21:35:16,181] Trial 1 finished with value: 0.7372153279694531 and parameters: {'n_estimators': 51, 'learning_rate': 0.025414749400699346, 'num_leaves': 32}. Best is trial 0 with value: 0.7732169644074731.
[I 2025-11-15 21:35:20,866] Trial 2 finished with value: 0.7126687576708032 and parameters: {'n_estimators': 129, 'learning_rate': 0.00404388300716934, 'num_leaves': 31}. Best is trial 0 with value: 0.7732169644074731.
[I 2025-11-15 21:35:22,994] Trial 3 finished with value: 0.7767625801172781 and parameters: {'n_estimators': 122, 'learning_rate': 0.08803174295637799, 'num_leaves': 15}. Best is trial 3 with value: 0.7767625801172781.
[I 2025-11-15 21:35:27,215] Trial 4 finished with value: 0.7818082640120005 and parameters: {'n_estimators': 150, 'learning_

    Best params: {'n_estimators': 150, 'learning_rate': 0.055888087897430924, 'num_leaves': 27}




    ‚úì Logged with Accuracy: 0.7818
üèÉ View run LightGBM_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/b2be38a31267493ebe6b76a48d7e4ba3
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:36:46,531] A new study created in memory with name: no-name-c8d0163c-af1a-4b42-b2ed-ea2b8f880c48



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-11-15 21:36:52,091] Trial 0 finished with value: 0.7369425882994681 and parameters: {'n_estimators': 114, 'learning_rate': 0.020316676907460713, 'num_leaves': 21}. Best is trial 0 with value: 0.7369425882994681.
[I 2025-11-15 21:37:00,355] Trial 1 finished with value: 0.6999863630165007 and parameters: {'n_estimators': 181, 'learning_rate': 0.00596098005881663, 'num_leaves': 21}. Best is trial 0 with value: 0.7369425882994681.
[I 2025-11-15 21:37:03,908] Trial 2 finished with value: 0.6693031501431883 and parameters: {'n_estimators': 78, 'learning_rate': 0.0013761861657969067, 'num_leaves': 19}. Best is trial 0 with value: 0.7369425882994681.
[I 2025-11-15 21:37:17,965] Trial 3 finished with value: 0.7392608754943406 and parameters: {'n_estimators': 134, 'learning_rate': 0.0010032308171960568, 'num_leaves': 49}. Best is trial 3 with value: 0.7392608754943406.
[I 2025-11-15 21:37:26,208] Trial 4 finished with value: 0.7383062866493931 and parameters: {'n_estimators': 88, 'learni

    Best params: {'n_estimators': 159, 'learning_rate': 0.04833901902672037, 'num_leaves': 25}




    ‚úì Logged with Accuracy: 0.7839
üèÉ View run LightGBM_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/cccd0adfe2a142d59f4c42e6a3efcfed
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17


[I 2025-11-15 21:39:16,161] A new study created in memory with name: no-name-0966948e-706c-4d80-a445-dc61bfb3f107



  [ADASYN] Running HPT (10 trials)...


[I 2025-11-15 21:39:20,347] Trial 0 finished with value: 0.7486703941088231 and parameters: {'n_estimators': 50, 'learning_rate': 0.010454865501484783, 'num_leaves': 49}. Best is trial 0 with value: 0.7486703941088231.
[I 2025-11-15 21:39:29,109] Trial 1 finished with value: 0.785490249556798 and parameters: {'n_estimators': 115, 'learning_rate': 0.03150369418385284, 'num_leaves': 46}. Best is trial 1 with value: 0.785490249556798.
[I 2025-11-15 21:39:38,992] Trial 2 finished with value: 0.7238510841401882 and parameters: {'n_estimators': 148, 'learning_rate': 0.002997506411675642, 'num_leaves': 35}. Best is trial 1 with value: 0.785490249556798.
[I 2025-11-15 21:39:47,769] Trial 3 finished with value: 0.7858993590617755 and parameters: {'n_estimators': 97, 'learning_rate': 0.03593581755262887, 'num_leaves': 48}. Best is trial 3 with value: 0.7858993590617755.
[I 2025-11-15 21:39:57,298] Trial 4 finished with value: 0.7908086731215055 and parameters: {'n_estimators': 196, 'learning_rat

    Best params: {'n_estimators': 196, 'learning_rate': 0.05437133178453067, 'num_leaves': 25}




    ‚úì Logged with Accuracy: 0.7908
üèÉ View run LightGBM_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17/runs/cf778d451434433fbe988012a3f0b387
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/17

‚úì ALL MODELS TRAINED AND LOGGED


## 6. Conclusion and Next Steps
The complete set of model performances, including optimized hyperparameters, is now logged in the MLflow UI. The next step is typically stacking or selecting the single best performing model based on comprehensive evaluation metrics, especially F1-scores for the minority classes.

In [42]:
OPTIMAL_METRIC = "weighted avg_f1-score"

try:
    client = mlflow.tracking.MlflowClient()
    experiment = client.get_experiment_by_name("Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5")
    
    if experiment:
        experiment_id = experiment.experiment_id
        print(f"\n{'='*80}")
        print(f"FETCHING RESULTS FROM EXPERIMENT")
        print(f"{'='*80}")

        runs = client.search_runs(experiment_ids=experiment_id)
        run_data = []
        
        for run in runs:
            metrics = run.data.metrics
            params = run.data.params
            
            run_data.append({
                'run_id': run.info.run_id,
                'algo_name': params.get('algo_name', 'N/A'),
                'imbalance_handling': params.get('imbalance_handling', 'N/A'),
                'accuracy': metrics.get('accuracy', 0.0),
                'weighted_f1': metrics.get(OPTIMAL_METRIC, 0.0),
                '0_f1': metrics.get('0_f1-score', 0.0),
                '1_f1': metrics.get('1_f1-score', 0.0),
                '2_f1': metrics.get('2_f1-score', 0.0),
                'run_name': run.data.tags.get('mlflow.runName')
            })

        df_results = pd.DataFrame(run_data)
        
        # Create composite score: average of accuracy and weighted F1
        df_results['composite_score'] = (df_results['accuracy'] + df_results['weighted_f1']) / 2
        
        # Sort by composite score
        df_results = df_results.sort_values(by='composite_score', ascending=False)

        print("\n" + "="*80)
        print(f"TOP 10 MODELS (Sorted by Composite Score: Avg of Accuracy & Weighted F1)")
        print("="*80)
        print(df_results[['algo_name', 'imbalance_handling', 'accuracy', 
                         'weighted_f1', 'composite_score', '2_f1']].head(10).to_string(index=False))
        
        # Rankings by different metrics
        print("\n" + "="*80)
        print("RANKINGS BY DIFFERENT METRICS")
        print("="*80)
        
        print("\nüìä Top 5 by Accuracy:")
        top_acc = df_results.nlargest(5, 'accuracy')
        for idx, row in top_acc.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Accuracy: {row['accuracy']:.4f}")
        
        print("\nüìä Top 5 by Weighted F1-Score:")
        top_f1 = df_results.nlargest(5, 'weighted_f1')
        for idx, row in top_f1.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Weighted F1: {row['weighted_f1']:.4f}")
        
        print("\nüìä Top 5 by Minority Class (2) F1-Score:")
        top_minority = df_results.nlargest(5, '2_f1')
        for idx, row in top_minority.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Class 2 F1: {row['2_f1']:.4f}")
        
        # Algorithm + Sampling Method Analysis
        print("\n" + "="*80)
        print("BEST SAMPLING METHOD FOR EACH ALGORITHM")
        print("="*80)
        for algo in df_results['algo_name'].unique():
            if algo != 'N/A':
                algo_df = df_results[df_results['algo_name'] == algo]
                best_row = algo_df.iloc[0]
                print(f"\n{algo}:")
                print(f"  Best Sampling: {best_row['imbalance_handling']}")
                print(f"  Accuracy: {best_row['accuracy']:.4f}")
                print(f"  Weighted F1: {best_row['weighted_f1']:.4f}")
                print(f"  Minority F1: {best_row['2_f1']:.4f}")
        
        print("\n" + "="*80)
        print("üèÜ BEST OVERALL MODEL (by Composite Score)")
        print("="*80)
        best = df_results.iloc[0]
        print(f"Algorithm: {best['algo_name']}")
        print(f"Imbalance Handling: {best['imbalance_handling']}")
        print(f"Composite Score: {best['composite_score']:.4f}")
        print(f"Accuracy: {best['accuracy']:.4f}")
        print(f"Weighted F1-Score: {best['weighted_f1']:.4f}")
        print(f"Class 0 F1: {best['0_f1']:.4f}")
        print(f"Class 1 F1: {best['1_f1']:.4f}")
        print(f"Class 2 (Minority) F1: {best['2_f1']:.4f}")
        print(f"Run ID: {best['run_id']}")
        print("="*80)
        
        print("\nüí° Key Insights:")
        print("   ‚úì Each algorithm trained on undersampling, oversampling, and ADASYN")
        print("   ‚úì Best model selected from all 12 combinations (4 algos √ó 3 methods)")
        print("   ‚úì Composite Score balances accuracy with class-wise F1 performance")
        print("   ‚úì This approach avoids ensemble voting that may reduce performance")

except Exception as e:
    print(f"Error: {e}")


FETCHING RESULTS FROM EXPERIMENT

TOP 10 MODELS (Sorted by Composite Score: Avg of Accuracy & Weighted F1)
         algo_name imbalance_handling  accuracy  weighted_f1  composite_score     2_f1
          LightGBM             adasyn  0.790809     0.787167         0.788988 0.658624
         LinearSVC       oversampling  0.785627     0.783244         0.784435 0.662496
         LinearSVC      undersampling  0.784672     0.782359         0.783516 0.665629
          LightGBM       oversampling  0.783854     0.780722         0.782288 0.653920
          LightGBM      undersampling  0.781808     0.779463         0.780636 0.658393
LogisticRegression      undersampling  0.780990     0.779422         0.780206 0.659724
LogisticRegression       oversampling  0.779899     0.778274         0.779086 0.654291
         LinearSVC             adasyn  0.779626     0.777610         0.778618 0.644207
LogisticRegression             adasyn  0.774853     0.773705         0.774279 0.638048
           XGBoost    