In [1]:
%pip install -q optuna lightgbm

Note: you may need to restart the kernel to use updated packages.


This experiment compares the performance of several classification algorithms (Logistic Regression, Naive Bayes, SVM, XGBoost, LightGBM). We apply Hyperparameter Tuning (HPT) to the complex models using Optuna, while fixing the feature engineering pipeline based on previous optimal choices:

* **Vectorization:** TF-IDF (Term Frequency-Inverse Document Frequency)
* **N-gram Range:** Bigram `(1, 2)` (Unigrams and Bigrams)
* **Max Features:** 1000
* **Imbalance Handling:** Undersampling (`RandomUnderSampler`)

## 1. Setup and Dependencies

### 1.1 Import Libraries

In [2]:
import optuna
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, ADASYN
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

## 2. MLflow and Data Preparation

### 2.1 MLflow Configuration

In [3]:
# Set the remote tracking server URI
mlflow.set_tracking_uri("http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/")

# Set or create a new experiment
mlflow.set_experiment("Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5")

2025/12/11 18:30:59 INFO mlflow.tracking.fluent: Experiment with name 'Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlfow-bucket-2025/120831546946697659', creation_time=1765458058614, experiment_id='120831546946697659', last_update_time=1765458058614, lifecycle_stage='active', name='Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5', tags={}>

### 2.2 Data Loading, Remapping, and Feature Engineering

In [4]:
df = pd.read_csv('../data/reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})
df = df.dropna(subset=['category'])

print(f"Data shape: {df.shape}")
print(f"Class distribution:\n{df['category'].value_counts().sort_index()}")

# Fixed parameters
ngram_range = (1, 2)
max_features = 1000

# Split and vectorize
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_comment'], df['category'], 
    test_size=0.2, random_state=42, stratify=df['category']
)

vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

print(f"\nOriginal Training Data Shape: {X_train_vec.shape}")
print(f"Test Data Shape: {X_test_vec.shape}")

Data shape: (36662, 2)
Class distribution:
category
0    12644
1    15770
2     8248
Name: count, dtype: int64

Original Training Data Shape: (29329, 1000)
Test Data Shape: (7333, 1000)


### Resampled Data

In [5]:
print("\n" + "="*80)
print("CREATING 3 RESAMPLED DATASETS (TOP 3 IMBALANCE METHODS)")
print("="*80)

# Method 1: Undersampling
sampler_under = RandomUnderSampler(random_state=42)
X_train_under, y_train_under = sampler_under.fit_resample(X_train_vec, y_train)
print(f"1. Undersampled: {X_train_under.shape}")

# Method 2: Oversampling
sampler_over = RandomOverSampler(random_state=42)
X_train_over, y_train_over = sampler_over.fit_resample(X_train_vec, y_train)
print(f"2. Oversampled: {X_train_over.shape}")

# Method 3: ADASYN
sampler_adasyn = ADASYN(random_state=42)
X_train_adasyn, y_train_adasyn = sampler_adasyn.fit_resample(X_train_vec, y_train)
print(f"3. ADASYN: {X_train_adasyn.shape}")

# Store all datasets
datasets = {
    'undersampling': (X_train_under, y_train_under),
    'oversampling': (X_train_over, y_train_over),
    'adasyn': (X_train_adasyn, y_train_adasyn)
}


CREATING 3 RESAMPLED DATASETS (TOP 3 IMBALANCE METHODS)
1. Undersampled: (19794, 1000)
2. Oversampled: (37848, 1000)
3. ADASYN: (35909, 1000)


## 3. MLflow Logging and Evaluation Helper

In [6]:
def log_mlflow(model_name, model, params=None, imbalance_method="undersampling"):
    with mlflow.start_run():
        mlflow.set_tag("mlflow.runName", f"{model_name}_{imbalance_method}_TFIDF(1000)_HPT")
        mlflow.set_tag("experiment_type", "multi_algo_hpt")
        mlflow.log_param("algo_name", model_name)
        mlflow.log_param("vectorizer_type", "TF-IDF")
        mlflow.log_param("ngram_range", str(ngram_range))
        mlflow.log_param("max_features", max_features)
        mlflow.log_param("imbalance_handling", imbalance_method)
        
        if params:
            for key, value in params.items():
                mlflow.log_param(key, value)

        # Predict
        y_pred = model.predict(X_test_vec)

        # Log metrics
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)

        classification_rep = classification_report(y_test, y_pred, output_dict=True)
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Confusion matrix
        conf_matrix = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues")
        plt.title(f"Confusion Matrix: {model_name} ({imbalance_method})")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.savefig(f"conf_matrix_{model_name}_{imbalance_method}.png")
        mlflow.log_artifact(f"conf_matrix_{model_name}_{imbalance_method}.png")
        plt.close()

        mlflow.sklearn.log_model(model, f"{model_name}_model")
        
        print(f"    ‚úì Logged with Accuracy: {accuracy:.4f}")

## 4. Hyperparameter Tuning Objectives (Optuna)

In [7]:
N_TRIALS = 10

def tune_logistic_regression(X_train, y_train):
    def objective(trial):
        C = trial.suggest_float('C', 1e-3, 10.0, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'lbfgs'])
        model = LogisticRegression(C=C, solver=solver, random_state=42, multi_class='auto', max_iter=1000)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_linear_svc(X_train, y_train):
    def objective(trial):
        C = trial.suggest_float('C', 0.1, 10.0, log=True)
        model = LinearSVC(C=C, random_state=42, max_iter=1000, dual='auto')
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_xgboost(X_train, y_train):
    def objective(trial):
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        max_depth = trial.suggest_int('max_depth', 3, 7)
        model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth,
                             random_state=42, use_label_encoder=False, eval_metric='mlogloss', n_jobs=-1)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

def tune_lightgbm(X_train, y_train):
    def objective(trial):
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        num_leaves = trial.suggest_int('num_leaves', 10, 50)
        model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, num_leaves=num_leaves,
                              random_state=42, verbose=-1)
        model.fit(X_train, y_train)
        return accuracy_score(y_test, model.predict(X_test_vec))
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=False)
    return study.best_params

## 5. Execution and MLflow Logging

In [8]:
def train_individual_models_with_hpt(model_name, tune_func):
    """
    Train separate models on each resampling method and log each one.
    This allows us to find which algorithm + sampling method combination works best.
    """
    print(f"\n{'='*80}")
    print(f"TRAINING {model_name} ON ALL 3 SAMPLING METHODS")
    print(f"{'='*80}")
    
    for method, (X_resample, y_resample) in datasets.items():
        print(f"\n  [{method.upper()}] Running HPT ({N_TRIALS} trials)...")
        
        # Tune hyperparameters
        best_params = tune_func(X_resample, y_resample)
        print(f"    Best params: {best_params}")
        
        # Train model with best params
        if model_name == 'LogisticRegression':
            model = LogisticRegression(random_state=42, multi_class='auto', max_iter=1000, **best_params)
        elif model_name == 'LinearSVC':
            model = LinearSVC(random_state=42, max_iter=1000, dual='auto', **best_params)
        elif model_name == 'XGBoost':
            model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss', n_jobs=-1, **best_params)
        elif model_name == 'LightGBM':
            model = LGBMClassifier(random_state=42, verbose=-1, **best_params)
        
        model.fit(X_resample, y_resample)
        
        # Log each model separately
        log_mlflow(f"{model_name}", model, params=best_params, imbalance_method=method)
    
    print(f"{'='*80}")

# Execute Training
# ============================================================================
print("\n" + "="*80)
print("STARTING MODEL TRAINING WITH ALL SAMPLING METHODS")
print("="*80)
print("Strategy: Train each algorithm on all 3 sampling methods separately")
print("This creates 12 models (4 algorithms √ó 3 sampling methods)")
print("MLflow will track all and identify the best combination")
print("="*80)

# Train baseline
print(f"\n{'='*80}")
print("BASELINE: MultinomialNB (No HPT)")
print(f"{'='*80}")
mnb = MultinomialNB(alpha=1.0)
mnb.fit(X_train_under, y_train_under)
log_mlflow('MultinomialNB', mnb, params={'alpha': 1.0}, imbalance_method='undersampling')

# Train all models on all sampling methods individually
models_to_tune = [
    ('LogisticRegression', tune_logistic_regression),
    ('LinearSVC', tune_linear_svc),
    ('XGBoost', tune_xgboost),
    ('LightGBM', tune_lightgbm)
]

for model_name, tune_func in models_to_tune:
    train_individual_models_with_hpt(model_name, tune_func)

print("\n" + "="*80)
print("‚úì ALL MODELS TRAINED AND LOGGED")
print("="*80)


STARTING MODEL TRAINING WITH ALL SAMPLING METHODS
Strategy: Train each algorithm on all 3 sampling methods separately
This creates 12 models (4 algorithms √ó 3 sampling methods)
MLflow will track all and identify the best combination

BASELINE: MultinomialNB (No HPT)




    ‚úì Logged with Accuracy: 0.7000
üèÉ View run MultinomialNB_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/d168744c79bb45eca4eeb2b6e397b6ed
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:32:29,523] A new study created in memory with name: no-name-a141a419-c73f-4b59-aa7c-864c3b61892e
[I 2025-12-11 18:32:29,586] Trial 0 finished with value: 0.6687576708032184 and parameters: {'C': 0.005483115775663099, 'solver': 'liblinear'}. Best is trial 0 with value: 0.6687576708032184.
[I 2025-12-11 18:32:29,637] Trial 1 finished with value: 0.6742124642029184 and parameters: {'C': 0.009550223105061338, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6742124642029184.
[I 2025-12-11 18:32:29,707] Trial 2 finished with value: 0.7404882040092732 and parameters: {'C': 0.14773286727600024, 'solver': 'liblinear'}. Best is trial 2 with value: 0.7404882040092732.



TRAINING LogisticRegression ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:32:29,825] Trial 3 finished with value: 0.7455338879039957 and parameters: {'C': 0.12487814090553052, 'solver': 'lbfgs'}. Best is trial 3 with value: 0.7455338879039957.
[I 2025-12-11 18:32:29,920] Trial 4 finished with value: 0.7421246420291832 and parameters: {'C': 0.10494184839269742, 'solver': 'lbfgs'}. Best is trial 3 with value: 0.7455338879039957.
[I 2025-12-11 18:32:29,984] Trial 5 finished with value: 0.7278058093549706 and parameters: {'C': 0.05682541431745017, 'solver': 'lbfgs'}. Best is trial 3 with value: 0.7455338879039957.
[I 2025-12-11 18:32:30,019] Trial 6 finished with value: 0.6661666439383608 and parameters: {'C': 0.002823059023703137, 'solver': 'liblinear'}. Best is trial 3 with value: 0.7455338879039957.
[I 2025-12-11 18:32:30,083] Trial 7 finished with value: 0.7363971089594982 and parameters: {'C': 0.1224960970059363, 'solver': 'liblinear'}. Best is trial 3 with value: 0.7455338879039957.
[I 2025-12-11 18:32:30,194] Trial 8 finished with value: 

    Best params: {'C': 2.1653073230109707, 'solver': 'liblinear'}




    ‚úì Logged with Accuracy: 0.7810
üèÉ View run LogisticRegression_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/9115e563a1f04d14b5776489d830fc46
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:33:19,841] A new study created in memory with name: no-name-89622f93-5a3d-40a4-861c-f32396a38a7e
[I 2025-12-11 18:33:19,927] Trial 0 finished with value: 0.6649393154234283 and parameters: {'C': 0.0014108107026286978, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.6649393154234283.



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:33:20,354] Trial 1 finished with value: 0.7820810036819855 and parameters: {'C': 2.643463618683302, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7820810036819855.
[I 2025-12-11 18:33:20,628] Trial 2 finished with value: 0.7818082640120005 and parameters: {'C': 0.742975414137399, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7820810036819855.
[I 2025-12-11 18:33:20,794] Trial 3 finished with value: 0.7190781399154507 and parameters: {'C': 0.028605525069124667, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7820810036819855.
[I 2025-12-11 18:33:21,184] Trial 4 finished with value: 0.7809900450020455 and parameters: {'C': 7.726726117027922, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7820810036819855.
[I 2025-12-11 18:33:21,308] Trial 5 finished with value: 0.6988954043365607 and parameters: {'C': 0.014880547063656192, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7820810036819855.
[I 2025-12-11 18:33:21,468] Trial 6 finished with

    Best params: {'C': 2.643463618683302, 'solver': 'liblinear'}




    ‚úì Logged with Accuracy: 0.7821
üèÉ View run LogisticRegression_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/698664d76c8f40929640e38afee765e0
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:34:13,952] A new study created in memory with name: no-name-39af08d5-ac05-4b3d-bc94-7846a8227631
[I 2025-12-11 18:34:14,098] Trial 0 finished with value: 0.6946679394517933 and parameters: {'C': 0.012283267524728244, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.6946679394517933.



  [ADASYN] Running HPT (10 trials)...


[I 2025-12-11 18:34:14,390] Trial 1 finished with value: 0.7741715532524206 and parameters: {'C': 2.675201296086194, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7741715532524206.
[I 2025-12-11 18:34:14,529] Trial 2 finished with value: 0.6309832265102959 and parameters: {'C': 0.0059198868479968135, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7741715532524206.
[I 2025-12-11 18:34:14,755] Trial 3 finished with value: 0.7443065593890631 and parameters: {'C': 0.07202060786835728, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7741715532524206.
[I 2025-12-11 18:34:15,317] Trial 4 finished with value: 0.7718532660575481 and parameters: {'C': 3.6607733308810486, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.7741715532524206.
[I 2025-12-11 18:34:15,568] Trial 5 finished with value: 0.7650347743079231 and parameters: {'C': 0.4886378996214002, 'solver': 'liblinear'}. Best is trial 1 with value: 0.7741715532524206.
[I 2025-12-11 18:34:15,670] Trial 6 finished with value

    Best params: {'C': 2.675201296086194, 'solver': 'liblinear'}




    ‚úì Logged with Accuracy: 0.7742
üèÉ View run LogisticRegression_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/0401370285cf47c392245c782119333d
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:35:07,107] A new study created in memory with name: no-name-9cd5a082-a3c1-46cf-8c0b-3af45ac936ba
[I 2025-12-11 18:35:07,265] Trial 0 finished with value: 0.783035592526933 and parameters: {'C': 1.133325557667224}. Best is trial 0 with value: 0.783035592526933.



TRAINING LinearSVC ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:35:07,423] Trial 1 finished with value: 0.7831719623619255 and parameters: {'C': 1.2346461001601727}. Best is trial 1 with value: 0.7831719623619255.
[I 2025-12-11 18:35:07,584] Trial 2 finished with value: 0.7835810718669031 and parameters: {'C': 1.9104955545783033}. Best is trial 2 with value: 0.7835810718669031.
[I 2025-12-11 18:35:07,728] Trial 3 finished with value: 0.7834447020319105 and parameters: {'C': 1.598854139519303}. Best is trial 2 with value: 0.7835810718669031.
[I 2025-12-11 18:35:07,907] Trial 4 finished with value: 0.7839901813718806 and parameters: {'C': 3.565030152983778}. Best is trial 4 with value: 0.7839901813718806.
[I 2025-12-11 18:35:08,035] Trial 5 finished with value: 0.7849447702168281 and parameters: {'C': 0.4344550210538833}. Best is trial 5 with value: 0.7849447702168281.
[I 2025-12-11 18:35:08,132] Trial 6 finished with value: 0.7805809354970681 and parameters: {'C': 0.14563936837104702}. Best is trial 5 with value: 0.7849447702168281.

    Best params: {'C': 0.4344550210538833}




    ‚úì Logged with Accuracy: 0.7849
üèÉ View run LinearSVC_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/47b99e9414b747adba0a4f74e0852bfc
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:35:56,620] A new study created in memory with name: no-name-83f0a13d-6bd2-4247-b016-c95546525a04



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:35:57,047] Trial 0 finished with value: 0.7856266193917906 and parameters: {'C': 1.5018954478935285}. Best is trial 0 with value: 0.7856266193917906.
[I 2025-12-11 18:35:57,531] Trial 1 finished with value: 0.785217509886813 and parameters: {'C': 4.277625839518987}. Best is trial 0 with value: 0.7856266193917906.
[I 2025-12-11 18:35:57,762] Trial 2 finished with value: 0.7842629210418656 and parameters: {'C': 0.123376269083637}. Best is trial 0 with value: 0.7856266193917906.
[I 2025-12-11 18:35:58,266] Trial 3 finished with value: 0.785217509886813 and parameters: {'C': 4.769428383397109}. Best is trial 0 with value: 0.7856266193917906.
[I 2025-12-11 18:35:58,692] Trial 4 finished with value: 0.7858993590617755 and parameters: {'C': 1.177671774968524}. Best is trial 4 with value: 0.7858993590617755.
[I 2025-12-11 18:35:58,982] Trial 5 finished with value: 0.7846720305468431 and parameters: {'C': 0.4193869483671542}. Best is trial 4 with value: 0.7858993590617755.
[I 2

    Best params: {'C': 1.177671774968524}




    ‚úì Logged with Accuracy: 0.7859
üèÉ View run LinearSVC_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/4a876f32e32d4f2bb2fd9401a1af2778
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:36:48,258] A new study created in memory with name: no-name-fe753b54-cf5f-42af-88bf-429a6bc21cd5



  [ADASYN] Running HPT (10 trials)...


[I 2025-12-11 18:36:48,829] Trial 0 finished with value: 0.7782626483021956 and parameters: {'C': 1.9927007662852156}. Best is trial 0 with value: 0.7782626483021956.
[I 2025-12-11 18:36:49,484] Trial 1 finished with value: 0.7792172371471431 and parameters: {'C': 4.13333693298873}. Best is trial 1 with value: 0.7792172371471431.
[I 2025-12-11 18:36:49,962] Trial 2 finished with value: 0.7785353879721806 and parameters: {'C': 2.0276751766581005}. Best is trial 1 with value: 0.7792172371471431.
[I 2025-12-11 18:36:50,722] Trial 3 finished with value: 0.7785353879721806 and parameters: {'C': 2.4566861766079104}. Best is trial 1 with value: 0.7792172371471431.
[I 2025-12-11 18:36:51,275] Trial 4 finished with value: 0.7788081276421656 and parameters: {'C': 3.4916185394595476}. Best is trial 1 with value: 0.7792172371471431.
[I 2025-12-11 18:36:51,525] Trial 5 finished with value: 0.7751261420973681 and parameters: {'C': 0.1765121910052164}. Best is trial 1 with value: 0.7792172371471431.


    Best params: {'C': 7.292693025205307}




    ‚úì Logged with Accuracy: 0.7796
üèÉ View run LinearSVC_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/819224f866eb48b69b61ac283839a525
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:37:43,696] A new study created in memory with name: no-name-8851ae05-c244-467a-a935-045d321653a7



TRAINING XGBoost ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:37:49,832] Trial 0 finished with value: 0.5105686622119188 and parameters: {'n_estimators': 96, 'learning_rate': 0.001318301118772021, 'max_depth': 3}. Best is trial 0 with value: 0.5105686622119188.
[I 2025-12-11 18:38:27,613] Trial 1 finished with value: 0.7374880676394382 and parameters: {'n_estimators': 148, 'learning_rate': 0.06946333259232494, 'max_depth': 7}. Best is trial 1 with value: 0.7374880676394382.
[I 2025-12-11 18:38:50,843] Trial 2 finished with value: 0.605345697531706 and parameters: {'n_estimators': 79, 'learning_rate': 0.007595395527123069, 'max_depth': 7}. Best is trial 1 with value: 0.7374880676394382.
[I 2025-12-11 18:39:13,993] Trial 3 finished with value: 0.6103913814264285 and parameters: {'n_estimators': 65, 'learning_rate': 0.012491313805561972, 'max_depth': 7}. Best is trial 1 with value: 0.7374880676394382.
[I 2025-12-11 18:39:23,620] Trial 4 finished with value: 0.5559798172644211 and parameters: {'n_estimators': 54, 'learning_rate': 0.0

    Best params: {'n_estimators': 148, 'learning_rate': 0.06946333259232494, 'max_depth': 7}




    ‚úì Logged with Accuracy: 0.7375
üèÉ View run XGBoost_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/dd69b4f8321f45c59139bf124232ff7a
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:42:47,817] A new study created in memory with name: no-name-9c0d7290-9db9-4be3-982a-2607b5861abd



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:43:02,077] Trial 0 finished with value: 0.5859811809627711 and parameters: {'n_estimators': 89, 'learning_rate': 0.012118973021886328, 'max_depth': 4}. Best is trial 0 with value: 0.5859811809627711.
[I 2025-12-11 18:44:20,177] Trial 1 finished with value: 0.7265784808400382 and parameters: {'n_estimators': 176, 'learning_rate': 0.04101989822527027, 'max_depth': 7}. Best is trial 1 with value: 0.7265784808400382.
[I 2025-12-11 18:44:28,016] Trial 2 finished with value: 0.5160234556116187 and parameters: {'n_estimators': 67, 'learning_rate': 0.0044010780976561645, 'max_depth': 3}. Best is trial 1 with value: 0.7265784808400382.
[I 2025-12-11 18:45:02,983] Trial 3 finished with value: 0.5563889267693987 and parameters: {'n_estimators': 80, 'learning_rate': 0.0010293010196797366, 'max_depth': 6}. Best is trial 1 with value: 0.7265784808400382.
[I 2025-12-11 18:45:53,500] Trial 4 finished with value: 0.7428064912041457 and parameters: {'n_estimators': 111, 'learning_rate':

    Best params: {'n_estimators': 111, 'learning_rate': 0.09321739378249722, 'max_depth': 7}




    ‚úì Logged with Accuracy: 0.7428
üèÉ View run XGBoost_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/e4496076a7524ea4ac3435b6bd401a7d
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:49:34,999] A new study created in memory with name: no-name-73d33038-de7f-4a9d-87e7-05a895022568



  [ADASYN] Running HPT (10 trials)...


[I 2025-12-11 18:50:09,342] Trial 0 finished with value: 0.6436656211645984 and parameters: {'n_estimators': 180, 'learning_rate': 0.013942850267303656, 'max_depth': 4}. Best is trial 0 with value: 0.6436656211645984.
[I 2025-12-11 18:50:38,406] Trial 1 finished with value: 0.6315287058502659 and parameters: {'n_estimators': 58, 'learning_rate': 0.018815131774211638, 'max_depth': 6}. Best is trial 0 with value: 0.6436656211645984.
[I 2025-12-11 18:51:18,050] Trial 2 finished with value: 0.5557070775944362 and parameters: {'n_estimators': 186, 'learning_rate': 0.0015973589890464592, 'max_depth': 4}. Best is trial 0 with value: 0.6436656211645984.
[I 2025-12-11 18:52:23,920] Trial 3 finished with value: 0.565525705713896 and parameters: {'n_estimators': 196, 'learning_rate': 0.0018753844942454535, 'max_depth': 5}. Best is trial 0 with value: 0.6436656211645984.
[I 2025-12-11 18:53:47,840] Trial 4 finished with value: 0.6736669848629483 and parameters: {'n_estimators': 181, 'learning_rate

    Best params: {'n_estimators': 54, 'learning_rate': 0.09558195759031882, 'max_depth': 7}




    ‚úì Logged with Accuracy: 0.7131
üèÉ View run XGBoost_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/f37abc4edd35448cb038c06e2943f9f3
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 18:58:43,561] A new study created in memory with name: no-name-6f406df8-9dc8-462b-b775-3bcad3f9c8fd



TRAINING LightGBM ON ALL 3 SAMPLING METHODS

  [UNDERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 18:58:45,725] Trial 0 finished with value: 0.600981862811946 and parameters: {'n_estimators': 101, 'learning_rate': 0.002340225615337289, 'num_leaves': 10}. Best is trial 0 with value: 0.600981862811946.
[I 2025-12-11 18:58:53,002] Trial 1 finished with value: 0.7815355243420156 and parameters: {'n_estimators': 151, 'learning_rate': 0.07012188878384279, 'num_leaves': 36}. Best is trial 1 with value: 0.7815355243420156.
[I 2025-12-11 18:58:55,329] Trial 2 finished with value: 0.7443065593890631 and parameters: {'n_estimators': 84, 'learning_rate': 0.04694998201599227, 'num_leaves': 17}. Best is trial 1 with value: 0.7815355243420156.
[I 2025-12-11 18:58:57,698] Trial 3 finished with value: 0.7650347743079231 and parameters: {'n_estimators': 72, 'learning_rate': 0.07667283937068846, 'num_leaves': 19}. Best is trial 1 with value: 0.7815355243420156.
[I 2025-12-11 18:59:04,528] Trial 4 finished with value: 0.781126414837038 and parameters: {'n_estimators': 110, 'learning_rate

    Best params: {'n_estimators': 151, 'learning_rate': 0.07012188878384279, 'num_leaves': 36}




    ‚úì Logged with Accuracy: 0.7815
üèÉ View run LightGBM_undersampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/f9ff6466cecd40c3ba83c8acb028ace6
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 19:00:38,907] A new study created in memory with name: no-name-1583c0e0-5a09-448a-9a09-9626b82edc5c



  [OVERSAMPLING] Running HPT (10 trials)...


[I 2025-12-11 19:00:48,650] Trial 0 finished with value: 0.790126823946543 and parameters: {'n_estimators': 168, 'learning_rate': 0.0869345833365426, 'num_leaves': 23}. Best is trial 0 with value: 0.790126823946543.
[I 2025-12-11 19:01:08,113] Trial 1 finished with value: 0.7867175780717305 and parameters: {'n_estimators': 193, 'learning_rate': 0.09964924184743591, 'num_leaves': 48}. Best is trial 0 with value: 0.790126823946543.
[I 2025-12-11 19:01:24,505] Trial 2 finished with value: 0.778671757807173 and parameters: {'n_estimators': 137, 'learning_rate': 0.02319395090836669, 'num_leaves': 48}. Best is trial 0 with value: 0.790126823946543.
[I 2025-12-11 19:01:34,944] Trial 3 finished with value: 0.7303968362198282 and parameters: {'n_estimators': 129, 'learning_rate': 0.00897887900602612, 'num_leaves': 30}. Best is trial 0 with value: 0.790126823946543.
[I 2025-12-11 19:01:41,643] Trial 4 finished with value: 0.7633983362880131 and parameters: {'n_estimators': 82, 'learning_rate': 0

    Best params: {'n_estimators': 168, 'learning_rate': 0.0869345833365426, 'num_leaves': 23}




    ‚úì Logged with Accuracy: 0.7901
üèÉ View run LightGBM_oversampling_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/bc846ad7390b4960a4cfaa0783a70bda
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659


[I 2025-12-11 19:04:14,867] A new study created in memory with name: no-name-6ea81106-e35d-4f2a-bfb0-f877dfd59de2



  [ADASYN] Running HPT (10 trials)...


[I 2025-12-11 19:04:24,450] Trial 0 finished with value: 0.7054411564162008 and parameters: {'n_estimators': 104, 'learning_rate': 0.0012928930714582298, 'num_leaves': 28}. Best is trial 0 with value: 0.7054411564162008.
[I 2025-12-11 19:04:29,015] Trial 1 finished with value: 0.771307786717578 and parameters: {'n_estimators': 87, 'learning_rate': 0.06575695372532099, 'num_leaves': 17}. Best is trial 1 with value: 0.771307786717578.
[I 2025-12-11 19:04:33,139] Trial 2 finished with value: 0.6694395199781809 and parameters: {'n_estimators': 67, 'learning_rate': 0.002584842410186994, 'num_leaves': 16}. Best is trial 1 with value: 0.771307786717578.
[I 2025-12-11 19:04:44,067] Trial 3 finished with value: 0.7478521750988681 and parameters: {'n_estimators': 77, 'learning_rate': 0.005900864771285144, 'num_leaves': 50}. Best is trial 1 with value: 0.771307786717578.
[I 2025-12-11 19:04:58,196] Trial 4 finished with value: 0.7148506750306832 and parameters: {'n_estimators': 151, 'learning_rat

    Best params: {'n_estimators': 79, 'learning_rate': 0.08980961188201096, 'num_leaves': 15}




    ‚úì Logged with Accuracy: 0.7742
üèÉ View run LightGBM_adasyn_TFIDF(1000)_HPT at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659/runs/3daa031883e24884b8011f4db9ed0efe
üß™ View experiment at: http://ec2-54-211-18-166.compute-1.amazonaws.com:5000/#/experiments/120831546946697659

‚úì ALL MODELS TRAINED AND LOGGED


## 6. Conclusion and Next Steps
The complete set of model performances, including optimized hyperparameters, is now logged in the MLflow UI. The next step is typically stacking or selecting the single best performing model based on comprehensive evaluation metrics, especially F1-scores for the minority classes.

In [9]:
OPTIMAL_METRIC = "weighted avg_f1-score"

try:
    client = mlflow.tracking.MlflowClient()
    experiment = client.get_experiment_by_name("Model Comparision (TFIDF Bigram 1000 + Ensemble Imbalance) - Exp 5")
    
    if experiment:
        experiment_id = experiment.experiment_id
        print(f"\n{'='*80}")
        print(f"FETCHING RESULTS FROM EXPERIMENT")
        print(f"{'='*80}")

        runs = client.search_runs(experiment_ids=experiment_id)
        run_data = []
        
        for run in runs:
            metrics = run.data.metrics
            params = run.data.params
            
            run_data.append({
                'run_id': run.info.run_id,
                'algo_name': params.get('algo_name', 'N/A'),
                'imbalance_handling': params.get('imbalance_handling', 'N/A'),
                'accuracy': metrics.get('accuracy', 0.0),
                'weighted_f1': metrics.get(OPTIMAL_METRIC, 0.0),
                '0_f1': metrics.get('0_f1-score', 0.0),
                '1_f1': metrics.get('1_f1-score', 0.0),
                '2_f1': metrics.get('2_f1-score', 0.0),
                'run_name': run.data.tags.get('mlflow.runName')
            })

        df_results = pd.DataFrame(run_data)
        
        # Create composite score: average of accuracy and weighted F1
        df_results['composite_score'] = (df_results['accuracy'] + df_results['weighted_f1']) / 2
        
        # Sort by composite score
        df_results = df_results.sort_values(by='composite_score', ascending=False)

        print("\n" + "="*80)
        print(f"TOP 10 MODELS (Sorted by Composite Score: Avg of Accuracy & Weighted F1)")
        print("="*80)
        print(df_results[['algo_name', 'imbalance_handling', 'accuracy', 
                         'weighted_f1', 'composite_score', '2_f1']].head(10).to_string(index=False))
        
        # Rankings by different metrics
        print("\n" + "="*80)
        print("RANKINGS BY DIFFERENT METRICS")
        print("="*80)
        
        print("\nüìä Top 5 by Accuracy:")
        top_acc = df_results.nlargest(5, 'accuracy')
        for idx, row in top_acc.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Accuracy: {row['accuracy']:.4f}")
        
        print("\nüìä Top 5 by Weighted F1-Score:")
        top_f1 = df_results.nlargest(5, 'weighted_f1')
        for idx, row in top_f1.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Weighted F1: {row['weighted_f1']:.4f}")
        
        print("\nüìä Top 5 by Minority Class (2) F1-Score:")
        top_minority = df_results.nlargest(5, '2_f1')
        for idx, row in top_minority.iterrows():
            print(f"  {row['algo_name']:25s} ({row['imbalance_handling']:15s}) - Class 2 F1: {row['2_f1']:.4f}")
        
        # Algorithm + Sampling Method Analysis
        print("\n" + "="*80)
        print("BEST SAMPLING METHOD FOR EACH ALGORITHM")
        print("="*80)
        for algo in df_results['algo_name'].unique():
            if algo != 'N/A':
                algo_df = df_results[df_results['algo_name'] == algo]
                best_row = algo_df.iloc[0]
                print(f"\n{algo}:")
                print(f"  Best Sampling: {best_row['imbalance_handling']}")
                print(f"  Accuracy: {best_row['accuracy']:.4f}")
                print(f"  Weighted F1: {best_row['weighted_f1']:.4f}")
                print(f"  Minority F1: {best_row['2_f1']:.4f}")
        
        print("\n" + "="*80)
        print("üèÜ BEST OVERALL MODEL (by Composite Score)")
        print("="*80)
        best = df_results.iloc[0]
        print(f"Algorithm: {best['algo_name']}")
        print(f"Imbalance Handling: {best['imbalance_handling']}")
        print(f"Composite Score: {best['composite_score']:.4f}")
        print(f"Accuracy: {best['accuracy']:.4f}")
        print(f"Weighted F1-Score: {best['weighted_f1']:.4f}")
        print(f"Class 0 F1: {best['0_f1']:.4f}")
        print(f"Class 1 F1: {best['1_f1']:.4f}")
        print(f"Class 2 (Minority) F1: {best['2_f1']:.4f}")
        print(f"Run ID: {best['run_id']}")
        print("="*80)
        
        print("\nüí° Key Insights:")
        print("   ‚úì Each algorithm trained on undersampling, oversampling, and ADASYN")
        print("   ‚úì Best model selected from all 12 combinations (4 algos √ó 3 methods)")
        print("   ‚úì Composite Score balances accuracy with class-wise F1 performance")
        print("   ‚úì This approach avoids ensemble voting that may reduce performance")

except Exception as e:
    print(f"Error: {e}")


FETCHING RESULTS FROM EXPERIMENT

TOP 10 MODELS (Sorted by Composite Score: Avg of Accuracy & Weighted F1)
         algo_name imbalance_handling  accuracy  weighted_f1  composite_score     2_f1
          LightGBM       oversampling  0.790127     0.787081         0.788604 0.657987
         LinearSVC       oversampling  0.785899     0.783413         0.784656 0.662285
         LinearSVC      undersampling  0.784945     0.782788         0.783867 0.666874
LogisticRegression       oversampling  0.782081     0.780060         0.781071 0.657169
          LightGBM      undersampling  0.781536     0.779756         0.780646 0.657283
LogisticRegression      undersampling  0.780990     0.779356         0.780173 0.659314
         LinearSVC             adasyn  0.779626     0.777590         0.778608 0.643982
LogisticRegression             adasyn  0.774172     0.773069         0.773621 0.635243
          LightGBM             adasyn  0.774172     0.770442         0.772307 0.650081
           XGBoost    