In [1]:
# Imports

import plotly
import optuna
import mlflow
import dagshub
import mlflow.sklearn

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Connecting the notebook to the DagsHub repository (with its MLflow integration enabled)

DAGSHUB_REPO_OWNER = 'SushrutGaikwad'
DAGSHUB_REPO_NAME = 'youtube-comments-analyzer'

dagshub.init(
    repo_owner=DAGSHUB_REPO_OWNER,
    repo_name=DAGSHUB_REPO_NAME,
    mlflow=True,
)

# Data

In [3]:
PREPROCESSED_DATA_PATH = "../data/processed/reddit_preprocessed.csv"

# Read the preprocessed comments and discard rows with no cleaned text
df = pd.read_csv(PREPROCESSED_DATA_PATH).dropna(subset=["clean_comment"])
df.shape

(36662, 2)

# Running the experiment

In [4]:
# Selecting the MLflow experiment that every run below is recorded under

EXPERIMENT_NAME = "Exp 5: Detailed hyperparameter tuning"
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/50f3ef58d8344cd3bd0def333af39f2d', creation_time=1749058038347, experiment_id='6', last_update_time=1749064965442, lifecycle_stage='active', name='Exp 5: Detailed hyperparameter tuning', tags={}>

## Preprocessing

In [5]:
# Remapping class labels from {-1, 0, 1} to {2, 0, 1}
mapping = {
    -1: 2,
    0: 0,
    1: 1
}

# Build a labelled copy instead of overwriting df["category"] in place.
# Re-running an in-place remap would send the already-remapped label 2 to NaN
# (2 is not a key of `mapping`), and the dropna below would then silently
# discard that whole class. Deriving a new frame keeps this cell idempotent.
df_labeled = (
    df
    .assign(category=df["category"].map(mapping))
    # Removing missing sentiments (NaN, or any value that is not a key of `mapping`)
    .dropna(subset=["category"])
)

# Feature engineering
ngram_range = (1, 2)
max_features = 1000
vectorizer = CountVectorizer(
    ngram_range=ngram_range,
    max_features=max_features
)

# Train-test split (stratified so both splits keep the class proportions)
X_train, X_test, y_train, y_test = train_test_split(
    df_labeled["clean_comment"],
    df_labeled["category"],
    test_size=0.2,
    random_state=42,
    stratify=df_labeled["category"]
)

# The vocabulary is fitted on the training split only; the test split is
# transformed with that same vocabulary.
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Undersampling (training data only; the test split is left as-is)
rus = RandomUnderSampler(random_state=42)
X_train_vectorized, y_train = rus.fit_resample(
    X_train_vectorized,
    y_train
)

## Hyperparameter tuning with Optuna

In [6]:
# Function to log results to MLFlow
def log_to_mlflow(
    model_name: str,
    model,
    X_train,
    X_test,
    y_train,
    y_test,
    params: dict,
    trial_number
) -> float:
    """Fit `model`, evaluate it on the test data and record everything in one MLflow run.

    The run is named ``Trial_<trial_number>_<model_name>_undersampling_BoW_1000_bigrams``.
    It stores the model name and `params` as run parameters, the accuracy plus
    every per-label entry of the classification report as metrics, and the
    fitted model as an artifact under ``<model_name>_model``.

    Args:
        model_name: Label used in the run name, the ``model_name`` parameter
            and the artifact path.
        model: Unfitted estimator exposing ``fit`` and ``predict``; it is
            fitted in place.
        X_train: Training features passed to ``model.fit``.
        X_test: Features passed to ``model.predict``.
        y_train: Training labels passed to ``model.fit``.
        y_test: True labels the predictions are scored against.
        params: Hyperparameters to record as run parameters.
        trial_number: Identifier interpolated into the run name (the callers
            in this notebook pass an Optuna trial number or the string "best").

    Returns:
        Accuracy of the fitted model on ``X_test`` / ``y_test``.
    """
    with mlflow.start_run():
        # Tags
        mlflow.set_tag(
            "mlflow.runName", f"Trial_{trial_number}_{model_name}_undersampling_BoW_1000_bigrams"
        )
        mlflow.set_tag("experiment_type", "ML_models_comparison")

        # Logging model name as a parameter
        mlflow.log_param("model_name", model_name)

        # Logging all hyperparameters in a single call
        mlflow.log_params(params)

        # Training the model
        model.fit(X_train, y_train)

        # Making predictions on the test set
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(
            y_true=y_test,
            y_pred=y_pred
        )

        # zero_division=0 yields the same values as the default ("warn" also
        # reports 0) but does not emit an UndefinedMetricWarning for every
        # class that the model never predicts.
        classification_rep = classification_report(
            y_true=y_test,
            y_pred=y_pred,
            output_dict=True,
            zero_division=0
        )

        # Collect every metric first and send them in one batch instead of one
        # tracking-server request per metric.
        metrics_to_log = {"accuracy": float(accuracy)}
        for label, metrics in classification_rep.items():
            # Scalar entries of the report (the overall accuracy) are skipped;
            # only the per-label / averaged dictionaries are expanded.
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    metrics_to_log[f"{label}: {metric}"] = float(value)
        mlflow.log_metrics(metrics_to_log)

        # Logging the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")

    return accuracy

In [7]:
def objective(trial):
    """Optuna objective: train one logistic-regression candidate and return its accuracy.

    Samples ``C`` (log-uniform in [1e-4, 10]), ``penalty`` and ``solver`` from
    the trial, fits the model through `log_to_mlflow` (one MLflow run per
    trial) and returns the accuracy that helper reports.

    NOTE(review): the score is computed on ``X_test_vectorized`` / ``y_test``,
    i.e. the search is tuned on the same split that is reported as the final
    result, so the best accuracy is optimistically biased.
    """
    # Hyperparameter space (sampled in the order C, penalty, solver)
    params = {
        "C": trial.suggest_float("C", 1e-4, 10.0, log=True),
        "penalty": trial.suggest_categorical("penalty", ["l2", "l1"]),
        "solver": trial.suggest_categorical("solver", ["liblinear", "saga"]),
    }

    candidate = LogisticRegression(
        max_iter=1000,
        multi_class="ovr",
        random_state=42,
        **params
    )

    # Each trial is logged as its own MLflow run; its accuracy is the objective value
    return log_to_mlflow(
        model_name="LogisticRegression",
        model=candidate,
        X_train=X_train_vectorized,
        X_test=X_test_vectorized,
        y_train=y_train,
        y_test=y_test,
        params=params,
        trial_number=trial.number
    )

In [8]:
# Seeding the sampler so that the sequence of suggested hyperparameters (and
# therefore the selected best trial) is reproducible across kernel restarts.
# TPESampler is the sampler create_study uses by default; only the seed is new.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42)
)
study.optimize(objective, n_trials=50)

[I 2025-06-05 05:19:05,450] A new study created in memory with name: no-name-bf25100f-3518-442c-8fa7-4f1082bf8cb1


üèÉ View run Trial_0_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/57f017b1c03a41ebb9b27ab004f43e34
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:19:32,276] Trial 0 finished with value: 0.775671621437338 and parameters: {'C': 3.8419050827736068, 'penalty': 'l2', 'solver': 'liblinear'}. Best is trial 0 with value: 0.775671621437338.


üèÉ View run Trial_1_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/808a80bae5064cb1bafd66b3eb8b7515
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:20:09,015] Trial 1 finished with value: 0.7856266193917906 and parameters: {'C': 0.41999867426830273, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_2_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/caeba05b2f2349438461f43ee2eb4a26
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:20:46,411] Trial 2 finished with value: 0.7762171007773081 and parameters: {'C': 8.365570191733694, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_3_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/0b63236e24a445f8a194511572c85dd5
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:21:23,072] Trial 3 finished with value: 0.6259375426155734 and parameters: {'C': 0.0015208693773514981, 'penalty': 'l2', 'solver': 'liblinear'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_4_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/852e08fb3c2d444d86b86e202aeb95d6
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:22:00,343] Trial 4 finished with value: 0.6736669848629483 and parameters: {'C': 0.0062386867030733905, 'penalty': 'l2', 'solver': 'saga'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_5_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/b6cdde0e05e749fab241594a203b09ad
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:22:37,210] Trial 5 finished with value: 0.7856266193917906 and parameters: {'C': 0.41952777647511036, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_6_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/545c7fa021264633b18989ed6b83b1ad
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:23:14,074] Trial 6 finished with value: 0.6403927451247784 and parameters: {'C': 0.002953790953669244, 'penalty': 'l2', 'solver': 'saga'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_7_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4b21ea365c834cf390dd8f3a86128e9f
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:23:51,245] Trial 7 finished with value: 0.7755352516023456 and parameters: {'C': 5.247470673777928, 'penalty': 'l2', 'solver': 'saga'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_8_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/7a67da28eed84bd8ab7749b683093854
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:24:28,105] Trial 8 finished with value: 0.7209873176053457 and parameters: {'C': 0.0477131143709537, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 1 with value: 0.7856266193917906.


üèÉ View run Trial_9_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/315551909f17481198093d545060bf4e
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:25:05,073] Trial 9 finished with value: 0.7858993590617755 and parameters: {'C': 0.4317145430154221, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


üèÉ View run Trial_10_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/2056f10e45eb483bb33a63a89bd47473
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:25:42,008] Trial 10 finished with value: 0.4301104595663439 and parameters: {'C': 0.00010099492274538453, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_11_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/e372cfaad26a453fa9abf92933baea54
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:26:19,312] Trial 11 finished with value: 0.781671894177008 and parameters: {'C': 0.23101110581142037, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_12_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/6e3e645e2033481ea9db98bdbd691253
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:26:56,011] Trial 12 finished with value: 0.785490249556798 and parameters: {'C': 0.46116583550053997, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_13_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/cd445a69c86a406cbb2c9f1930153b10
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:27:33,068] Trial 13 finished with value: 0.7381699168144007 and parameters: {'C': 0.05957857287336037, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_14_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/9befc5b2e84d47d0aa396d531cabb342
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:28:10,519] Trial 14 finished with value: 0.7823537433519705 and parameters: {'C': 1.2209715022180858, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_15_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/5a65aeaa1e90427bbb31f2066013828b
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:28:47,075] Trial 15 finished with value: 0.769944088367653 and parameters: {'C': 0.12277564199125432, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_16_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/2495e6ae86264fcda357c1042bb64dc1
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:29:24,042] Trial 16 finished with value: 0.6243011045956635 and parameters: {'C': 0.013204007445494584, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_17_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/32e6fc8e19b542bb9e3fa30c79a9b08b
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:30:01,111] Trial 17 finished with value: 0.781671894177008 and parameters: {'C': 1.3710910105883949, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_18_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/5ef6ee8cdce646dcaedaef4a8d06b875
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:32:17,916] Trial 18 finished with value: 0.7804445656620755 and parameters: {'C': 1.1744433488379071, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_19_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/16660fa7277c497d9a5c4ceb97cb6817
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:32:54,883] Trial 19 finished with value: 0.6705304786581209 and parameters: {'C': 0.022786655815418687, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_20_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/3f8d005b923349bb87e116757a9f8d7e
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:33:31,850] Trial 20 finished with value: 0.7722623755625255 and parameters: {'C': 0.13288610723449168, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 9 with value: 0.7858993590617755.


üèÉ View run Trial_21_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/991c0bfba8304775b09f3436cc062cfd
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:34:08,834] Trial 21 finished with value: 0.7864448384017455 and parameters: {'C': 0.371563357763713, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_22_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/d9adad04695547c492ff8f94e4533019
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:34:45,859] Trial 22 finished with value: 0.786035728896768 and parameters: {'C': 0.3906325195144528, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_23_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/f255c5941c444377bb093ca38ab1e268
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:35:22,889] Trial 23 finished with value: 0.779762716487113 and parameters: {'C': 2.133293068727951, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_24_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/80ac4f1deb4842b9a41d059d10136a72
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:35:59,923] Trial 24 finished with value: 0.7593072412382381 and parameters: {'C': 0.09519397886734654, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_25_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/78b570801663471caf1c4e4ec08ce4e4
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:36:37,097] Trial 25 finished with value: 0.7846720305468431 and parameters: {'C': 0.6685448753547212, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_26_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/918dc87c91a142389a7d9e500f62bc66
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:37:13,852] Trial 26 finished with value: 0.7714441565525706 and parameters: {'C': 0.21506338112365503, 'penalty': 'l2', 'solver': 'saga'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_27_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/aa17a7a8f92f4dc5bbe21d61a3238db9
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:37:50,898] Trial 27 finished with value: 0.778671757807173 and parameters: {'C': 2.58328955019369, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


üèÉ View run Trial_28_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/baf3ec2924b849b787b8ffdd96d93f04
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:38:27,988] Trial 28 finished with value: 0.41292786035728896 and parameters: {'C': 0.0007714411270844446, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_29_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/500093177df4495f9b17fd5e9ea983dd
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:39:04,853] Trial 29 finished with value: 0.7278058093549706 and parameters: {'C': 0.027677911776279995, 'penalty': 'l2', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_30_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/0bc0d4d106854c47af3b1c6e7edf766e
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:39:42,024] Trial 30 finished with value: 0.7839901813718806 and parameters: {'C': 0.2587639222389289, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_31_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/78ff594badcf4b2e8939be63deae722b
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:40:19,706] Trial 31 finished with value: 0.7848084003818355 and parameters: {'C': 0.6117298507133914, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_32_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/eb4823f64bf248ffa066ada67fb31ccf
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:40:56,007] Trial 32 finished with value: 0.7778535387972181 and parameters: {'C': 4.232917621524278, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_33_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/1efccb671d0449e1924f351c7fe7ee59
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:41:32,923] Trial 33 finished with value: 0.7848084003818355 and parameters: {'C': 0.632408323490214, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_34_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/8c0575153bf44a869f5914db6260c841
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:42:09,831] Trial 34 finished with value: 0.7846720305468431 and parameters: {'C': 0.2724667453775898, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_35_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/de13e9fa2f664f498116146eb7740bd6
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:42:47,111] Trial 35 finished with value: 0.7752625119323605 and parameters: {'C': 6.497009151645639, 'penalty': 'l2', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_36_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/d61494d8eb97484b8b19dbf19f2743d5
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:43:24,570] Trial 36 finished with value: 0.7428064912041457 and parameters: {'C': 0.06403208756496695, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_37_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/a7fc2c47f5ee4ba1a83685484de2d934
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:44:01,299] Trial 37 finished with value: 0.7749897722623755 and parameters: {'C': 9.543864973825583, 'penalty': 'l2', 'solver': 'saga'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_38_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/911d5a97b03343d79b83184855da626a
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:44:37,818] Trial 38 finished with value: 0.6129824082912859 and parameters: {'C': 0.011744591080212806, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_39_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/350592572f9947c191b46543ae7c6a58
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:46:34,025] Trial 39 finished with value: 0.7792172371471431 and parameters: {'C': 2.268036137168019, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_40_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/f3b1878cfc5948e28225805e3f0144e8
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:47:11,101] Trial 40 finished with value: 0.7767625801172781 and parameters: {'C': 1.0100721390331915, 'penalty': 'l2', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_41_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/f0ce8b1f8472453eba976b3ec61462eb
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:47:48,015] Trial 41 finished with value: 0.7856266193917906 and parameters: {'C': 0.3933539717093809, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_42_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/fa304108559e436dbcdc391abae7a7f5
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:48:25,020] Trial 42 finished with value: 0.7747170325923906 and parameters: {'C': 0.16221536331472067, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_43_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/7af652c4bde54ec4b6947a070d5d0f45
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:49:02,013] Trial 43 finished with value: 0.7863084685667531 and parameters: {'C': 0.36356871856785905, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_44_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/d07110f787294296b168cfb82b0f27c1
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:49:39,016] Trial 44 finished with value: 0.7863084685667531 and parameters: {'C': 0.36204213428906684, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_45_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/26961097775f4e06b94373fdddd5c53a
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:50:16,335] Trial 45 finished with value: 0.7576708032183281 and parameters: {'C': 0.09134647384440132, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_46_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/bd843a48d6c544929a22fad45fbed220
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:50:53,010] Trial 46 finished with value: 0.7858993590617755 and parameters: {'C': 0.4033069140799129, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_47_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/920c7dca627a4541baf9a8182222e6e5
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:51:30,015] Trial 47 finished with value: 0.7837174417018955 and parameters: {'C': 0.7981538505808767, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_48_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/76393f9a223642b9a73413dcc46f6b23
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:52:07,384] Trial 48 finished with value: 0.7147143051956907 and parameters: {'C': 0.0440512752400271, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 21 with value: 0.7864448384017455.


üèÉ View run Trial_49_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/c3d16eb2f513497db9522ef0738b8143
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:52:44,389] Trial 49 finished with value: 0.7858993590617755 and parameters: {'C': 0.3407772951905502, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 21 with value: 0.7864448384017455.


In [9]:
# Rebuilding the classifier from the hyperparameters of the best trial
# (the study's parameter names C / penalty / solver match the estimator's arguments)
best_params = study.best_params
best_model = LogisticRegression(
    max_iter=1000,
    multi_class="ovr",
    random_state=42,
    **best_params
)

# Training and logging the best model as a separate run named "Trial_best_...";
# the returned accuracy is displayed as the cell output
log_to_mlflow(
    model_name="LogisticRegression",
    model=best_model,
    X_train=X_train_vectorized,
    X_test=X_test_vectorized,
    y_train=y_train,
    y_test=y_test,
    params=best_params,
    trial_number="best"
)



üèÉ View run Trial_best_LogisticRegression_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/489b50bce8cd4f5b8705486f9e879d34
üß™ View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


0.7864448384017455

In [10]:
# Parameter importance across the trials of the study
param_importance_fig = optuna.visualization.plot_param_importances(study)
param_importance_fig.show()

# Objective value over the course of the optimization
optimization_history_fig = optuna.visualization.plot_optimization_history(study)
optimization_history_fig.show()