In [1]:
# Imports

import plotly
import optuna
import mlflow
import dagshub
import mlflow.sklearn

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Setting up DagsHub

dagshub.init(repo_owner='SushrutGaikwad', repo_name='youtube-comments-analyzer', mlflow=True)

# Data

In [3]:
PREPROCESSED_DATA_PATH = "../data/processed/reddit_preprocessed.csv"
df = pd.read_csv(PREPROCESSED_DATA_PATH)
df.dropna(subset=["clean_comment"], inplace=True)
df.shape

(36662, 2)

# Running the experiment

In [4]:
# Setting experiment name

mlflow.set_experiment("Exp 5: Detailed hyperparameter tuning")

<Experiment: artifact_location='mlflow-artifacts:/50f3ef58d8344cd3bd0def333af39f2d', creation_time=1749058038347, experiment_id='6', last_update_time=1749064965442, lifecycle_stage='active', name='Exp 5: Detailed hyperparameter tuning', tags={}>

## Preprocessing

In [5]:
# Remapping class labels from {-1, 0, 1} to {2, 0, 1}
mapping = {
    -1: 2,
    0: 0,
    1: 1
}
df["category"] = df["category"].map(mapping)

# Removing missing sentiments
df.dropna(subset=["category"], inplace=True)

# Feature engineering
ngram_range = (1, 2)
max_features = 1000
vectorizer = CountVectorizer(
    ngram_range=ngram_range,
    max_features=max_features
)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    df["clean_comment"],
    df["category"],
    test_size=0.2,
    random_state=42,
    stratify=df["category"]
)
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Undersampling
rus = RandomUnderSampler(random_state=42)
X_train_vectorized, y_train = rus.fit_resample(
    X_train_vectorized,
    y_train
)

## Running the experiment

In [6]:
# Function to log results to MLFlow
def log_to_mlflow(
    model_name,
    model,
    X_train,
    X_test,
    y_train,
    y_test,
    params,
    trial_number
):
    with mlflow.start_run():
        # Tags
        mlflow.set_tag(
            "mlflow.runName", f"Trial_{trial_number}_{model_name}_undersampling_BoW_1000_bigrams"
        )
        mlflow.set_tag("experiment_type", "ML_models_comparison")
        
        # Logging model name as a parameter
        mlflow.log_param("model_name", model_name)
        
        # Logging hyperparameters
        for key, value in params.items():
            mlflow.log_param(key, value)
        
        # Initializing and training the model
        model.fit(X_train, y_train)
        
        # Making predictions on the test set and logging metrics
        y_pred = model.predict(X_test)
        
        # Logging accuracy
        accuracy = accuracy_score(
            y_true=y_test,
            y_pred=y_pred
        )
        mlflow.log_metric("accuracy", accuracy)
        
        # Logging classification report metrics
        classification_rep = classification_report(
            y_true=y_test,
            y_pred=y_pred,
            output_dict=True
        )
        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}: {metric}", value)
        
        # Logging the model
        mlflow.sklearn.log_model(model, f"{model_name}_model")
        
        return accuracy

In [7]:
def objective(trial):
    # Hyperparameter space
    C = trial.suggest_float("C", 1e-2, 100.0, log=True)
    kernel = trial.suggest_categorical(
        "kernel", ["linear", "rbf", "poly", "sigmoid"]
    )
    gamma = trial.suggest_categorical(
        "gamma", ["scale", "auto"]
    )
    if kernel == "poly":
        degree = trial.suggest_int("degree", 2, 5)
    else:
        degree = 3
    
    params = {
        "C": C,
        "kernel": kernel,
        "gamma": gamma,
        "degree": degree
    }
    
    model = SVC(
        C=C,
        kernel=kernel,
        gamma=gamma,
        degree=degree,
        random_state=42
    )
    
    # Logging each trial as a separate run
    accuracy = log_to_mlflow(
        model_name="SVM",
        model=model,
        X_train=X_train_vectorized,
        X_test=X_test_vectorized,
        y_train=y_train,
        y_test=y_test,
        params=params,
        trial_number=trial.number
    )
    
    return accuracy

In [8]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-06-05 03:51:28,324] A new study created in memory with name: no-name-780ba638-463a-4b55-90c5-669447586cd2


🏃 View run Trial_0_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/e43e4974fb50470388aff2549b43de3c
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 03:52:24,661] Trial 0 finished with value: 0.6787126687576708 and parameters: {'C': 0.010387072018315745, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 0 with value: 0.6787126687576708.


🏃 View run Trial_1_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/81a4003be5ec4fd9ac64d8e4b47de83a
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 03:53:26,306] Trial 1 finished with value: 0.6789854084276558 and parameters: {'C': 5.3874117297405135, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 1 with value: 0.6789854084276558.


🏃 View run Trial_2_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/67543b99c671439483e756a53a52c302
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:01:16,524] Trial 2 finished with value: 0.7744442929224056 and parameters: {'C': 14.063474943456162, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 2 with value: 0.7744442929224056.


🏃 View run Trial_3_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/53c45fe431324216972891efcba02551
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:02:41,858] Trial 3 finished with value: 0.7781262784672031 and parameters: {'C': 0.5739288319137371, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_4_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4a6dcf2b56364b2983c1f55f4886432e
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:03:54,840] Trial 4 finished with value: 0.38306286649393156 and parameters: {'C': 48.56329977126172, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_5_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/e1ce72f303124f3986b1b3618b2e5efe
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:05:34,266] Trial 5 finished with value: 0.7057138960861857 and parameters: {'C': 19.0036205853721, 'kernel': 'poly', 'gamma': 'scale', 'degree': 2}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_6_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/d33784d0d1e844c9b9f99d81a392e5ca
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:06:17,155] Trial 6 finished with value: 0.5512068730396836 and parameters: {'C': 67.98616271765268, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_7_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/fc311feb78f449eaacf2d669cd1f8ede
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:07:19,849] Trial 7 finished with value: 0.6768034910677758 and parameters: {'C': 10.496959579771987, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_8_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/b5cfa590c6d94eec8c435cfa0c59d521
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:31:19,809] Trial 8 finished with value: 0.771580526387563 and parameters: {'C': 59.58363294064534, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_9_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/e1ce0465270646a0a492477f2cc0ca67
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:32:52,380] Trial 9 finished with value: 0.7479885449338606 and parameters: {'C': 65.46909981935391, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 3 with value: 0.7781262784672031.


🏃 View run Trial_10_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/481a0bf64d934e9abee09d85f4836e85
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:33:47,397] Trial 10 finished with value: 0.7794899768171281 and parameters: {'C': 0.2372995619871679, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 10 with value: 0.7794899768171281.


🏃 View run Trial_11_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/0af582296e074caca2b2196bfba352ea
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:34:42,766] Trial 11 finished with value: 0.7794899768171281 and parameters: {'C': 0.19222452766114684, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 10 with value: 0.7794899768171281.


🏃 View run Trial_12_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/c85a659c5a0245f2b6dd559bc1d1b2b6
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:35:35,280] Trial 12 finished with value: 0.7796263466521206 and parameters: {'C': 0.17723576086220455, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_13_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/000e2926af92431e98480684a4f17baa
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:36:25,858] Trial 13 finished with value: 0.7635347061230056 and parameters: {'C': 0.06322780778520554, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_14_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/0273c7c1300c4b61851d26d7737304bc
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:37:42,205] Trial 14 finished with value: 0.7768989499522706 and parameters: {'C': 1.2453831467982484, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_15_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/5444333fc3894aa78cb9abe8b4feea60
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:38:29,924] Trial 15 finished with value: 0.7612164189281331 and parameters: {'C': 0.06049350917361268, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_16_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/184c524ed0174e529ea9402e6f6b149b
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:39:11,670] Trial 16 finished with value: 0.5707077594436111 and parameters: {'C': 1.755231807305903, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_17_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4f3007d2ec5a4612aa4f31392fa0689d
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:40:09,458] Trial 17 finished with value: 0.7017591708714033 and parameters: {'C': 0.27709295114843285, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


🏃 View run Trial_18_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/8525a7d4a5ad430f88f50ae6e1aa072e
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:41:12,996] Trial 18 finished with value: 0.22610118641756444 and parameters: {'C': 0.052555259815384135, 'kernel': 'poly', 'gamma': 'auto', 'degree': 5}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_19_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/96bb0f59c8614f839ca5244f5157b8ee
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:42:04,714] Trial 19 finished with value: 0.6862130096822583 and parameters: {'C': 0.012184246939106283, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_20_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4b01276742564fa7a50905ebbc292c5c
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:42:56,340] Trial 20 finished with value: 0.7796263466521206 and parameters: {'C': 0.1767395590949486, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_21_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/7cd26537e30340d4b1458ef539c63d87
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:43:48,476] Trial 21 finished with value: 0.7794899768171281 and parameters: {'C': 0.1836224362055619, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_22_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/0457f3541eaf4c3581d66403f6d83e35
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:44:54,798] Trial 22 finished with value: 0.7781262784672031 and parameters: {'C': 0.5295320761769526, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_23_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/7afd600c747243808b634e7c1912de44
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:45:45,356] Trial 23 finished with value: 0.7333969725896632 and parameters: {'C': 0.027158879470522614, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_24_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/ff738889e8c04cfaaf72f255bf9323da
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:46:35,715] Trial 24 finished with value: 0.774580662757398 and parameters: {'C': 0.10727660801422532, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_25_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/55648dc8e73e4d449cb4a79c9d9e29b8
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:48:23,481] Trial 25 finished with value: 0.7763534706123005 and parameters: {'C': 2.637759077103145, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_26_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/727f8c5be4ab46ee98114bff2bb94d1f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:49:26,689] Trial 26 finished with value: 0.4632483294695213 and parameters: {'C': 0.4154142241022733, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_27_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/1a94192262a64005ba1efff03adf84d2
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:50:31,691] Trial 27 finished with value: 0.36560752761489157 and parameters: {'C': 0.1103275688352927, 'kernel': 'poly', 'gamma': 'scale', 'degree': 5}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_28_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4b4bce88c4bf40efb04e2c8156c60502
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:51:33,450] Trial 28 finished with value: 0.4861584617482613 and parameters: {'C': 0.02931161632845883, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_29_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/8bd85345f7e1438ba302a6cd094f7842
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:52:52,675] Trial 29 finished with value: 0.777308059457248 and parameters: {'C': 1.1105903665269061, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_30_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/2e156aca315e4f92ae655ed4e9219144
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:53:49,584] Trial 30 finished with value: 0.778671757807173 and parameters: {'C': 0.28310536096732836, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 12 with value: 0.7796263466521206.


🏃 View run Trial_31_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/2286529680334f56bdc16c7940d80226
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:54:42,619] Trial 31 finished with value: 0.779762716487113 and parameters: {'C': 0.1888550087160269, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_32_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/3e91d647b7de492eb826d0b7fc556572
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:55:34,163] Trial 32 finished with value: 0.7758079912723306 and parameters: {'C': 0.11847693060195794, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_33_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/fdb05b02066d43a389fa9243c2bd2597
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:56:45,039] Trial 33 finished with value: 0.7781262784672031 and parameters: {'C': 0.7553415450939983, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_34_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/b9175770b85a4a6c9ca718529853ca94
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:57:45,508] Trial 34 finished with value: 0.778671757807173 and parameters: {'C': 0.33241811629931783, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_35_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/620234d8590842c291c1322a78b10b9f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:58:36,488] Trial 35 finished with value: 0.7794899768171281 and parameters: {'C': 0.17554201796468402, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_36_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/509d72815d834e78bb74f7a3805b1a76
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 04:59:41,385] Trial 36 finished with value: 0.7730805945724806 and parameters: {'C': 2.7305111035631113, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_37_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/e97921fcae714f10adccf7a0c1a88214
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:00:42,634] Trial 37 finished with value: 0.3687440338197191 and parameters: {'C': 0.025259229126544307, 'kernel': 'poly', 'gamma': 'scale', 'degree': 3}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_38_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/9c124742fb984b8597d3087fde99e89e
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:01:52,053] Trial 38 finished with value: 0.7775807991272331 and parameters: {'C': 0.7031736059938236, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_39_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/a75de91db26146cc8022409679db774e
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:02:49,363] Trial 39 finished with value: 0.5745261148234011 and parameters: {'C': 0.08406822745386751, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_40_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/b66aeecbbf1b4cd7983a48450d0ca761
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:03:38,520] Trial 40 finished with value: 0.7512614209736806 and parameters: {'C': 0.04241765528851846, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_41_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/8405363aa4b042d6bdd8d1d9acbece78
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:04:30,300] Trial 41 finished with value: 0.7790808673121505 and parameters: {'C': 0.18581295425711378, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_42_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/eabd941dd4ad4d7f96214f2d7c053b7b
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:05:22,996] Trial 42 finished with value: 0.779762716487113 and parameters: {'C': 0.1880951304671555, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_43_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/ffcae2d5d274463a82ffe3ccc0b9905c
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:06:26,566] Trial 43 finished with value: 0.7789444974771581 and parameters: {'C': 0.44954489468106307, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_44_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/4823a994b66d42e4880620edcc8353b6
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:07:16,496] Trial 44 finished with value: 0.7789444974771581 and parameters: {'C': 0.14473206891676227, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_45_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/a9295b3bb6c14512b4c1fe578383f1e4
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:08:12,080] Trial 45 finished with value: 0.7792172371471431 and parameters: {'C': 0.26950853186767854, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_46_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/33209d782b474a7a9a5f734e8675a15f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:09:01,169] Trial 46 finished with value: 0.7692622391926905 and parameters: {'C': 0.08087347984053009, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_47_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/88d7372481104ae0853f52d6778bccd0
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:10:29,035] Trial 47 finished with value: 0.7612164189281331 and parameters: {'C': 32.696494004189404, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_48_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/2ac33c27b3c14b8ea5dd6491358ef0ce
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:11:32,131] Trial 48 finished with value: 0.3915177962634665 and parameters: {'C': 0.8487968884362158, 'kernel': 'poly', 'gamma': 'scale', 'degree': 4}. Best is trial 31 with value: 0.779762716487113.


🏃 View run Trial_49_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/3549b0e8c7b643479ae6d0aede0f210c
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


[I 2025-06-05 05:12:31,253] Trial 49 finished with value: 0.5100231828719487 and parameters: {'C': 0.03974167545259752, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 31 with value: 0.779762716487113.


In [11]:
# Getting the best parameters
best_params = study.best_params
best_model = SVC(
    C=best_params["C"],
    kernel=best_params["kernel"],
    gamma=best_params["gamma"],
    # degree=best_params["degree"],
    random_state=42
)

# Logging the best model
log_to_mlflow(
    model_name="SVM",
    model=best_model,
    X_train=X_train_vectorized,
    X_test=X_test_vectorized,
    y_train=y_train,
    y_test=y_test,
    params=best_params,
    trial_number="best"
)



🏃 View run Trial_best_SVM_undersampling_BoW_1000_bigrams at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6/runs/01082bec725b4e9c98df3a1cfbcc2c22
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/youtube-comments-analyzer.mlflow/#/experiments/6


0.779762716487113

In [12]:
# Plotting parameter importance
optuna.visualization.plot_param_importances(study).show()

# Plotting optimization history
optuna.visualization.plot_optimization_history(study).show()