In [3]:
import dagshub
dagshub.init(repo_owner='Anmol25', repo_name='youtube-sentiment-analysis', mlflow=True)

import mlflow
mlflow.set_tracking_uri("https://dagshub.com/Anmol25/youtube-sentiment-analysis.mlflow")

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=201aaea0-2705-4065-a256-d947e3c45814&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=f17a86304e7efee252ab1263d383ce69d5d88ab961fe813e820c53d900f3c132




In [4]:
import numpy as np
import pandas as pd

In [5]:
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/sentiment-preprocessed/sentiments_preprocessed.csv


In [6]:
df = pd.read_csv("/kaggle/input/sentiment-preprocessed/sentiments_preprocessed.csv")
df.head()

Unnamed: 0,clean_comment,category
0,cant believe modi,0
1,karachi total blackout,0
2,couldnt done year modi year increasing unemplo...,0
3,modi talk world tallest statue talk world larg...,-1
4,major announcement modi everyone waiting game ...,-1


In [7]:
df.isna().sum()

clean_comment    0
category         0
dtype: int64

In [9]:
mlflow.set_experiment("Exp - 5 Catboost Hyperparameter tuning")

<Experiment: artifact_location='mlflow-artifacts:/1ef67cec830548058f1d3c771b473248', creation_time=1735138212241, experiment_id='12', last_update_time=1735138212241, lifecycle_stage='active', name='Exp - 5 Catboost Hyperparameter tuning', tags={}>

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from mlflow.models.signature import infer_signature
from catboost import CatBoostClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [11]:
import optuna

In [12]:
## Remaping outputs
df['category'] = df['category'].map({-1: 2, 0: 0, 1: 1})

In [13]:
ngram_range = (1, 1)  # Unigram setting
max_features = 9000

# Vectorization using TF-IDF with 9000 max features
vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)

X_train, X_test, y_train, y_test = train_test_split(df['clean_comment'], df['category'], test_size=0.2, random_state=42, stratify=df['category'])

X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

In [14]:
def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 1000, 2000),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.3),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 2, 6),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.0),
        'verbose': True,
        "task_type": "GPU",
        "devices": "0",
        "early_stopping_rounds": 50
    }

    model = CatBoostClassifier(**params)

    scores = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')

    return np.mean(scores)

In [15]:
study = optuna.create_study(direction="maximize")
study.optimize(objective,n_trials=10)

[I 2024-12-26 06:32:47,962] A new study created in memory with name: no-name-5f21ec63-5339-4168-949d-778359c4d286


0:	learn: 1.0589891	total: 17.1s	remaining: 6h 33s
1:	learn: 1.0305206	total: 17.1s	remaining: 3h 38s
2:	learn: 1.0091453	total: 17.1s	remaining: 2h 37s
3:	learn: 0.9913669	total: 17.2s	remaining: 1h 30m 36s
4:	learn: 0.9765515	total: 17.2s	remaining: 1h 12m 36s
5:	learn: 0.9635217	total: 17.3s	remaining: 1h 36s
6:	learn: 0.9517513	total: 17.3s	remaining: 52m 1s
7:	learn: 0.9403266	total: 17.3s	remaining: 45m 34s
8:	learn: 0.9301226	total: 17.4s	remaining: 40m 34s
9:	learn: 0.9206075	total: 17.4s	remaining: 36m 33s
10:	learn: 0.9114194	total: 17.5s	remaining: 33m 17s
11:	learn: 0.9024862	total: 17.5s	remaining: 30m 33s
12:	learn: 0.8941127	total: 17.5s	remaining: 28m 14s
13:	learn: 0.8865693	total: 17.6s	remaining: 26m 15s
14:	learn: 0.8795965	total: 17.6s	remaining: 24m 32s
15:	learn: 0.8729116	total: 17.6s	remaining: 23m 2s
16:	learn: 0.8660925	total: 17.7s	remaining: 21m 42s
17:	learn: 0.8593321	total: 17.7s	remaining: 20m 31s
18:	learn: 0.8527518	total: 17.8s	remaining: 19m 28s
19:

[I 2024-12-26 06:35:15,354] Trial 0 finished with value: 0.8784314952091999 and parameters: {'iterations': 1269, 'depth': 6, 'learning_rate': 0.29669128113792165, 'l2_leaf_reg': 3.194375405718128, 'bagging_temperature': 0.9010182088228286}. Best is trial 0 with value: 0.8784314952091999.


0:	learn: 1.0725098	total: 32.1ms	remaining: 47.2s
1:	learn: 1.0517225	total: 54.7ms	remaining: 40.2s
2:	learn: 1.0361460	total: 76.9ms	remaining: 37.7s
3:	learn: 1.0223648	total: 99.2ms	remaining: 36.4s
4:	learn: 1.0100962	total: 121ms	remaining: 35.6s
5:	learn: 0.9990908	total: 144ms	remaining: 35.1s
6:	learn: 0.9859193	total: 172ms	remaining: 36s
7:	learn: 0.9769113	total: 194ms	remaining: 35.4s
8:	learn: 0.9692635	total: 216ms	remaining: 35.1s
9:	learn: 0.9614982	total: 239ms	remaining: 34.9s
10:	learn: 0.9539478	total: 261ms	remaining: 34.6s
11:	learn: 0.9472512	total: 283ms	remaining: 34.4s
12:	learn: 0.9406879	total: 304ms	remaining: 34.2s
13:	learn: 0.9342396	total: 326ms	remaining: 34s
14:	learn: 0.9279700	total: 348ms	remaining: 33.8s
15:	learn: 0.9219746	total: 370ms	remaining: 33.7s
16:	learn: 0.9160392	total: 392ms	remaining: 33.6s
17:	learn: 0.9104850	total: 414ms	remaining: 33.4s
18:	learn: 0.9052825	total: 436ms	remaining: 33.3s
19:	learn: 0.9003769	total: 457ms	remaini

[I 2024-12-26 06:36:52,422] Trial 1 finished with value: 0.8796200723572404 and parameters: {'iterations': 1472, 'depth': 4, 'learning_rate': 0.2742783232455507, 'l2_leaf_reg': 2.0026557417146265, 'bagging_temperature': 0.6984833798695017}. Best is trial 1 with value: 0.8796200723572404.


0:	learn: 1.0710455	total: 155ms	remaining: 4m 28s
1:	learn: 1.0504092	total: 292ms	remaining: 4m 12s
2:	learn: 1.0329582	total: 442ms	remaining: 4m 14s
3:	learn: 1.0182014	total: 573ms	remaining: 4m 7s
4:	learn: 1.0059943	total: 709ms	remaining: 4m 4s
5:	learn: 0.9939440	total: 854ms	remaining: 4m 5s
6:	learn: 0.9832812	total: 980ms	remaining: 4m 1s
7:	learn: 0.9739046	total: 1.11s	remaining: 3m 59s
8:	learn: 0.9650061	total: 1.24s	remaining: 3m 57s
9:	learn: 0.9566650	total: 1.36s	remaining: 3m 55s
10:	learn: 0.9482384	total: 1.5s	remaining: 3m 55s
11:	learn: 0.9406096	total: 1.63s	remaining: 3m 54s
12:	learn: 0.9337309	total: 1.76s	remaining: 3m 52s
13:	learn: 0.9269933	total: 1.88s	remaining: 3m 50s
14:	learn: 0.9205297	total: 2s	remaining: 3m 48s
15:	learn: 0.9146055	total: 2.12s	remaining: 3m 47s
16:	learn: 0.9083739	total: 2.25s	remaining: 3m 46s
17:	learn: 0.9026970	total: 2.36s	remaining: 3m 44s
18:	learn: 0.8976046	total: 2.48s	remaining: 3m 43s
19:	learn: 0.8923484	total: 2.

[I 2024-12-26 06:46:22,195] Trial 2 finished with value: 0.8825498107660067 and parameters: {'iterations': 1732, 'depth': 9, 'learning_rate': 0.13987456868447948, 'l2_leaf_reg': 4.590124379465934, 'bagging_temperature': 0.2600196972090568}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0657444	total: 70.1ms	remaining: 1m 24s
1:	learn: 1.0437292	total: 130ms	remaining: 1m 18s
2:	learn: 1.0253967	total: 183ms	remaining: 1m 13s
3:	learn: 1.0094202	total: 244ms	remaining: 1m 13s
4:	learn: 0.9946682	total: 308ms	remaining: 1m 14s
5:	learn: 0.9827213	total: 363ms	remaining: 1m 12s
6:	learn: 0.9719563	total: 417ms	remaining: 1m 11s
7:	learn: 0.9607030	total: 476ms	remaining: 1m 11s
8:	learn: 0.9508392	total: 529ms	remaining: 1m 10s
9:	learn: 0.9422078	total: 580ms	remaining: 1m 9s
10:	learn: 0.9340879	total: 633ms	remaining: 1m 9s
11:	learn: 0.9261570	total: 687ms	remaining: 1m 8s
12:	learn: 0.9186068	total: 740ms	remaining: 1m 8s
13:	learn: 0.9111339	total: 792ms	remaining: 1m 7s
14:	learn: 0.9043073	total: 844ms	remaining: 1m 7s
15:	learn: 0.8976383	total: 896ms	remaining: 1m 6s
16:	learn: 0.8915315	total: 945ms	remaining: 1m 6s
17:	learn: 0.8856792	total: 995ms	remaining: 1m 5s
18:	learn: 0.8798312	total: 1.04s	remaining: 1m 5s
19:	learn: 0.8742506	total: 1.1

[I 2024-12-26 06:49:15,672] Trial 3 finished with value: 0.8817261476546454 and parameters: {'iterations': 1211, 'depth': 7, 'learning_rate': 0.20806400370320505, 'l2_leaf_reg': 4.944373177433967, 'bagging_temperature': 0.046693359789032884}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0800833	total: 36.9ms	remaining: 47.7s
1:	learn: 1.0649964	total: 66.2ms	remaining: 42.7s
2:	learn: 1.0521399	total: 88.5ms	remaining: 38s
3:	learn: 1.0412153	total: 117ms	remaining: 37.5s
4:	learn: 1.0315419	total: 139ms	remaining: 35.7s
5:	learn: 1.0221275	total: 161ms	remaining: 34.5s
6:	learn: 1.0136661	total: 183ms	remaining: 33.7s
7:	learn: 1.0061592	total: 206ms	remaining: 33s
8:	learn: 0.9994659	total: 228ms	remaining: 32.5s
9:	learn: 0.9925046	total: 254ms	remaining: 32.6s
10:	learn: 0.9865223	total: 276ms	remaining: 32.2s
11:	learn: 0.9803919	total: 299ms	remaining: 31.9s
12:	learn: 0.9745593	total: 322ms	remaining: 31.6s
13:	learn: 0.9688209	total: 344ms	remaining: 31.4s
14:	learn: 0.9636830	total: 366ms	remaining: 31.2s
15:	learn: 0.9584958	total: 389ms	remaining: 31s
16:	learn: 0.9538555	total: 412ms	remaining: 30.9s
17:	learn: 0.9492954	total: 434ms	remaining: 30.7s
18:	learn: 0.9446958	total: 457ms	remaining: 30.6s
19:	learn: 0.9404523	total: 479ms	remaining:

[I 2024-12-26 06:50:42,894] Trial 4 finished with value: 0.8737188910783731 and parameters: {'iterations': 1292, 'depth': 4, 'learning_rate': 0.18509027903889264, 'l2_leaf_reg': 2.8556484854795143, 'bagging_temperature': 0.386687351875194}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0760853	total: 39.5ms	remaining: 57.9s
1:	learn: 1.0579550	total: 64ms	remaining: 46.9s
2:	learn: 1.0441532	total: 86.7ms	remaining: 42.3s
3:	learn: 1.0326032	total: 109ms	remaining: 39.8s
4:	learn: 1.0214929	total: 131ms	remaining: 38.4s
5:	learn: 1.0113589	total: 155ms	remaining: 37.6s
6:	learn: 1.0022644	total: 177ms	remaining: 36.9s
7:	learn: 0.9945549	total: 199ms	remaining: 36.4s
8:	learn: 0.9837525	total: 225ms	remaining: 36.4s
9:	learn: 0.9768240	total: 248ms	remaining: 36.1s
10:	learn: 0.9703335	total: 270ms	remaining: 35.8s
11:	learn: 0.9639356	total: 292ms	remaining: 35.4s
12:	learn: 0.9573031	total: 314ms	remaining: 35.1s
13:	learn: 0.9512295	total: 336ms	remaining: 34.9s
14:	learn: 0.9451722	total: 358ms	remaining: 34.7s
15:	learn: 0.9397928	total: 380ms	remaining: 34.5s
16:	learn: 0.9345997	total: 402ms	remaining: 34.3s
17:	learn: 0.9298821	total: 425ms	remaining: 34.2s
18:	learn: 0.9248195	total: 449ms	remaining: 34.2s
19:	learn: 0.9202804	total: 470ms	remain

[I 2024-12-26 06:52:20,933] Trial 5 finished with value: 0.8798390207792478 and parameters: {'iterations': 1467, 'depth': 4, 'learning_rate': 0.2312286289276321, 'l2_leaf_reg': 3.4405944726419952, 'bagging_temperature': 0.25152709577863963}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0816414	total: 70.4ms	remaining: 1m 37s
1:	learn: 1.0681413	total: 130ms	remaining: 1m 29s
2:	learn: 1.0562374	total: 196ms	remaining: 1m 30s
3:	learn: 1.0461707	total: 256ms	remaining: 1m 28s
4:	learn: 1.0363060	total: 322ms	remaining: 1m 28s
5:	learn: 1.0277423	total: 381ms	remaining: 1m 27s
6:	learn: 1.0200128	total: 438ms	remaining: 1m 26s
7:	learn: 1.0127170	total: 492ms	remaining: 1m 24s
8:	learn: 1.0059234	total: 547ms	remaining: 1m 23s
9:	learn: 0.9990063	total: 611ms	remaining: 1m 23s
10:	learn: 0.9929559	total: 665ms	remaining: 1m 22s
11:	learn: 0.9872067	total: 719ms	remaining: 1m 22s
12:	learn: 0.9818192	total: 775ms	remaining: 1m 21s
13:	learn: 0.9764557	total: 829ms	remaining: 1m 21s
14:	learn: 0.9715198	total: 880ms	remaining: 1m 20s
15:	learn: 0.9665755	total: 933ms	remaining: 1m 19s
16:	learn: 0.9613945	total: 996ms	remaining: 1m 20s
17:	learn: 0.9568764	total: 1.05s	remaining: 1m 19s
18:	learn: 0.9527376	total: 1.1s	remaining: 1m 19s
19:	learn: 0.9485070	t

[I 2024-12-26 06:55:40,099] Trial 6 finished with value: 0.8693399226382242 and parameters: {'iterations': 1383, 'depth': 7, 'learning_rate': 0.10157676618631646, 'l2_leaf_reg': 5.451657608417495, 'bagging_temperature': 0.14086367323387616}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0476810	total: 233ms	remaining: 5m 15s
1:	learn: 1.0136047	total: 471ms	remaining: 5m 18s
2:	learn: 0.9870149	total: 703ms	remaining: 5m 17s
3:	learn: 0.9675142	total: 910ms	remaining: 5m 7s
4:	learn: 0.9498091	total: 1.12s	remaining: 5m 2s
5:	learn: 0.9335261	total: 1.33s	remaining: 4m 59s
6:	learn: 0.9189968	total: 1.55s	remaining: 4m 57s
7:	learn: 0.9062351	total: 1.75s	remaining: 4m 53s
8:	learn: 0.8947681	total: 1.95s	remaining: 4m 51s
9:	learn: 0.8831540	total: 2.17s	remaining: 4m 51s
10:	learn: 0.8729240	total: 2.36s	remaining: 4m 48s
11:	learn: 0.8628716	total: 2.57s	remaining: 4m 47s
12:	learn: 0.8532185	total: 2.77s	remaining: 4m 45s
13:	learn: 0.8444955	total: 2.97s	remaining: 4m 44s
14:	learn: 0.8365384	total: 3.17s	remaining: 4m 43s
15:	learn: 0.8290969	total: 3.38s	remaining: 4m 42s
16:	learn: 0.8213809	total: 3.59s	remaining: 4m 42s
17:	learn: 0.8143132	total: 3.78s	remaining: 4m 41s
18:	learn: 0.8074431	total: 3.98s	remaining: 4m 39s
19:	learn: 0.8009407	tot

[I 2024-12-26 07:08:22,396] Trial 7 finished with value: 0.8806522577752754 and parameters: {'iterations': 1355, 'depth': 10, 'learning_rate': 0.2579855374617489, 'l2_leaf_reg': 3.797667810809768, 'bagging_temperature': 0.7958031559939924}. Best is trial 2 with value: 0.8825498107660067.


0:	learn: 1.0647990	total: 69.9ms	remaining: 1m 51s
1:	learn: 1.0423308	total: 129ms	remaining: 1m 42s
2:	learn: 1.0235727	total: 183ms	remaining: 1m 36s
3:	learn: 1.0072531	total: 241ms	remaining: 1m 35s
4:	learn: 0.9920127	total: 305ms	remaining: 1m 36s
5:	learn: 0.9799913	total: 358ms	remaining: 1m 34s
6:	learn: 0.9688077	total: 414ms	remaining: 1m 33s
7:	learn: 0.9573779	total: 478ms	remaining: 1m 34s
8:	learn: 0.9473375	total: 531ms	remaining: 1m 33s
9:	learn: 0.9384463	total: 582ms	remaining: 1m 32s
10:	learn: 0.9300691	total: 638ms	remaining: 1m 31s
11:	learn: 0.9220930	total: 689ms	remaining: 1m 30s
12:	learn: 0.9146501	total: 740ms	remaining: 1m 29s
13:	learn: 0.9073939	total: 794ms	remaining: 1m 29s
14:	learn: 0.9004017	total: 846ms	remaining: 1m 28s
15:	learn: 0.8937694	total: 896ms	remaining: 1m 28s
16:	learn: 0.8872468	total: 944ms	remaining: 1m 27s
17:	learn: 0.8812727	total: 993ms	remaining: 1m 26s
18:	learn: 0.8754232	total: 1.04s	remaining: 1m 26s
19:	learn: 0.8697612	

[I 2024-12-26 07:12:06,984] Trial 8 finished with value: 0.8847914255627495 and parameters: {'iterations': 1592, 'depth': 7, 'learning_rate': 0.21253472976495014, 'l2_leaf_reg': 3.328871477431899, 'bagging_temperature': 0.05819662811218529}. Best is trial 8 with value: 0.8847914255627495.


0:	learn: 1.0690015	total: 42.3ms	remaining: 1m 5s
1:	learn: 1.0472732	total: 72.5ms	remaining: 55.9s
2:	learn: 1.0305218	total: 102ms	remaining: 52.3s
3:	learn: 1.0158087	total: 130ms	remaining: 50.2s
4:	learn: 1.0029127	total: 159ms	remaining: 49.1s
5:	learn: 0.9891930	total: 193ms	remaining: 49.5s
6:	learn: 0.9793006	total: 222ms	remaining: 48.8s
7:	learn: 0.9698416	total: 251ms	remaining: 48.2s
8:	learn: 0.9610426	total: 280ms	remaining: 47.8s
9:	learn: 0.9526373	total: 309ms	remaining: 47.4s
10:	learn: 0.9448537	total: 337ms	remaining: 47s
11:	learn: 0.9372833	total: 366ms	remaining: 46.8s
12:	learn: 0.9300454	total: 394ms	remaining: 46.4s
13:	learn: 0.9231721	total: 425ms	remaining: 46.5s
14:	learn: 0.9167742	total: 452ms	remaining: 46.2s
15:	learn: 0.9106009	total: 481ms	remaining: 46s
16:	learn: 0.9050336	total: 509ms	remaining: 45.7s
17:	learn: 0.8992959	total: 536ms	remaining: 45.5s
18:	learn: 0.8940479	total: 563ms	remaining: 45.3s
19:	learn: 0.8890252	total: 591ms	remaining

[I 2024-12-26 07:14:12,887] Trial 9 finished with value: 0.8821223400373256 and parameters: {'iterations': 1546, 'depth': 5, 'learning_rate': 0.2555889010023592, 'l2_leaf_reg': 5.971015609730032, 'bagging_temperature': 0.05823809681799652}. Best is trial 8 with value: 0.8847914255627495.


In [16]:
 # Extract the best hyperparameters
best_params = study.best_params
best_params

{'iterations': 1592,
 'depth': 7,
 'learning_rate': 0.21253472976495014,
 'l2_leaf_reg': 3.328871477431899,
 'bagging_temperature': 0.05819662811218529}

In [17]:
import json
with open("best_params.json", "w") as file:
    json.dump(best_params, file)

In [18]:
with mlflow.start_run():
    mlflow.set_tag("mlflow.runName", "CatBoost HP Tuning using Optuna 10 Trials")
    mlflow.set_tag("experiment_type", "Catboost HP Tuning")
    mlflow.set_tag("model_type", "CatBoostClassifier")

    # Add a description
    mlflow.set_tag("description", "CatBoost HP Tuning with Optuna 10 trials")

    model = CatBoostClassifier(**best_params, verbose = True, task_type = "GPU",
                               devices = "0", early_stopping_rounds=50)

    # Log vectorizer parameters
    mlflow.log_param("vectorizer_type", "TF-IDF")
    mlflow.log_param("ngram_range", ngram_range)
    mlflow.log_param("vectorizer_max_features", max_features)
    mlflow.log_param("Algo_name", "CatBoostClassifier")

    # Train model
    model.fit(X_train, y_train)

    # Make predictions and log metrics
    y_pred = model.predict(X_test)

    # Log accuracy
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log classification report
    classification_rep = classification_report(y_test, y_pred, output_dict=True)
    for label, metrics in classification_rep.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric}", value)

    # Log confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix: TF-IDF Unigrams, max_features={max_features}")
    plt.savefig("confusion_matrix.png")
    mlflow.log_artifact("confusion_matrix.png")
    plt.close()

    ## Create model_signature
    signature = infer_signature(X_train[:1], [y_train.iloc[0]])

    # Log hyperparameters
    try:
        hyperparameters = model.get_all_params()
        for param_name, param_value in hyperparameters.items():
            mlflow.log_param(param_name, param_value)
    except AttributeError:
        mlflow.log_param("hyperparameters", "Not available for this model")


    mlflow.catboost.log_model(cb_model=model, artifact_path=f"Catboost best params", signature=signature)
    mlflow.end_run()

0:	learn: 1.0643743	total: 111ms	remaining: 2m 57s
1:	learn: 1.0417485	total: 194ms	remaining: 2m 34s
2:	learn: 1.0225823	total: 263ms	remaining: 2m 19s
3:	learn: 1.0035347	total: 352ms	remaining: 2m 19s
4:	learn: 0.9905444	total: 420ms	remaining: 2m 13s
5:	learn: 0.9784346	total: 489ms	remaining: 2m 9s
6:	learn: 0.9673153	total: 558ms	remaining: 2m 6s
7:	learn: 0.9570654	total: 629ms	remaining: 2m 4s
8:	learn: 0.9474708	total: 695ms	remaining: 2m 2s
9:	learn: 0.9382662	total: 766ms	remaining: 2m 1s
10:	learn: 0.9299538	total: 831ms	remaining: 1m 59s
11:	learn: 0.9222025	total: 897ms	remaining: 1m 58s
12:	learn: 0.9143560	total: 965ms	remaining: 1m 57s
13:	learn: 0.9071569	total: 1.03s	remaining: 1m 56s
14:	learn: 0.9001948	total: 1.09s	remaining: 1m 55s
15:	learn: 0.8936547	total: 1.16s	remaining: 1m 54s
16:	learn: 0.8873454	total: 1.23s	remaining: 1m 53s
17:	learn: 0.8813017	total: 1.29s	remaining: 1m 52s
18:	learn: 0.8756755	total: 1.35s	remaining: 1m 52s
19:	learn: 0.8701205	total:



🏃 View run CatBoost HP Tuning using Optuna 10 Trials at: https://dagshub.com/Anmol25/youtube-sentiment-analysis.mlflow/#/experiments/12/runs/5df35dcee5434d3a8ae9992ec1e36228
🧪 View experiment at: https://dagshub.com/Anmol25/youtube-sentiment-analysis.mlflow/#/experiments/12


In [19]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.96      0.91      7979
           1       0.92      0.85      0.88      8000
           2       0.90      0.86      0.88      8000

    accuracy                           0.89     23979
   macro avg       0.89      0.89      0.89     23979
weighted avg       0.89      0.89      0.89     23979

