In [1]:
# NaiveBayesModel.ipynb

# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
# Step 2: Load the dataset
# The CSV is read from the working directory; the 'Comment' (text) and
# 'Sentiment' (label) columns are the only ones used below.
df = pd.read_csv('dataset.csv')

# Step 3: Inspect the data (optional)
print(df.head())
print(df['Sentiment'].value_counts())

# Step 4: Separate features and labels
# read_csv turns empty fields into NaN, and TfidfVectorizer raises a
# ValueError when it meets a NaN document. Rows missing either the text or
# the label are therefore left out of the modelling data. `df` itself is not
# modified, so the raw frame stays available for inspection.
labelled = df.dropna(subset=['Comment', 'Sentiment'])
n_dropped = len(df) - len(labelled)
if n_dropped:
    print(f"Dropped {n_dropped} row(s) with a missing Comment or Sentiment")

# Cast to str so a stray non-string cell cannot break the vectorizer's
# lowercasing step.
X = labelled['Comment'].astype(str)
y = labelled['Sentiment']  # 0 = negative, 1 = neutral, 2 = positive

# Step 5: Train-test split
# 20% of the rows are held out; stratify=y keeps the class proportions the
# same in both parts, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

# Step 6: Vectorization using TF-IDF
# The vocabulary (capped at 5000 terms) and IDF weights are learned from the
# training split only; the test split is transformed with that fitted state
# so no information from the held-out rows reaches the model.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Step 7: Initialize and train the Naive Bayes classifier
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

# Step 8: Make predictions and evaluate
y_pred = nb_model.predict(X_test_tfidf)

# Overall accuracy can hide per-class differences, so the per-class report
# and the confusion matrix are printed alongside it.
print("Multinomial Naive Bayes - Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

                                             Comment  Sentiment
0  Hopefully Batman shows up in this :fire::fire:...          2
1  Im making a solemn oath right now that Im not ...          1
2  Been a fan of his since euphoria dude is good ...          2
3    Every good story starts with a when i was a kid          2
4  Hunter Schafer? NEON produced horror film? Im ...          2
Sentiment
2    3110
1    2012
0    1840
Name: count, dtype: int64
Multinomial Naive Bayes - Accuracy: 0.6611629576453697
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.50      0.60       368
           1       0.78      0.37      0.50       403
           2       0.61      0.95      0.75       622

    accuracy                           0.66      1393
   macro avg       0.72      0.60      0.62      1393
weighted avg       0.70      0.66      0.64      1393

Confusion Matrix:
 [[184  25 159]
 [ 44 148 211]
 [ 17  16 589]]
