<a href="https://colab.research.google.com/github/OIBSIP-25-26/P3/blob/main/P3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
sentiment_analysis.py
Simple sentiment classifier using TF-IDF + MultinomialNB on synthetic review data.
Run: python sentiment_analysis.py
Generates: model saved (joblib), classification report printed.
"""
import pandas as pd
import numpy as np
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Directory that receives every artifact this script writes; created on import/run.
OUT_DIR = "sentiment_output"
os.makedirs(OUT_DIR, exist_ok=True)
# Seeds NumPy's global RNG. The shuffle below passes its own random_state.
np.random.seed(0)

# Synthetic dataset: short reviews with labels
positive = [
    "Great product, loved it",
    "Excellent service and fast delivery",
    "Highly recommend this. Very satisfied",
    "Amazing quality and price",
    "Superb, will buy again",
]
negative = [
    "Terrible experience, broke after a day",
    "Very disappointed with the product",
    "Won't buy again, low quality",
    "Customer service was rude",
    "Bad, not as described",
]
neutral = [
    "Product is okay, nothing special",
    "Average quality, expected more",
    "It's fine for the price",
    "Neutral feelings about this item",
    "Neither good nor bad",
]

# One entry per class: (label, seed phrases, number of times the phrase list is
# repeated). The labels are generated from the real length of each repeated
# list, so editing a phrase list can never leave `texts` and `labels` with
# different lengths (which would make the DataFrame constructor fail).
_CLASS_SPECS = (
    ("positive", positive, 60),
    ("negative", negative, 60),
    ("neutral", neutral, 30),
)

texts = []
labels = []
for _label, _phrases, _repeats in _CLASS_SPECS:
    _repeated = _phrases * _repeats
    texts.extend(_repeated)
    labels.extend([_label] * len(_repeated))

# NOTE: the frame contains only 15 distinct strings, each repeated many times.
# Any random split of it therefore puts identical texts on both sides, so
# held-out scores computed on this data are optimistic.
df = pd.DataFrame({"text": texts, "label": labels})
# Shuffle the rows reproducibly and renumber the index from 0.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.to_csv(os.path.join(OUT_DIR, "sentiment_synthetic.csv"), index=False)

# If you have a CSV: uncomment & set path
# df = pd.read_csv("sentiment.csv")

# Hold out 20% of the rows for evaluation; stratifying on the label keeps the
# class proportions the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)

# Unigram + bigram TF-IDF features, limited to at most 2000 terms. The
# vocabulary is fitted on the training texts only and then reused for the test texts.
vect = TfidfVectorizer(ngram_range=(1, 2), max_features=2000)
Xtr = vect.fit_transform(X_train)
Xte = vect.transform(X_test)

# Train the Naive Bayes classifier and predict labels for the held-out texts.
model = MultinomialNB().fit(Xtr, y_train)
y_pred = model.predict(Xte)

print("=== Classification report ===")
print(classification_report(y_test, y_pred))

# Save vectorizer & model (vectorizer first, then the classifier) into OUT_DIR.
for _artifact, _filename in (
    (vect, "tfidf_vectorizer.joblib"),
    (model, "sentiment_nb_model.joblib"),
):
    joblib.dump(_artifact, os.path.join(OUT_DIR, _filename))

print(f"Model and vectorizer saved to {OUT_DIR}")


=== Classification report ===
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00        60
     neutral       1.00      1.00      1.00        30
    positive       1.00      1.00      1.00        60

    accuracy                           1.00       150
   macro avg       1.00      1.00      1.00       150
weighted avg       1.00      1.00      1.00       150

Model and vectorizer saved to sentiment_output
