In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import joblib

# Read the review corpus from the CSV file into a DataFrame.
reviews = pd.read_csv(filepath_or_buffer="/content/synthetic_reviews.csv")

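# The dataset must contain a 'ReviewText' column.
# Sentiment classes are: Positive / Neutral / Negative.
# The code below does not read an existing label column: it always derives
# labels from TextBlob polarity and stores them in 'SentimentLabel'. Treat these
# as a starting point only; prefer human-annotated labels when available.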
from textblob import TextBlob
def label_sentiment(text):
    """Classify a review as "Positive", "Negative" or "Neutral".

    The input is coerced with ``str()`` and scored with TextBlob's polarity.
    A polarity strictly above 0.2 is "Positive", strictly below -0.2 is
    "Negative", and anything in between (both bounds included) is "Neutral".
    """
    polarity = TextBlob(str(text)).sentiment.polarity
    if polarity > 0.2:
        return "Positive"
    if polarity < -0.2:
        return "Negative"
    return "Neutral"

# Derive a weak label (Positive / Neutral / Negative) for every review.
reviews["SentimentLabel"] = reviews["ReviewText"].apply(label_sentiment)

# label_sentiment() coerces its input with str(), but TfidfVectorizer rejects
# NaN / non-string documents. Normalise the model input once so that rows with
# missing or non-text reviews do not crash fit()/predict().
review_texts = reviews["ReviewText"].fillna("").astype(str)

# Split dataset: 80/20, stratified so each split keeps the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    review_texts,
    reviews["SentimentLabel"],
    test_size=0.2,
    random_state=42,
    stratify=reviews["SentimentLabel"],
)

# Build pipeline: TF-IDF (unigrams + bigrams, top 5000 features) + Logistic Regression
sentiment_model = Pipeline([
    ("tfidf", TfidfVectorizer(max_features=5000, ngram_range=(1, 2))),
    ("clf", LogisticRegression(max_iter=500)),
])

# Train
sentiment_model.fit(X_train, y_train)

# Evaluate
# NOTE: the labels were generated from the review text by TextBlob, so this
# report measures how well the model reproduces TextBlob's output, not accuracy
# against human-annotated sentiment. Near-perfect scores should be read with
# that in mind.
y_pred = sentiment_model.predict(X_test)
print(classification_report(y_test, y_pred))

# Save the fitted pipeline (vectorizer + classifier) as a single artifact.
joblib.dump(sentiment_model, "sentiment_model.pkl")
print("✅ Sentiment model saved")


              precision    recall  f1-score   support

    Negative       1.00      1.00      1.00      2415
     Neutral       1.00      1.00      1.00       793
    Positive       1.00      1.00      1.00      2792

    accuracy                           1.00      6000
   macro avg       1.00      1.00      1.00      6000
weighted avg       1.00      1.00      1.00      6000

✅ Sentiment model saved
