In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import joblib

In [3]:
# Read the user-reviews CSV (path is relative to the notebook's working
# directory) into the DataFrame every later cell works from.
reviews_csv = "data/googleplaystore_user_reviews.csv"
df = pd.read_csv(reviews_csv)

In [4]:
# Keep only the review text and its sentiment label, then discard every row
# in which either of those two values is missing.
df = df.loc[:, ['Translated_Review', 'Sentiment']].dropna()

In [6]:
# Demo-only lookup from a sentiment label to a star rating:
# Positive -> 5 stars, Neutral -> 3 stars, Negative -> 1 star.
sentiment_to_rating = dict(Positive=5, Neutral=3, Negative=1)

In [7]:
# Translate each sentiment label into its star rating. Series.map leaves NaN
# for any label that is not a key of sentiment_to_rating, so check for that
# here and fail with the offending labels instead of handing NaN targets to
# the model, where the resulting error is much harder to trace back.
ratings = df['Sentiment'].map(sentiment_to_rating)
unmapped_labels = df.loc[ratings.isna(), 'Sentiment'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        "Sentiment labels with no star-rating mapping: "
        f"{sorted(str(label) for label in unmapped_labels)}"
    )

# assign() returns a new frame with the extra column instead of writing into
# the existing one, so re-running this cell always starts from the same input.
df = df.assign(Rating=ratings)

X = df['Translated_Review']  # model input: the review text
y = df['Rating']             # model target: the mapped star rating

# Two-stage text classifier: TF-IDF features feeding a logistic regression.
# The step names 'tfidf' and 'model' identify the stages on the pipeline
# object, so they are kept exactly as they were.
text_features = TfidfVectorizer()
rating_classifier = LogisticRegression(max_iter=1000)
pipeline = Pipeline(steps=[
    ('tfidf', text_features),
    ('model', rating_classifier),
])

In [8]:
# Fit both pipeline stages on every row of X / y.
# NOTE(review): nothing in the visible cells holds data out for evaluation
# before the model is saved, and train_test_split is imported but unused —
# confirm whether an evaluation step is intended.
pipeline.fit(X, y)

# Persist the fitted pipeline to disk with joblib, then report success.
model_path = "model.pkl"
joblib.dump(pipeline, model_path)
print("✅ Model saved successfully as model.pkl")

✅ Model saved successfully as model.pkl
