In [None]:
# 🎯 Google Play Store Sentiment Classifier (Naive Bayes)
# by Tenika Powell 🧠💬

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Public CSV of Play Store reviews from the 4GeeksAcademy Naive Bayes tutorial repository.
url = "https://raw.githubusercontent.com/4GeeksAcademy/naive-bayes-project-tutorial/main/playstore_reviews.csv"

# Keep only the text and label columns, drop incomplete rows, then normalise the
# review text by trimming surrounding whitespace and lowercasing it.
df = (
    pd.read_csv(url)
    .loc[:, ["review", "polarity"]]
    .dropna()
    .assign(review=lambda frame: frame["review"].str.strip().str.lower())
)


# Hold out 20% of the reviews for evaluation; the fixed seed keeps the split reproducible.
train_text, test_text, y_train, y_test = train_test_split(
    df["review"],
    df["polarity"],
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts with English stop words removed. The vocabulary is fitted on
# the training reviews only; the test reviews are encoded against that vocabulary.
vec = CountVectorizer(stop_words="english")

# Both matrices are converted to dense arrays before being handed to the models.
X_train = vec.fit_transform(train_text).toarray()
X_test = vec.transform(test_text).toarray()


def report_metrics(y_true, y_pred):
    """Print the accuracy and the per-class classification report for one model.

    Shared by the Naive Bayes loop and the Random Forest comparison so every
    model is scored and displayed in exactly the same way.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels of the evaluation set.
    y_pred : array-like
        Labels predicted by the model for the same rows.
    """
    print("Accuracy:", round(accuracy_score(y_true, y_pred), 3))
    print(classification_report(y_true, y_pred))


# The three Naive Bayes variants to compare, keyed by the label shown in the output.
models = {
    "GaussianNB": GaussianNB(),
    "MultinomialNB": MultinomialNB(),
    "BernoulliNB": BernoulliNB(),
}

print("🧪 Evaluating Naive Bayes models...\n")

for name, model in models.items():
    # Best-effort: a variant that cannot be fitted or scored is reported and
    # skipped so the remaining variants still run.
    try:
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        # The header is printed only after a successful fit/predict, so a failing
        # model produces just the "failed" line below.
        print(f"--- {name} ---")
        report_metrics(y_test, preds)
    except Exception as e:
        print(f"{name} failed: {e}\n")


print("\n🌳 Random Forest Comparison\n")

# Non-Bayesian baseline trained on the same features; seeded for reproducibility.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

report_metrics(y_test, rf_preds)


# Closing remarks for the reader, emitted one per line after both comparisons.
closing_notes = (
    "\n✅ MultinomialNB usually performs best for text data.",
    "If Random Forest is higher, that means your dataset has more complex word relationships.",
)
print(*closing_notes, sep="\n")


🧪 Evaluating Naive Bayes models...

--- GaussianNB ---
Accuracy: 0.804
              precision    recall  f1-score   support

           0       0.85      0.88      0.86       126
           1       0.69      0.62      0.65        53

    accuracy                           0.80       179
   macro avg       0.77      0.75      0.76       179
weighted avg       0.80      0.80      0.80       179

--- MultinomialNB ---
Accuracy: 0.816
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       126
           1       0.73      0.60      0.66        53

    accuracy                           0.82       179
   macro avg       0.79      0.75      0.77       179
weighted avg       0.81      0.82      0.81       179

--- BernoulliNB ---
Accuracy: 0.771
              precision    recall  f1-score   support

           0       0.79      0.93      0.85       126
           1       0.70      0.40      0.51        53

    accuracy                         