In [2]:
# 1. Imports & setup
import pandas as pd
import nltk, string
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import gradio as gr

# Make sure the NLTK stopword corpus is available locally before reading it.
nltk.download('stopwords')
# Kept as a set so that per-token membership checks during preprocessing are cheap.
stop_words = {*stopwords.words('english')}

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
# 2. Preprocessing function
# Deletion table for ASCII punctuation, built once instead of on every call.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def preprocess(text):
    """Normalize a raw review into a space-separated string of content tokens.

    Steps: lowercase, delete ASCII punctuation characters, split on whitespace,
    then keep only purely alphabetic tokens that are not in ``stop_words``.

    Args:
        text: The raw review text. Anything that is not a ``str`` (for example
            ``None`` or a NaN float) is treated as an empty review.

    Returns:
        The cleaned tokens joined by single spaces; ``""`` when nothing survives.
    """
    if not isinstance(text, str):
        # Missing values must not crash a whole .apply() pass or a UI callback.
        return ""
    text = text.lower().translate(_PUNCT_TABLE)
    # Punctuation is deleted, not replaced by a space, so "didn't" becomes "didnt".
    tokens = [t for t in text.split() if t.isalpha() and t not in stop_words]
    return " ".join(tokens)

In [15]:
# 3. Data load & class balance
def _label_from_score(score):
    """Map a star rating to a label: >=4 positive, exactly 3 neutral, otherwise negative."""
    if score >= 4:
        return 'positive'
    if score == 3:
        return 'neutral'
    return 'negative'


# Keep only the review text and its rating, dropping rows where either is missing.
df = pd.read_csv("Reviews.csv")
df = df[['Text', 'Score']].dropna()
df['Sentiment'] = df['Score'].map(_label_from_score)

# Draw the same number of reviews from every class so the training data is balanced.
samples = [
    df.loc[df['Sentiment'] == label].sample(n=4000, random_state=42)
    for label in ('positive', 'neutral', 'negative')
]

# Shuffle the stacked per-class samples, then clean the text once for all later cells.
df = pd.concat(samples).sample(frac=1, random_state=42)
df['Cleaned'] = df['Text'].map(preprocess)

In [5]:
# 4. Train-test split
# Hold out 20% of the rows for evaluation; stratifying on the label keeps the
# class proportions the same in the train and test parts.
labels = df['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    df['Cleaned'],
    labels,
    stratify=labels,
    test_size=0.2,
    random_state=42,
)

In [6]:
# 5. Vectorize and train Naive Bayes
# TF-IDF features over unigrams and bigrams, with the vocabulary capped at 5000 terms.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
model = MultinomialNB()

# The vocabulary is learned from the training text only; the test text is merely projected onto it.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)
model.fit(X_train_vec, y_train)

In [7]:
# 6. Evaluate performance
# Score the held-out split: overall accuracy first, then per-class precision/recall/F1.
y_pred = model.predict(X_test_vec)
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)
print("✅ Accuracy:", test_accuracy)
print(per_class_report)

✅ Accuracy: 0.6804166666666667
              precision    recall  f1-score   support

    negative       0.71      0.66      0.69       800
     neutral       0.59      0.63      0.61       800
    positive       0.75      0.74      0.75       800

    accuracy                           0.68      2400
   macro avg       0.68      0.68      0.68      2400
weighted avg       0.68      0.68      0.68      2400



In [16]:
# 7. Gradio interface
def analyze_sentiment(review):
    """Classify one review and report the model's per-class probabilities.

    Args:
        review: Raw review text from the UI. Non-string values (for example
            ``None``) are treated as an empty review.

    Returns:
        A ``(label, confidences)`` tuple. ``label`` is the capitalized name of
        the most probable class, or ``"Unknown"`` when the cleaned review has
        no term in the vectorizer's vocabulary. ``confidences`` maps each
        class name (``str``) to its probability (``float``).
    """
    text = review if isinstance(review, str) else ""
    vec = vectorizer.transform([preprocess(text)])
    probs = model.predict_proba(vec)[0]
    # Plain str/float instead of NumPy scalars keeps the Label payload trivially serializable.
    confs = {str(label): float(p) for label, p in zip(model.classes_, probs)}

    if vec.nnz == 0:
        # No known term survived preprocessing, so the probabilities carry no
        # evidence from the text; taking argmax would just report an arbitrary class.
        return "Unknown", confs

    sentiment = str(model.classes_[int(probs.argmax())])
    return sentiment.capitalize(), confs

# Sample inputs offered as clickable examples in the UI (one single-field row each).
example_reviews = [
    ["The food was great!"],
    ["It was okay."],
    ["I didn't like it at all."],
]

# One text box in; the predicted label as text plus a per-class confidence display out.
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs="text",
    outputs=["text", "label"],
    examples=example_reviews,
    title="🧠 Sentiment Analyzer (Naive Bayes)",
    description="Classifies reviews as Positive, Neutral, or Negative with confidence scores.",
)
iface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d0b224baf52704a7f3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


