In [11]:
import nltk
# Fetch the tokenizer data and the English stopword list used during text
# preprocessing.
for resource_name in ('punkt_tab', 'stopwords'):
    nltk.download(resource_name)

# WordNet (needed by the lemmatizer) is fetched last as a bare expression so
# its boolean status remains the value the cell displays.
nltk.download('wordnet')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [29]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score
import re


In [13]:
# Read the raw review table from the Colab working directory.
df = pd.read_csv('/content/Reviews.csv')

# Remove, in place, every row that has a missing value in any column
# (axis=0 / how='any' are the defaults, spelled out here for the reader).
df.dropna(axis=0, how='any', inplace=True)

# Lazily-built NLTK resources shared by every preprocess_text call.
_STOPWORD_SET = None
_LEMMATIZER = None


def _preprocess_resources():
    """Return the (stopword set, lemmatizer) pair, creating each on first use only."""
    global _STOPWORD_SET, _LEMMATIZER
    if _STOPWORD_SET is None:
        _STOPWORD_SET = frozenset(stopwords.words('english'))
    if _LEMMATIZER is None:
        _LEMMATIZER = WordNetLemmatizer()
    return _STOPWORD_SET, _LEMMATIZER


def preprocess_text(text):
    """Normalise one review into a space-joined string of lemmatised tokens.

    Steps, in order: strip every character that is not an ASCII letter or
    whitespace, lowercase, tokenise with ``nltk.word_tokenize``, drop English
    stopwords, lemmatise each remaining token, and join with single spaces.

    Parameters
    ----------
    text : str
        Raw review text. Any non-string value (e.g. ``None`` or a NaN cell)
        is treated as an empty review instead of raising inside ``re.sub``.

    Returns
    -------
    str
        The cleaned text; ``''`` when the input holds no letters at all.
    """
    if not isinstance(text, str):
        return ''

    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()

    # Nothing but whitespace is left: skip tokenisation entirely.
    if not text.strip():
        return ''

    # The stopword set and the lemmatizer are built once and reused; building
    # them per call repeats identical work for every review in the dataset.
    stopword_set, lemmatizer = _preprocess_resources()

    # Stopwords are filtered on the raw token, before lemmatisation.
    tokens = [
        lemmatizer.lemmatize(token)
        for token in nltk.word_tokenize(text)
        if token not in stopword_set
    ]
    return ' '.join(tokens)

# Run the text normaliser over every review body, element by element, and
# store the result alongside the original column.
df['clean_text'] = df['Text'].map(preprocess_text)


In [14]:
# Hold out 20% of the cleaned reviews for evaluation; the fixed seed makes
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_text'],
    df['Score'],
    random_state=42,
    test_size=0.2,
)

In [15]:
# TF-IDF features with the vocabulary limited to 5000 terms.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

# The tuple is evaluated left to right: the vectorizer is fitted on the
# training split first, then the already-fitted vectorizer encodes the test
# split.
X_train_tfidf, X_test_tfidf = (
    tfidf_vectorizer.fit_transform(X_train),
    tfidf_vectorizer.transform(X_test),
)


In [16]:
# Train a linear SVM on the TF-IDF training matrix; fit() hands back the
# estimator itself, so construction and training chain into one statement.
svm_classifier = LinearSVC().fit(X_train_tfidf, y_train)

# Bare expression so the fitted estimator is still what the cell displays.
svm_classifier


In [17]:
# Score the held-out reviews with the trained classifier.
y_pred = svm_classifier.predict(X_test_tfidf)

# Per-class precision / recall / F1 table, followed by overall accuracy.
report_text = classification_report(y_test, y_pred)
overall_accuracy = accuracy_score(y_test, y_pred)

print(report_text)
print("Accuracy:", overall_accuracy)


              precision    recall  f1-score   support

           1       0.63      0.67      0.65     10515
           2       0.49      0.13      0.20      5937
           3       0.48      0.22      0.30      8460
           4       0.49      0.21      0.29     16026
           5       0.77      0.96      0.86     72743

    accuracy                           0.73    113681
   macro avg       0.57      0.44      0.46    113681
weighted avg       0.68      0.73      0.68    113681

Accuracy: 0.7298757048231455


In [18]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.23.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [26]:
def predict_sentiment(review):
    """Classify one free-text review as Positive, Neutral or Negative.

    The review is cleaned with ``preprocess_text``, encoded with the fitted
    ``tfidf_vectorizer`` and scored by ``svm_classifier``; the predicted
    score is then bucketed: 4 and above is positive, 2 and below is negative,
    anything in between is neutral.

    Parameters
    ----------
    review : str
        Raw review text as typed by the user.

    Returns
    -------
    str
        One of the three sentiment labels, or a short prompt when the review
        contains nothing that can be scored.
    """
    empty_message = "Please enter a review containing some words."

    # A missing or blank review would otherwise be classified from an
    # all-zero feature vector, which yields an arbitrary label.
    if not isinstance(review, str) or not review.strip():
        return empty_message

    processed_review = preprocess_text(review)

    # Same situation when cleaning removes everything (digits, punctuation,
    # stopwords only).
    if not processed_review:
        return empty_message

    review_tfidf = tfidf_vectorizer.transform([processed_review])
    prediction = svm_classifier.predict(review_tfidf)[0]

    if prediction >= 4:
        return "Positive 😊"
    if prediction <= 2:
        return "Negative 😠"
    return "Neutral 😐"


In [28]:
import gradio as gr

# Input and output widgets for the demo page.
review_input = gr.Textbox(label="Enter a Review")
sentiment_output = gr.Textbox(label="Sentiment Prediction")

# Wire the prediction function to the two text boxes.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=review_input,
    outputs=sentiment_output,
    title="Amazon Product Review Sentiment Analysis",
    description="Enter a review and get its sentiment: Positive, Neutral, or Negative.",
)

# share=True requests a public share link in addition to the local server.
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1102feb255c15575fa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


