In [387]:
# !pip install gradio

In [25]:
import pandas as pd
import numpy as np
import nltk
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import pickle
import warnings
warnings.filterwarnings('ignore')

In [27]:
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [29]:
#df = pd.read_json('http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz', lines=True, nrows=50000)

#df = pd.read_json('reviews_Electronics_5.json.gz', lines=True)

reader = pd.read_json("reviews_Electronics_5.json.gz", lines=True, chunksize=5000)

df = next(reader)   # first 5000 rows only
print(df.shape)



(5000, 9)


In [31]:
df['sentiment'] = df['overall'].apply(lambda x: 'positive' if x > 3 else 'negative' if x < 3 else 'neutral')

In [33]:
def process_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    tokens =  [token for token in tokens if token not in stop_words]
    lemma = WordNetLemmatizer()
    tokens =  [lemma.lemmatize(token) for token in tokens]
    return ' '.join(tokens)






In [35]:
df['preprocessedtext'] = df['reviewText'].apply(process_text)

In [36]:
label_map = {"negative":0, "positive":1, "neutral":2}

In [37]:
y = df['sentiment'].map(label_map)

In [41]:
from tensorflow.keras.utils import to_categorical

In [42]:
y = to_categorical(y)

In [43]:
y.shape

(5000, 3)

In [47]:
tfidf_vector = TfidfVectorizer(max_features=5000)

In [49]:
X = tfidf_vector.fit_transform(df['preprocessedtext']).toarray()

In [51]:
X.shape

(5000, 5000)

In [53]:
X = X.reshape(X.shape[0], 1,X.shape[1])

In [55]:
X[0][0]

array([0., 0., 0., ..., 0., 0., 0.])

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [59]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [61]:
model = Sequential(
    [
        LSTM(64, input_shape = (1,5000), return_sequences=True),
        LSTM(32),
        Dense(16, activation="relu"),
        Dropout(0.5),
        Dense(3, activation="softmax")

    ]
)

In [63]:
model.compile(optimizer="adam", loss = "categorical_crossentropy", metrics=['accuracy'])

In [65]:
model.summary()

In [67]:
model.fit(X_train, y_train, epochs=5,validation_split=0.2)

Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 43ms/step - accuracy: 0.7800 - loss: 0.8907 - val_accuracy: 0.7962 - val_loss: 0.6465
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.8041 - loss: 0.6554 - val_accuracy: 0.7962 - val_loss: 0.5972
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - accuracy: 0.8109 - loss: 0.5293 - val_accuracy: 0.7962 - val_loss: 0.5493
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.8494 - loss: 0.3834 - val_accuracy: 0.8225 - val_loss: 0.5743
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.8988 - loss: 0.2747 - val_accuracy: 0.8138 - val_loss: 0.6940


<keras.src.callbacks.history.History at 0x20e9182b5c0>

In [68]:
test_accuracy, test_loss = model.evaluate(X_test, y_test)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8050 - loss: 0.6892


In [69]:
def predict_sentiment(review):
  pro_text = process_text(review)
  vector = tfidf_vector.transform([pro_text]).toarray()
  vector = vector.reshape(1,1,5000)
  prediction_probs = model.predict(vector)[0]
  prediction_idx = np.argmax(prediction_probs)
  sentiment_map = {0:"negative", 1:"positive", 2:"neutral"}
  prediction = sentiment_map[prediction_idx]
  confidence = prediction_probs[prediction_idx]

  return {
        'Sentiment': prediction,
        'Confidence':confidence,
        'Processed Text': pro_text
    }

In [None]:
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(label="Enter Product Review", lines=5),
    outputs=gr.JSON(label="Prediction Results"),
    title="Product Review Sentiment Analyzer",
    description="Enter a product review to analyze its sentiment using an LSTM neural network.",
    examples=[
        ["This product exceeded my expectations. Great value for money!"],
        ["The quality is poor and it stopped working after a week."],
        ["Decent product but a bit overpriced for what it offers."]
    ]
)

# Launch the interface
iface.launch(debug=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 872ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Using existing dataset file at: .gradio\flagged\dataset1.csv
