In [5]:
import os
import json
import re
from zipfile import ZipFile
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [6]:
data = pd.read_csv("processed_reviews.csv")


In [7]:
data

Unnamed: 0,Review,label,processed_review
0,Best mobile phone\nCamera quality is very nice...,positive,best mobil phone camera qualiti nice batteri b...
1,Nice product with all features specially its l...,positive,nice product featur special look camera batter...
2,High quality camera😍,neutral,high qualiti camera😍
3,Camera Quality Is Improved Loving It,positive,camera qualiti improv love
4,Switch from OnePlus to iPhone I am stunned wit...,positive,switch oneplu iphon stun camera perform . ever...
...,...,...,...
703,Great product,positive,great product
704,Fabulous!,positive,fabul !
705,Highly recommended,positive,highli recommend
706,Classy product,positive,classi product


In [8]:
def clean_review(text):
    # Remove the phrase 'READ MORE'
    text = text.replace('READ MORE', '')
    # Remove special characters and symbols using regex
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Strip extra whitespace
    text = text.strip()
    return text
data['Review'] = data['Review'].apply(clean_review)

In [9]:
data.shape

(708, 3)

In [10]:
data.head()

Unnamed: 0,Review,label,processed_review
0,Best mobile phone\nCamera quality is very nice...,positive,best mobil phone camera qualiti nice batteri b...
1,Nice product with all features specially its l...,positive,nice product featur special look camera batter...
2,High quality camera,neutral,high qualiti camera😍
3,Camera Quality Is Improved Loving It,positive,camera qualiti improv love
4,Switch from OnePlus to iPhone I am stunned wit...,positive,switch oneplu iphon stun camera perform . ever...


In [11]:
data["label"].value_counts()

label
positive    620
neutral      73
negative     15
Name: count, dtype: int64

In [12]:
data.replace({"label": {"positive": 1, "negative": 0,"neutral":-1}}, inplace=True)

  data.replace({"label": {"positive": 1, "negative": 0,"neutral":-1}}, inplace=True)


In [13]:
data.head()

Unnamed: 0,Review,label,processed_review
0,Best mobile phone\nCamera quality is very nice...,1,best mobil phone camera qualiti nice batteri b...
1,Nice product with all features specially its l...,1,nice product featur special look camera batter...
2,High quality camera,-1,high qualiti camera😍
3,Camera Quality Is Improved Loving It,1,camera qualiti improv love
4,Switch from OnePlus to iPhone I am stunned wit...,1,switch oneplu iphon stun camera perform . ever...


In [14]:
data["label"].value_counts()

label
 1    620
-1     73
 0     15
Name: count, dtype: int64

In [15]:
# split data into training data and test data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [16]:
print(train_data.shape)
print(test_data.shape)

(566, 3)
(142, 3)


In [17]:
# Tokenize text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["Review"])
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data["Review"]), maxlen=200)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data["Review"]), maxlen=200)


In [18]:
print(X_train)

[[  0   0   0 ...   0   0  18]
 [  0   0   0 ...  97  55  97]
 [  0   0   0 ...   0  46   7]
 ...
 [  0   0   0 ...   0  81   8]
 [  0   0   0 ...   0  15  13]
 [  0   0   0 ... 354 936 937]]


In [19]:
print(X_test)

[[  0   0   0 ...   0   7   5]
 [  0   0   0 ...   0  11   4]
 [  0   0   0 ...   0   0  53]
 ...
 [  0   0   0 ...   0   0  20]
 [  0   0   0 ...  12   2  78]
 [  0   0   0 ... 204  31  47]]


In [20]:
Y_train = train_data["label"]
Y_test = test_data["label"]

In [21]:
print(Y_train)

522    1
24     1
480    1
314    1
239    1
      ..
71     1
106    1
270    1
435    1
102    1
Name: label, Length: 566, dtype: int64


In [22]:
# build the model

model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=128, input_length=200))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation="sigmoid"))



In [23]:
model.summary()

In [24]:
# compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [25]:
model.fit(X_train, Y_train, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 164ms/step - accuracy: 0.6198 - loss: 0.6665 - val_accuracy: 0.8596 - val_loss: 0.6287
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step - accuracy: 0.9012 - loss: 0.5412 - val_accuracy: 0.8596 - val_loss: 0.5955
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.8990 - loss: 0.4605 - val_accuracy: 0.8596 - val_loss: 0.5568
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.8817 - loss: 0.4917 - val_accuracy: 0.8596 - val_loss: 0.5414
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step - accuracy: 0.8950 - loss: 0.4397 - val_accuracy: 0.8596 - val_loss: 0.5113
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 127ms/step - accuracy: 0.8889 - loss: 0.4008 - val_accuracy: 0.8596 - val_loss: 0.4204
Epoch 7/10
[1m8/8[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x19ec768a5a0>

In [26]:
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.8615 - loss: 0.0685
Test Loss: 0.09948017448186874
Test Accuracy: 0.8239436745643616


In [27]:
def predict_sentiment(review):
  # tokenize and pad the review
  sequence = tokenizer.texts_to_sequences([review])
  padded_sequence = pad_sequences(sequence, maxlen=200)
  prediction = model.predict(padded_sequence)
  sentiment = "positive" if prediction[0][0] > 0.5 else "negative"
  return sentiment

In [28]:
#def predict_sentiment(review):
    # Tokenize and pad the review
   #sequence = tokenizer.texts_to_sequences([review])
    #padded_sequence = pad_sequences(sequence, maxlen=200)
    
    # Predict sentiment
   # prediction = model.predict(padded_sequence)
    
    # Convert prediction to sentiment label
   # sentiment_labels = ['negative', 'neutral', 'positive']
    #sentiment_index = prediction.argmax()  # Get index of the highest probability
    #sentiment = sentiment_labels[sentiment_index]

    #return sentiment


In [29]:
# example usage
new_review = "This was very nice."
sentiment = predict_sentiment(new_review)
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
The sentiment of the review is: positive


In [31]:
# example usage
new_review = "Bad"
sentiment = predict_sentiment(new_review)
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
The sentiment of the review is: negative


In [30]:
#model.save("sentiment_lstm_model.h5")
model.save('my_model.keras')