In [2]:
!pip install datasets nltk


Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.wh

In [5]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from datasets import load_dataset

dataset = load_dataset("amazon_polarity", split="train")
df = pd.DataFrame(dataset).sample(n=200, random_state=42)

nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

df['sentiment_score'] = df['content'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
df['sentiment_label'] = df['sentiment_score'].apply(lambda x: 'Positive' if x > 0.05 else ('Negative' if x < -0.05 else 'Neutral'))

print(df[['content', 'sentiment_score', 'sentiment_label']].head())


                                                   content  sentiment_score  \
2079998  This product consists of a piece of thin flexi...           0.9041   
1443106  Even on the lowest setting, the toast is too d...          -0.5859   
3463669  I enjoyed this disc. The video is stunning. I ...           0.9545   
2914699  The authors pretend that parents neither die n...          -0.0654   
1603231  Might as well just use a knife, this product h...          -0.1779   

        sentiment_label  
2079998        Positive  
1443106        Negative  
3463669        Positive  
2914699        Negative  
1603231        Negative  


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [6]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post', truncating='post')

labels = np.array(df['label'])

split = int(0.8 * len(padded_sequences))
X_train, X_test = padded_sequences[:split], padded_sequences[split:]
y_train, y_test = labels[:split], labels[split:]


model = tf.keras.Sequential([
    tf.keras.layers.Embedding(5000, 64, input_length=100),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5, batch_size=16, validation_data=(X_test, y_test))

model.save("lstm_sentiment_model_200.h5")


Epoch 1/5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 140ms/step - accuracy: 0.5237 - loss: 0.6939 - val_accuracy: 0.4250 - val_loss: 0.6964
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.5657 - loss: 0.6904 - val_accuracy: 0.4500 - val_loss: 0.6984
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.5579 - loss: 0.6902 - val_accuracy: 0.4500 - val_loss: 0.6969
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 0.6144 - loss: 0.6780 - val_accuracy: 0.4500 - val_loss: 0.7012
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - accuracy: 0.6227 - loss: 0.6417 - val_accuracy: 0.5250 - val_loss: 0.7237




In [7]:

cnn_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(5000, 64, input_length=100),
    tf.keras.layers.Conv1D(64, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

cnn_model.fit(X_train, y_train, epochs=5, batch_size=16, validation_data=(X_test, y_test))
cnn_model.save("cnn_sentiment_model_200.h5")


Epoch 1/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - accuracy: 0.5282 - loss: 0.6929 - val_accuracy: 0.4500 - val_loss: 0.6992
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5286 - loss: 0.6635 - val_accuracy: 0.4500 - val_loss: 0.7023
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.5645 - loss: 0.6292 - val_accuracy: 0.4500 - val_loss: 0.7028
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9913 - loss: 0.5891 - val_accuracy: 0.4500 - val_loss: 0.7028
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0000 - loss: 0.5266 - val_accuracy: 0.4500 - val_loss: 0.7055




In [8]:
test_reviews = ["The product is amazing!", "Worst purchase ever!", "It was okay, not great."]
test_sequences = tokenizer.texts_to_sequences(test_reviews)
test_padded = pad_sequences(test_sequences, maxlen=100, padding='post')
predictions = model.predict(test_padded)
pred_labels = ["Positive" if p > 0.5 else "Negative" for p in predictions]


for review, sentiment in zip(test_reviews, pred_labels):
    print(f"Review: {review} | Sentiment: {sentiment}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 395ms/step
Review: The product is amazing! | Sentiment: Negative
Review: Worst purchase ever! | Sentiment: Negative
Review: It was okay, not great. | Sentiment: Negative
