In [1]:
!pip install numpy pandas tensorflow scikit-learn keras

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/ef/69/de33bd90dbddc8eede8f99ddeccfb374f7e18f84beb404bfe2cbbdf8df90/tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl.metadata
  Using cached tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.5 kB)
Collecting keras
  Obtaining dependency information for keras from https://files.pythonhosted.org/packages/d8/e5/8b40bada1f33f25deca7bad0e8c7ca6752f2b09e8018e2fc4693858dd662/keras-3.13.1-py3-none-any.whl.metadata
  Downloading keras-3.13.1-py3-none-any.whl.metadata (6.3 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl.metadata
  Using cached absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Obtaining dependency information

In [2]:
import numpy as np
import pandas as pd
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Toy corpus: each entry pairs a review with its sentiment label
# (1 = positive, 0 = negative), so text and label cannot drift out of step.
labelled_reviews = [
    ("The movie was excellent and engaging.", 1),
    ("A boring and dull story.", 0),
    ("Truly an amazing performance.", 1),
    ("The worst acting I have ever seen.", 0),
    ("Loved every minute of this film.", 1),
    ("Completely predictable and uninspired.", 0),
    ("A masterpiece of modern cinema.", 1),
    ("I regret spending my money on this.", 0),
    ("Fantastic plot with great actors.", 1),
    ("So bad, I walked out halfway through.", 0),
]

# Split the pairs back into the two parallel lists used to build the frame.
reviews = [text for text, label in labelled_reviews]
sentiments = [label for text, label in labelled_reviews]

# One row per review: the raw text plus its 0/1 label.
df = pd.DataFrame({'review': reviews, 'sentiment': sentiments})

print("Sample Data:")
print(df)

Sample Data:
                                   review  sentiment
0   The movie was excellent and engaging.          1
1                A boring and dull story.          0
2           Truly an amazing performance.          1
3      The worst acting I have ever seen.          0
4        Loved every minute of this film.          1
5  Completely predictable and uninspired.          0
6         A masterpiece of modern cinema.          1
7     I regret spending my money on this.          0
8       Fantastic plot with great actors.          1
9   So bad, I walked out halfway through.          0


In [4]:
# Turn each review into a fixed-length bag-of-words vector: row i holds the
# per-word counts for review i, with `max_words` columns in total.
# NOTE(review): the tokenizer is fitted on every review before the train/test
# split happens, so the vocabulary also sees the test reviews.
max_words = 100  # upper bound on the vocabulary / width of each feature vector
review_texts = df['review']

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(review_texts)
X = tokenizer.texts_to_matrix(review_texts, mode='count')

# The labels are already 0/1 integers, so they only need to become an array.
y = np.array(df['sentiment'])

# Quick sanity check of the resulting features.
vocabulary = tokenizer.word_index
first_review_vector = X[0]

print("\nShape of input data (X):", X.shape)
print("Vocabulary size:", len(vocabulary))
print("\nFirst review vectorized:")
print(first_review_vector)



Shape of input data (X): (10, 100)
Vocabulary size: 48

First review vectorized:
[0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [5]:
# Hold out 20% of the reviews for testing.
# stratify=y keeps the positive/negative ratio the same in both splits; without
# it, a 2-sample test set drawn from 10 reviews can end up with a single class,
# which makes the reported test accuracy meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("\nTraining samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])



Training samples: 8
Testing samples: 2


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense

# Build the feedforward neural network.
# Seed the Python, NumPy and backend RNGs first so that weight initialisation
# (and any later shuffling) is repeatable on a fresh "Restart & Run All".
SEED = 42  # same value as the random_state used for the train/test split
keras.utils.set_random_seed(SEED)

model = Sequential([
    # Explicit input layer: one feature per bag-of-words column.
    Input(shape=(max_words,)),
    # Hidden layer with ReLU activation.
    Dense(32, activation='relu'),
    # Single sigmoid unit: probability that the review is positive (label 1).
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()


In [7]:

# Fit the network on the training split, keeping the returned History object
# so the per-epoch metrics can be inspected afterwards.
fit_options = {
    'epochs': 10,             # number of full passes over the training samples
    'batch_size': 2,          # samples per gradient update
    'validation_split': 0.2,  # fraction of the supplied samples held out for validation
    'verbose': 1,             # print one progress line per epoch
}
history = model.fit(X_train, y_train, **fit_options)



Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.5000 - loss: 0.6643 - val_accuracy: 0.5000 - val_loss: 0.5782
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5000 - loss: 0.6463 - val_accuracy: 0.5000 - val_loss: 0.5763
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6667 - loss: 0.6272 - val_accuracy: 0.5000 - val_loss: 0.5745
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6667 - loss: 0.6096 - val_accuracy: 0.5000 - val_loss: 0.5731
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8333 - loss: 0.5937 - val_accuracy: 0.5000 - val_loss: 0.5717
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8333 - loss: 0.5790 - val_accuracy: 0.5000 - val_loss: 0.5702
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━

In [8]:
# Evaluate the model on the held-out test data.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {accuracy*100:.2f}%")

# Vectorise a new, unseen review with the already-fitted tokenizer so that its
# columns line up with the features the model was trained on.
# NOTE(review): words the tokenizer never saw presumably contribute nothing to
# the vector — confirm against the Tokenizer documentation.
new_review = "The special effects were fantastic, but the plot was weak."
new_review_vec = tokenizer.texts_to_matrix([new_review], mode='count')

# verbose=0 keeps Keras' progress bar from being printed in the middle of the
# report below.
prediction = model.predict(new_review_vec, verbose=0)

# The model emits one sigmoid value per input row; take the only one.
positive_probability = float(prediction[0][0])
DECISION_THRESHOLD = 0.5  # strictly above this counts as "Positive"
sentiment_label = "Positive" if positive_probability > DECISION_THRESHOLD else "Negative"

print(f"\nNew review: '{new_review}'")
print(f"Prediction probability: {positive_probability:.4f}")
print(f"Predicted sentiment: {sentiment_label}")



Test Accuracy: 50.00%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

New review: 'The special effects were fantastic, but the plot was weak.'
Prediction probability: 0.5832
Predicted sentiment: Positive
