# 🧠 Simple RNN for Sentiment Analysis
Using a custom `sentiment.csv` dataset

In [1]:
#  Step 1: Import Libraries
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
#  Step 2: Load and Preprocess Dataset
df = pd.read_csv("sentiment.csv")

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
    return text

df['text'] = df['text'].apply(clean_text)
df.head()

Unnamed: 0,text,label
0,i love this movie,1
1,this was the worst movie ever,0
2,amazing story and direction,1
3,totally boring and slow,0
4,great performance by the lead actor,1


In [3]:
#  Step 3: Tokenization and Padding
max_words = 5000
max_len = 100

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])
X = pad_sequences(sequences, maxlen=max_len)
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
#  Step 4: Build and Train Simple RNN Model
model = Sequential()
model.add(Embedding(max_words, 64, input_length=max_len))
model.add(SimpleRNN(64))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

model.fit(X_train, y_train, epochs=5, batch_size=2, validation_split=0.1)



Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.2881 - loss: 0.7206 - val_accuracy: 1.0000 - val_loss: 0.6770
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.9095 - loss: 0.6231 - val_accuracy: 0.0000e+00 - val_loss: 0.7779
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.7595 - loss: 0.5923 - val_accuracy: 0.0000e+00 - val_loss: 0.7823
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.9095 - loss: 0.4796 - val_accuracy: 0.0000e+00 - val_loss: 0.8471
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.0000 - loss: 0.4273 - val_accuracy: 0.0000e+00 - val_loss: 0.8405


<keras.src.callbacks.history.History at 0x25350efa500>

In [5]:
#  Step 5: Evaluate Model
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5000 - loss: 0.7483
Test Accuracy: 0.5000


In [None]:
# ✅ Step 6: Predict Sentiment for New Text

def predict_sentiment(text):
    # Preprocess
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)

    # Tokenize and pad
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len)

    # Predict
    pred = model.predict(padded)[0][0]
    sentiment = "Positive" if pred >= 0.5 else "Negative"
    
    print(f"Statement: {text}")
    print(f"Predicted Sentiment: {sentiment} (Confidence: {pred:.2f})")

# 🔍 Example:
predict_sentiment("I really enjoyed this movie!")
predict_sentiment("The plot was terrible and acting was bad.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Review: i really enjoyed this movie
Predicted Sentiment: Positive (Confidence: 0.54)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Review: the plot was terrible and acting was bad
Predicted Sentiment: Negative (Confidence: 0.41)
