In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Seed NumPy's and TensorFlow's global random generators with one shared
# value so repeated runs draw the same random numbers.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Create sample dataset (replace with your own dataset)
def load_data(num_augmented=90):
    """Build a small synthetic three-class sentiment dataset.

    Starts from ten hand-labelled sentences and appends ``num_augmented``
    randomly chosen copies of them. Roughly half of the copies get one
    intensity modifier (e.g. "very") inserted at a random position after the
    first word. Every copy keeps the label of the sentence it was drawn from.

    All randomness comes from NumPy's global generator, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        num_augmented: Number of extra samples appended to the ten base
            sentences. Defaults to 90 (100 samples in total); values <= 0
            return only the base sentences.

    Returns:
        A ``(texts, labels)`` tuple of NumPy arrays of equal length, with
        labels encoded as 0=negative, 1=neutral, 2=positive.
    """
    # Sample data - in a real application, load your dataset here.
    texts = [
        "I absolutely loved this movie, it was fantastic!",
        "This product is amazing, best purchase ever",
        "The service was terrible and the staff was rude",
        "I really hated the experience, would not recommend",
        "The movie was okay, nothing special but not bad either",
        "This restaurant is great, I'll definitely come back",
        "The quality of the product is poor",
        "I'm quite satisfied with my purchase",
        "The customer service was outstanding",
        "This is the worst experience I've ever had",
    ]

    # Labels: 0=negative, 1=neutral, 2=positive
    # NOTE(review): "I'm quite satisfied with my purchase" is labelled neutral
    # (1); it reads as positive -- confirm whether this is intentional.
    labels = [2, 2, 0, 0, 1, 2, 0, 1, 2, 0]

    modifiers = ["really", "very", "extremely", "somewhat", "kind of"]

    # The base sentences always come first; augmented copies are appended.
    expanded_texts = list(texts)
    expanded_labels = list(labels)

    # The order of the random draws below (index, coin flip, position,
    # modifier) is kept fixed so a given seed always yields the same dataset.
    for _ in range(num_augmented):
        idx = np.random.randint(0, len(texts))
        text = texts[idx]

        # About half of the copies receive one modifier word.
        if np.random.random() > 0.5:
            words = text.split()
            # Position 0 is excluded, so the modifier never leads the sentence.
            insert_pos = np.random.randint(1, len(words))
            words.insert(insert_pos, np.random.choice(modifiers))
            text = " ".join(words)

        expanded_texts.append(text)
        expanded_labels.append(labels[idx])

    return np.array(expanded_texts), np.array(expanded_labels)

# Load and prepare data.
# The split is stratified so all three sentiment classes appear in the test
# set in the same proportion as in the full data, and random_state is pinned
# so the split does not depend on how much of NumPy's global random stream
# has already been consumed.
texts, labels = load_data()
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# Tokenize text
max_words = 5000  # vocabulary size cap passed to the tokenizer
max_len = 100     # every sequence is padded/truncated to this many tokens

# The vocabulary is fitted on the training texts only, so the test texts do
# not influence the word index.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad sequences to a common length
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Build RNN model: embedding -> two stacked LSTM layers, each followed by
# dropout -> softmax over the three sentiment classes.
# `input_length` is intentionally not passed to Embedding: Keras 3 deprecates
# the argument (it only produces a warning), and the sequence length is
# inferred from the data the first time the model is called.
model = Sequential([
    Embedding(max_words, 128),
    LSTM(64, return_sequences=True),  # return the full sequence so the next LSTM can consume it
    Dropout(0.3),
    LSTM(32),  # Second LSTM layer; returns only its final output
    Dropout(0.3),
    Dense(3, activation='softmax')  # 3 classes: negative, neutral, positive
])

# Compile the model.
# The sparse loss is used because the labels are integer class ids (0/1/2)
# rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model.
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch val_* metrics and the final test metrics come from the same
# samples.
history = model.fit(
    X_train_pad, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_pad, y_test),
    verbose=1
)

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test_pad, y_test)
print(f"\nTest accuracy: {test_acc:.4f}")

# Make predictions: pick the class with the highest softmax probability.
y_pred = np.argmax(model.predict(X_test_pad), axis=1)

# Generate classification report.
# `labels` is passed explicitly so the three target names always line up with
# class ids 0/1/2; without it the report raises when a class is missing from
# both y_test and y_pred. `zero_division=0` reports 0 (instead of warning) for
# a class the model never predicts.
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1, 2],
    target_names=['Negative', 'Neutral', 'Positive'],
    zero_division=0,
))

# Function to predict sentiment on new text
def predict_sentiment(text):
    """Classify a single piece of text with the trained model.

    The text is encoded with the module-level ``tokenizer`` and padded to
    ``max_len`` before being passed to the module-level ``model``.

    Args:
        text: The raw string to classify.

    Returns:
        A ``(sentiment, confidence)`` tuple where ``sentiment`` is one of
        'Negative', 'Neutral' or 'Positive' and ``confidence`` is the model's
        output score for that class as a plain Python float.
    """
    # Index in this list == class id produced by the model.
    sentiments = ['Negative', 'Neutral', 'Positive']

    # Tokenize and pad the input text (a one-element batch).
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_len)

    # verbose=0 stops Keras from printing a progress bar on every call, which
    # would otherwise interleave with the caller's own output.
    scores = model.predict(padded, verbose=0)[0]

    # Take the argmax once and reuse it for both the label and its score.
    predicted_class = int(np.argmax(scores))
    return sentiments[predicted_class], float(scores[predicted_class])

# Try the trained model on a few sentences that are not in the dataset.
test_examples = [
    "The food was delicious and the service was excellent",
    "This was a complete waste of money",
    "The product works as expected, nothing extraordinary"
]

print("\nPredictions on new examples:")
for text in test_examples:
    sentiment, confidence = predict_sentiment(text)
    # One print per example: text line, sentiment line, then a blank line.
    print(
        f"Text: '{text}'\n"
        f"Sentiment: {sentiment} (confidence: {confidence:.4f})\n"
    )

print("Analysis complete.")



Epoch 1/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 26s 1s/step - accuracy: 0.3820 - loss: 1.0813 - val_accuracy: 0.3500 - val_loss: 1.0503
Epoch 2/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 266ms/step - accuracy: 0.4500 - loss: 1.0297 - val_accuracy: 0.3500 - val_loss: 1.0596
Epoch 3/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 288ms/step - accuracy: 0.4383 - loss: 1.0331 - val_accuracy: 0.3500 - val_loss: 1.0283
Epoch 4/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 266ms/step - accuracy: 0.4484 - loss: 0.9915 - val_accuracy: 0.3500 - val_loss: 0.9762
Epoch 5/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 261ms/step - accuracy: 0.5211 - loss: 0.9644 - val_accuracy: 0.7000 - val_loss: 0.9243
Epoch 6/10
3/3 ━━━━━━━━━━━━━━━━━━━━ 1s 260ms/step - accuracy: 0.5875 - loss: 0.9039 - val_accuracy: 0.8000 - val_loss: 0.8601
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━