# 📝 Text Classification using RNN

Welcome to **Natural Language Processing** with deep learning! In this notebook, we'll build recurrent neural networks (RNNs) that classify the sentiment of movie reviews as positive or negative.

## What you'll learn:
- Text preprocessing and tokenization
- Word embeddings and vocabulary building
- RNN, LSTM, and GRU architectures
- Attention mechanisms for text

Let's teach machines to understand language! 🤖

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from wordcloud import WordCloud
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Apply one plotting style to every figure in the notebook.
plt.style.use('seaborn-v0_8')

# Seed NumPy and TensorFlow with the same fixed value.
for _set_seed in (np.random.seed, tf.random.set_seed):
    _set_seed(42)

# Report the runtime: framework version and whether any GPU device is visible.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {bool(tf.config.list_physical_devices('GPU'))}")

In [None]:
# Dataset settings: `vocab_size` is handed to the loader as `num_words`, and
# `max_length` is the fixed sequence length used when padding later on.
vocab_size = 10000
max_length = 500

# Fetch the train/test splits; each review arrives as a sequence of integer ids.
(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(num_words=vocab_size)

# Quick size and length statistics for the training split.
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Vocabulary size: {vocab_size}")
print(f"Average review length: {np.mean(list(map(len, X_train))):.1f}")
print(f"Max review length: {max(map(len, X_train))}")

# Word -> id mapping from Keras, plus its inverse (id -> word) for decoding.
word_index = keras.datasets.imdb.get_word_index()
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(encoded_review):
    """Convert a sequence of integer token ids back into readable text.

    Every id is shifted down by 3 before being looked up in the notebook-level
    ``reverse_word_index`` table; ids with no entry after the shift are
    rendered as '?'.
    NOTE(review): the offset of 3 presumably matches reserved ids in the Keras
    IMDB encoding -- confirm against the loader's ``index_from`` setting.

    Args:
        encoded_review: iterable of integer token ids.

    Returns:
        The decoded words joined by single spaces.
    """
    words = []
    for token_id in encoded_review:
        words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(words)

# Preview the first two training reviews: label first, then the decoded text
# cut to 200 characters.
print("\n📝 Sample Reviews:")
for sample_idx in (0, 1):
    if y_train[sample_idx] == 1:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"\n{sentiment} Review:")
    preview = decode_review(X_train[sample_idx])[:200]
    print(preview + "...")

In [None]:
# Pad / truncate every review to exactly `max_length` token ids.
#
# Padding goes at the FRONT ('pre'). The Embedding layers in this notebook do
# not mask the padding id 0, and SimpleRNN / LSTM only return their final state.
# With padding at the end, that state is computed after a long run of trailing
# zeros and the actual review content is largely washed out. Front padding
# keeps the real tokens as the last thing the recurrent layers read.
#
# Truncation stays 'post': reviews longer than `max_length` keep their first
# `max_length` tokens.
X_train_pad = pad_sequences(X_train, maxlen=max_length, padding='pre', truncating='post')
X_test_pad = pad_sequences(X_test, maxlen=max_length, padding='pre', truncating='post')

print(f"Padded training data shape: {X_train_pad.shape}")
print(f"Padded test data shape: {X_test_pad.shape}")

# Two side-by-side panels: distribution of raw (unpadded) review lengths with
# the padding cutoff marked, and the number of examples per sentiment class.
lengths = list(map(len, X_train))
length_fig, (ax_lengths, ax_classes) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: histogram of review lengths, dashed red line at `max_length`.
ax_lengths.hist(lengths, bins=50, alpha=0.7)
ax_lengths.axvline(max_length, color='red', linestyle='--', label=f'Max Length ({max_length})')
ax_lengths.set_xlabel('Review Length')
ax_lengths.set_ylabel('Frequency')
ax_lengths.set_title('📊 Distribution of Review Lengths')
ax_lengths.legend()

# Right panel: count of label 0 (negative) vs label 1 (positive) reviews.
class_counts = [np.sum(y_train == label) for label in (0, 1)]
ax_classes.bar(['Negative', 'Positive'], class_counts)
ax_classes.set_xlabel('Sentiment')
ax_classes.set_ylabel('Count')
ax_classes.set_title('📊 Class Distribution')

plt.tight_layout()
plt.show()

In [None]:
# Model 1: Simple RNN
def create_simple_rnn():
    """Build the baseline classifier: Embedding -> SimpleRNN -> sigmoid unit.

    Reads the notebook-level ``vocab_size`` (embedding vocabulary) and
    ``max_length`` (input sequence length) settings.

    Returns:
        An uncompiled ``Sequential`` model that takes integer id sequences of
        length ``max_length`` and outputs one sigmoid value per sequence.
    """
    model = models.Sequential([
        # An explicit Input replaces Embedding's deprecated `input_length`
        # argument and still builds the model up front, so `summary()` can
        # report concrete output shapes and parameter counts.
        layers.Input(shape=(max_length,)),
        layers.Embedding(vocab_size, 128),
        layers.SimpleRNN(64, dropout=0.5, recurrent_dropout=0.5),
        layers.Dense(1, activation='sigmoid')
    ])
    return model

# Instantiate the baseline model and configure it for binary classification:
# Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
simple_rnn = create_simple_rnn()
simple_rnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer overview.
print("🔄 Simple RNN Architecture:")
simple_rnn.summary()

In [None]:
# Training callbacks, stored in `callbacks` so later cells can pass the same list:
#   * EarlyStopping: patience of 3 epochs, restores the best weights seen.
#   * ReduceLROnPlateau: halves the learning rate after 2 stagnant epochs,
#     never going below 1e-7.
# Both rely on their library-default monitored quantity.
callbacks = []
callbacks.append(EarlyStopping(patience=3, restore_best_weights=True))
callbacks.append(ReduceLROnPlateau(factor=0.5, patience=2, min_lr=1e-7))

# Fit the Simple RNN for up to 10 epochs, holding out 20% of the training data
# for validation.
print("🚀 Training Simple RNN...")
history_rnn = simple_rnn.fit(
    X_train_pad,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# Score on the test split; evaluate() yields the loss followed by accuracy.
rnn_test_loss, rnn_test_acc = simple_rnn.evaluate(X_test_pad, y_test, verbose=0)
print(f"\n🎯 Simple RNN Test Accuracy: {rnn_test_acc:.4f}")

In [None]:
# Model 2: LSTM Network
def create_lstm_model():
    """Build the bidirectional LSTM sentiment classifier.

    Architecture: Embedding -> Bidirectional(LSTM) -> Dense(relu) -> Dropout
    -> sigmoid unit. Reads the notebook-level ``vocab_size`` and
    ``max_length`` settings.

    Returns:
        An uncompiled ``Sequential`` model that takes integer id sequences of
        length ``max_length`` and outputs one sigmoid value per sequence.
    """
    model = models.Sequential([
        # An explicit Input replaces Embedding's deprecated `input_length`
        # argument and still builds the model up front, so `summary()` can
        # report concrete output shapes and parameter counts.
        layers.Input(shape=(max_length,)),
        layers.Embedding(vocab_size, 128),
        layers.Bidirectional(layers.LSTM(64, dropout=0.5, recurrent_dropout=0.5)),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid')
    ])
    return model

# Instantiate the LSTM model and configure it for binary classification:
# Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
lstm_model = create_lstm_model()
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer overview.
print("🧠 LSTM Model Architecture:")
lstm_model.summary()

In [None]:
# Fit the LSTM model for up to 10 epochs with the shared `callbacks` list,
# holding out 20% of the training data for validation.
print("🚀 Training LSTM Model...")
history_lstm = lstm_model.fit(
    X_train_pad,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# Score on the test split; evaluate() yields the loss followed by accuracy.
lstm_test_loss, lstm_test_acc = lstm_model.evaluate(X_test_pad, y_test, verbose=0)
print(f"\n🎯 LSTM Test Accuracy: {lstm_test_acc:.4f}")

In [None]:
# Model 3: GRU with Attention
class AttentionLayer(layers.Layer):
    """Collapse a sequence into one vector using learned attention weights.

    A single trainable projection ``W`` maps every position to a scalar score
    ``tanh(x . W)``. Scores are softmax-normalised along axis 1 and used to
    form a weighted sum of the input along that same axis.
    Assumes the input is (batch, steps, features), as produced in this
    notebook by a recurrent layer with ``return_sequences=True``.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; forward everything to the base Layer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection ``W`` with shape (last input dim, 1)."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        # One tanh-squashed scalar score per position (trailing dim of size 1).
        scores = tf.nn.tanh(tf.tensordot(x, self.W, axes=1))
        # Normalise across positions so the weights sum to 1 along axis 1.
        weights = tf.nn.softmax(scores, axis=1)
        # Broadcast the weights over the feature axis, then pool over axis 1.
        return tf.reduce_sum(x * weights, axis=1)

def create_gru_attention_model():
    """Build the bidirectional GRU classifier with attention pooling.

    Architecture: Input -> Embedding -> Bidirectional(GRU, full sequence)
    -> AttentionLayer -> Dense(relu) -> Dropout -> sigmoid unit. Reads the
    notebook-level ``vocab_size`` and ``max_length`` settings.

    Returns:
        An uncompiled functional ``Model`` mapping integer id sequences of
        length ``max_length`` to one sigmoid value per sequence.
    """
    token_ids = layers.Input(shape=(max_length,))
    embedded = layers.Embedding(vocab_size, 128)(token_ids)

    # return_sequences=True keeps every timestep so attention can weigh them.
    encoder = layers.Bidirectional(layers.GRU(64, return_sequences=True, dropout=0.5))
    sequence_states = encoder(embedded)
    pooled = AttentionLayer()(sequence_states)

    hidden = layers.Dense(64, activation='relu')(pooled)
    hidden = layers.Dropout(0.5)(hidden)
    probability = layers.Dense(1, activation='sigmoid')(hidden)

    return models.Model(token_ids, probability)

# Instantiate the GRU + Attention model and configure it for binary
# classification: Adam optimizer, binary cross-entropy loss, accuracy metric.
gru_attention = create_gru_attention_model()
gru_attention.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer overview.
print("🎯 GRU + Attention Model Architecture:")
gru_attention.summary()

In [None]:
# Fit the GRU + Attention model for up to 10 epochs with the shared
# `callbacks` list, holding out 20% of the training data for validation.
print("🚀 Training GRU + Attention Model...")
history_gru = gru_attention.fit(
    X_train_pad,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# Score on the test split; evaluate() yields the loss followed by accuracy.
gru_test_loss, gru_test_acc = gru_attention.evaluate(X_test_pad, y_test, verbose=0)
print(f"\n🎯 GRU + Attention Test Accuracy: {gru_test_acc:.4f}")

In [None]:
# Side-by-side comparison of the three models: one column per model,
# accuracy curves on the top row and loss curves on the bottom row.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

histories = [history_rnn, history_lstm, history_gru]
model_names = ['Simple RNN', 'LSTM', 'GRU + Attention']
test_accs = [rnn_test_acc, lstm_test_acc, gru_test_acc]


def _plot_training_curves(ax, history, metric, title, ylabel):
    """Draw the training and validation curves of one metric on `ax`."""
    ax.plot(history.history[metric], label='Training')
    ax.plot(history.history[f'val_{metric}'], label='Validation')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)


for col, (history, name, test_acc) in enumerate(zip(histories, model_names, test_accs)):
    # Top row: accuracy, with the final test accuracy in the panel title.
    _plot_training_curves(
        axes[0, col], history, 'accuracy',
        f'📈 {name} - Accuracy\nTest: {test_acc:.3f}', 'Accuracy',
    )
    # Bottom row: loss.
    _plot_training_curves(
        axes[1, col], history, 'loss',
        f'📉 {name} - Loss', 'Loss',
    )

plt.tight_layout()
plt.show()

# Plain-text summary of the test accuracies.
print("\n🏆 Model Comparison:")
for name, acc in zip(model_names, test_accs):
    print(f"{name}: {acc:.4f}")

In [None]:
# Turn the GRU + Attention model's sigmoid outputs into hard 0/1 labels by
# thresholding at 0.5.
test_probabilities = gru_attention.predict(X_test_pad)
y_pred = (test_probabilities > 0.5).astype(int)

# Confusion matrix as an annotated heatmap (rows: actual, columns: predicted).
class_labels = ['Negative', 'Positive']
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('🎯 Confusion Matrix - GRU + Attention')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Per-class precision / recall / F1 as text.
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred, target_names=class_labels))

## 🎉 Congratulations!

You've mastered text classification with RNNs! Here's what you've accomplished:

✅ **Text Processing**: Tokenization and sequence handling  
✅ **RNN Architectures**: Simple RNN, LSTM, GRU  
✅ **Attention Mechanism**: Focusing on important words  
✅ **Sentiment Analysis**: Understanding text emotions  

### 🚀 Next Steps:
1. Try pre-trained embeddings (GloVe, Word2Vec)
2. Experiment with Transformer models
3. Build multi-class text classifiers
4. Move on to **Project 05: Time Series Forecasting with LSTM**

Ready for time series? Let's predict the future! ⏰