## Discussion 11
### Comparing RNN, LSTM and GRU-based NLP models

In [None]:
# Let's import our libraries

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense, Dropout
from sklearn.model_selection import train_test_split

In [None]:
# Load and preprocess data
# NOTE(review): the original call was `pd.read_csv()` with no argument, which raises
# a TypeError because the file path is required. The dataset location is not recorded
# anywhere in this notebook, so DATA_PATH is a placeholder: point it at the CSV that
# provides the 'text' and 'target' columns read below.
DATA_PATH = 'train.csv'  # TODO: confirm the real dataset path
VOCAB_SIZE = 10000       # passed to Tokenizer(num_words=...); the model's Embedding uses the same value
MAX_LEN = 100            # length every sequence is padded/truncated to; the model expects this length

df = pd.read_csv(DATA_PATH)
# Coerce to plain strings so missing (NaN) or non-string entries cannot reach the tokenizer
texts = df['text'].fillna('').astype(str).values
labels = df['target'].values

# Text preprocessing
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
# truncating='post' cuts over-long sequences at the end; the padding side is left at the Keras default
padded_sequences = pad_sequences(sequences, maxlen=MAX_LEN, truncating='post')

In [None]:
# Hold out 20% of the padded sequences (and their labels) as the test set;
# the fixed random_state keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    random_state=42,
    test_size=0.2,
)

def create_model(rnn_type, vocab_size=10000, embedding_dim=128, max_len=100,
                 rnn_units=64, dense_units=64, dropout_rate=0.5):
    """Build and compile a binary text classifier around one recurrent layer.

    The stack is Embedding -> rnn_type -> Dense(relu) -> Dropout -> Dense(sigmoid),
    compiled with the Adam optimizer, binary cross-entropy loss and an accuracy
    metric. All defaults reproduce the values that were previously hard-coded.

    Args:
        rnn_type: Recurrent layer class to instantiate (e.g. SimpleRNN, LSTM, GRU).
            It is called as ``rnn_type(rnn_units, return_sequences=False)``.
        vocab_size: Input dimension of the Embedding layer; should match the
            ``num_words`` given to the tokenizer.
        embedding_dim: Output dimension of the Embedding layer.
        max_len: Length of each input sequence; should match the ``maxlen``
            used when padding.
        rnn_units: Number of units in the recurrent layer.
        dense_units: Number of units in the hidden Dense layer.
        dropout_rate: Rate passed to the Dropout layer.

    Returns:
        The compiled (untrained) Sequential model.
    """
    model = Sequential([
        # Explicit input spec instead of Embedding(input_length=...), which newer
        # Keras releases deprecate; the expected sequence length is unchanged.
        tf.keras.Input(shape=(max_len,), dtype='int32'),
        Embedding(vocab_size, embedding_dim),
        # return_sequences=False: only the final state feeds the classifier head
        rnn_type(rnn_units, return_sequences=False),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Seed applied before building and before fitting every variant, so the three
# architectures are compared from the same random starting state and a fresh
# Restart & Run All repeats the experiment (GPU kernels may still add small
# run-to-run differences).
SEED = 42  # same value as the train/test split's random_state

# Recurrent layer class for each variant under comparison
rnn_layers = {
    'RNN': SimpleRNN,
    'LSTM': LSTM,
    'GRU': GRU
}

# Create different model variants
models = {}
for name, rnn_layer in rnn_layers.items():
    # set_random_seed seeds Python's random module, NumPy and TensorFlow together
    tf.keras.utils.set_random_seed(SEED)
    models[name] = create_model(rnn_layer)

# Train and store history
history = {}
for name, model in models.items():
    # Re-seed so every variant trains from the same random state
    tf.keras.utils.set_random_seed(SEED)
    print(f'\nTraining {name} model...')
    history[name] = model.fit(
        X_train, y_train,
        epochs=15,
        batch_size=64,
        validation_split=0.2,  # validation data is carved out of the training split
        verbose=0
    )

In [None]:
# Visualization

# One panel per validation metric: (history key, panel title, y-axis label)
panels = [
    ('val_accuracy', 'Validation Accuracy Comparison', 'Accuracy'),
    ('val_loss', 'Validation Loss Comparison', 'Loss'),
]

fig, axes = plt.subplots(1, 2, figsize=(15, 6))
for ax, (metric, title, ylabel) in zip(axes, panels):
    # One curve per model variant, labelled with the variant's name
    for name, hist in history.items():
        ax.plot(hist.history[metric], label=name)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Score every trained variant on the held-out test data.
# Each entry stores what model.evaluate returns; index 1 is reported as accuracy.
results = {}
for name in models:
    results[name] = models[name].evaluate(X_test, y_test, verbose=0)
    print(f"{name} Test Accuracy: {results[name][1]:.4f}")