In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
# Silence every Python warning for cleaner tutorial output.
# Note that this also hides deprecation notices from the libraries above.
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("TensorFlow version:", tf.__version__)
print("Libraries imported successfully!")

# Set style for better plots.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later dropped the old 'seaborn' name, so use whichever name this
# installation provides instead of failing on a missing style.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")


In [None]:
# Implementing Bidirectional RNN
def create_bidirectional_rnn(input_shape, hidden_units=50, output_units=1):
    """
    Build and compile a bidirectional SimpleRNN classifier.

    Args:
        input_shape: Shape of a single input sample without the batch axis,
            e.g. ``(timesteps, features)``.
        hidden_units: Number of units in the wrapped SimpleRNN layer.
        output_units: Number of sigmoid output units in the final Dense layer.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        # Declare the input explicitly instead of passing the deprecated
        # `input_shape=` keyword to the first layer.
        keras.Input(shape=input_shape),
        keras.layers.Bidirectional(
            # return_sequences=False: only the final output feeds the classifier.
            keras.layers.SimpleRNN(hidden_units, return_sequences=False)
        ),
        keras.layers.Dense(output_units, activation='sigmoid')
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

# Create synthetic data to demonstrate bidirectional processing
def create_palindrome_data(n_samples=1000, sequence_length=10):
    """
    Generate random digit sequences labelled by whether they are palindromes.

    Each sample starts as a random sequence of digits 0-9. With probability
    0.5 its first half is mirrored onto its tail, which makes it a palindrome.
    The label is then computed from the final sequence, so a random sequence
    that happens to read the same in both directions is labelled 1 as well.

    Args:
        n_samples: Number of sequences to generate.
        sequence_length: Length of every sequence. Odd lengths are supported;
            the middle element is left as drawn.

    Returns:
        Tuple ``(X, y)``: ``X`` holds the sequences, one per row, and ``y``
        holds the labels (1 = palindrome, 0 = not a palindrome).
    """
    X = []
    y = []
    half = sequence_length // 2

    for _ in range(n_samples):
        # Create random sequence
        sequence = np.random.randint(0, 10, sequence_length)

        # Create palindrome 50% of the time
        if np.random.random() < 0.5:
            # Anchor the target slice at the END of the sequence so it always
            # has exactly `half` elements. Slicing from `half` onwards is one
            # element too long for odd lengths and raises a shape error.
            sequence[sequence_length - half:] = sequence[:half][::-1]

        # Label from the actual content rather than from the branch taken,
        # so accidental palindromes among the random samples are not
        # mislabelled as negatives.
        label = int(np.array_equal(sequence, sequence[::-1]))

        X.append(sequence)
        y.append(label)

    return np.array(X), np.array(y)

# Generate palindrome detection data
print("Creating palindrome detection dataset...")
X_pal, y_pal = create_palindrome_data(n_samples=2000, sequence_length=8)

# Normalize the input: digits 0-9 divided by 10 fall in [0, 0.9]
X_pal = X_pal / 10.0

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_pal, y_pal, test_size=0.2, random_state=42
)

# Reshape for RNN (add feature dimension):
# (samples, timesteps) -> (samples, timesteps, 1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print(f"Training data shape: {X_train.shape}")
print(f"Training labels distribution: {np.bincount(y_train)}")

# Per-sample input shape, read from the data so both models stay in sync
# with the dataset if the sequence length above is changed.
pal_input_shape = X_train.shape[1:]

# Create and compare models
print("\nCreating Unidirectional RNN...")
uni_model = keras.Sequential([
    # Explicit Input object instead of the deprecated `input_shape=` keyword.
    keras.Input(shape=pal_input_shape),
    keras.layers.SimpleRNN(32, return_sequences=False),
    keras.layers.Dense(1, activation='sigmoid')
])
uni_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print("Creating Bidirectional RNN...")
bi_model = create_bidirectional_rnn(input_shape=pal_input_shape, hidden_units=32)

print("\nModel architectures:")
print("Unidirectional RNN:")
uni_model.summary()
print("\nBidirectional RNN:")
bi_model.summary()


In [None]:
# Train and compare models
# Both networks are fitted with the same options so the comparison is like-for-like.
fit_options = dict(
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
    verbose=0,
)

print("Training Unidirectional RNN...")
uni_history = uni_model.fit(X_train, y_train, **fit_options)

print("Training Bidirectional RNN...")
bi_history = bi_model.fit(X_train, y_train, **fit_options)

# Evaluate models: index 1 of evaluate()'s result is the accuracy metric
# (index 0 is the loss).
uni_accuracy = uni_model.evaluate(X_test, y_test, verbose=0)[1]
bi_accuracy = bi_model.evaluate(X_test, y_test, verbose=0)[1]
accuracy_gain = bi_accuracy - uni_accuracy

print(f"\nModel Performance on Palindrome Detection:")
print(f"Unidirectional RNN Accuracy: {uni_accuracy:.4f}")
print(f"Bidirectional RNN Accuracy: {bi_accuracy:.4f}")
print(f"Improvement: {accuracy_gain:.4f}")

# Visualize training progress: accuracy on the left panel, loss on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

labelled_histories = (
    ('Uni RNN', uni_history),
    ('Bi RNN', bi_history),
)
panels = (
    (axes[0], 'accuracy', 'Model Accuracy Comparison', 'Accuracy'),
    (axes[1], 'loss', 'Model Loss Comparison', 'Loss'),
)

for ax, metric, title, ylabel in panels:
    # Train curve then validation curve, one model at a time.
    for model_label, history in labelled_histories:
        ax.plot(history.history[metric], label=f'{model_label} - Train')
        ax.plot(history.history[f'val_{metric}'], label=f'{model_label} - Val')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Implementing LSTM Networks
def create_lstm_model(input_shape, hidden_units=50, output_units=1, task_type='regression'):
    """
    Build and compile a single-layer LSTM model.

    Args:
        input_shape: Shape of a single input sample without the batch axis,
            e.g. ``(timesteps, features)``.
        hidden_units: Number of units in the LSTM layer.
        output_units: Number of units in the final Dense layer.
        task_type: ``'regression'`` gives a linear output trained with MSE
            (MAE metric). Any other value gives a sigmoid output trained with
            binary cross-entropy (accuracy metric).

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer.
    """
    is_regression = task_type == 'regression'

    model = keras.Sequential([
        # Declare the input explicitly instead of passing the deprecated
        # `input_shape=` keyword to the first layer.
        keras.Input(shape=input_shape),
        # return_sequences=False: only the final output feeds the Dense head.
        keras.layers.LSTM(hidden_units, return_sequences=False),
        keras.layers.Dense(output_units,
                           activation='linear' if is_regression else 'sigmoid')
    ])

    if is_regression:
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    else:
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create data with long-term dependencies for LSTM demonstration
def create_long_dependency_data(n_samples=1000, sequence_length=50):
    """
    Generate noisy sequences whose label depends on an early, strong signal.

    Every sequence is standard-normal noise with one value near the start
    overwritten by +5 or -5. The label is 1 for a positive signal and 0 for a
    negative one.

    Args:
        n_samples: Number of sequences to generate.
        sequence_length: Length of every sequence; must be at least 1.
            The signal is placed within the first 5 positions, or anywhere
            in the sequence when it is shorter than 5.

    Returns:
        Tuple ``(X, y)``: ``X`` holds the sequences, one per row, and ``y``
        holds the 0/1 labels.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    # Clamp the signal window so short sequences are never indexed out of bounds.
    signal_window = min(5, sequence_length)

    X = []
    y = []

    for _ in range(n_samples):
        # Random sequence
        sequence = np.random.randn(sequence_length)

        # Important signal at the beginning
        signal_position = np.random.randint(0, signal_window)
        signal_value = np.random.choice([-1, 1])
        sequence[signal_position] = signal_value * 5  # Strong signal

        # Output depends on early signal
        label = 1 if signal_value > 0 else 0

        X.append(sequence)
        y.append(label)

    return np.array(X), np.array(y)

print("Creating long-term dependency dataset...")
X_long, y_long = create_long_dependency_data(n_samples=2000, sequence_length=30)

# Reshape for RNN: (samples, timesteps) -> (samples, timesteps, 1)
X_long = X_long.reshape(X_long.shape[0], X_long.shape[1], 1)

# Split data
X_train_long, X_test_long, y_train_long, y_test_long = train_test_split(
    X_long, y_long, test_size=0.2, random_state=42
)

print(f"Long dependency data shape: {X_train_long.shape}")

# Per-sample input shape, read from the data so both models stay in sync
# with the dataset if the sequence length above is changed.
long_input_shape = X_train_long.shape[1:]

# Compare SimpleRNN vs LSTM on long dependencies
print("\nCreating models for long-term dependency comparison...")

simple_rnn = keras.Sequential([
    # Explicit Input object instead of the deprecated `input_shape=` keyword.
    keras.Input(shape=long_input_shape),
    keras.layers.SimpleRNN(32),
    keras.layers.Dense(1, activation='sigmoid')
])
simple_rnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

lstm_model = create_lstm_model(
    input_shape=long_input_shape,
    hidden_units=32,
    output_units=1,
    task_type='classification'
)

print("Training SimpleRNN on long dependency task...")
simple_history = simple_rnn.fit(
    X_train_long, y_train_long,
    batch_size=32, epochs=30,
    validation_data=(X_test_long, y_test_long),
    verbose=0
)

print("Training LSTM on long dependency task...")
lstm_history = lstm_model.fit(
    X_train_long, y_train_long,
    batch_size=32, epochs=30,
    validation_data=(X_test_long, y_test_long),
    verbose=0
)

# Evaluate models: index 1 of evaluate()'s result is the accuracy metric
simple_acc = simple_rnn.evaluate(X_test_long, y_test_long, verbose=0)[1]
lstm_acc = lstm_model.evaluate(X_test_long, y_test_long, verbose=0)[1]

print(f"\nLong-term Dependency Task Performance:")
print(f"SimpleRNN Accuracy: {simple_acc:.4f}")
print(f"LSTM Accuracy: {lstm_acc:.4f}")
print(f"LSTM Improvement: {(lstm_acc - simple_acc):.4f}")


In [None]:
# Implementing GRU and comparing all variants
def create_gru_model(input_shape, hidden_units=50, output_units=1, task_type='regression'):
    """
    Build and compile a single-layer GRU model.

    Args:
        input_shape: Shape of a single input sample without the batch axis,
            e.g. ``(timesteps, features)``.
        hidden_units: Number of units in the GRU layer.
        output_units: Number of units in the final Dense layer.
        task_type: ``'regression'`` gives a linear output trained with MSE
            (MAE metric). Any other value gives a sigmoid output trained with
            binary cross-entropy (accuracy metric).

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer.
    """
    is_regression = task_type == 'regression'

    model = keras.Sequential([
        # Declare the input explicitly instead of passing the deprecated
        # `input_shape=` keyword to the first layer.
        keras.Input(shape=input_shape),
        # return_sequences=False: only the final output feeds the Dense head.
        keras.layers.GRU(hidden_units, return_sequences=False),
        keras.layers.Dense(output_units,
                           activation='linear' if is_regression else 'sigmoid')
    ])

    if is_regression:
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    else:
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Create GRU model for comparison
print("Creating GRU model...")
gru_model = create_gru_model(
    # Take the per-sample shape from the training data instead of repeating
    # a literal, so this cell keeps working if the dataset's sequence length
    # is changed where the data is generated.
    input_shape=X_train_long.shape[1:],
    hidden_units=32,
    output_units=1,
    task_type='classification'
)

print("Training GRU on long dependency task...")
gru_history = gru_model.fit(
    X_train_long, y_train_long,
    batch_size=32, epochs=30,
    validation_data=(X_test_long, y_test_long),
    verbose=0
)

# Evaluate GRU: index 1 of evaluate()'s result is the accuracy metric
gru_acc = gru_model.evaluate(X_test_long, y_test_long, verbose=0)[1]

print(f"GRU Accuracy: {gru_acc:.4f}")

# Compare model complexities
def count_parameters(model):
    """
    Return the parameter count reported by ``model.count_params()``.

    Keras' ``count_params()`` covers all of a model's weights, so the figure
    is a total rather than a trainable-only count.
    """
    total_params = model.count_params()
    return total_params

# Shared labels, scores, colours and sizes for the three recurrent models
models = ['SimpleRNN', 'LSTM', 'GRU']
accuracies = [simple_acc, lstm_acc, gru_acc]
colors = ['skyblue', 'lightcoral', 'lightgreen']
param_counts = [count_parameters(net) for net in (simple_rnn, lstm_model, gru_model)]

print(f"\nModel Complexity Comparison:")
for model_name, n_params in zip(models, param_counts):
    print(f"{model_name} parameters: {n_params:,}")

# Create comprehensive comparison plot (2x2 grid)
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top-left: test accuracy per model, with the value written above each bar
acc_ax = axes[0, 0]
acc_ax.bar(models, accuracies, color=colors)
acc_ax.set_title('Model Accuracy Comparison')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim(0.4, 1.0)
for bar_idx, acc in enumerate(accuracies):
    acc_ax.text(bar_idx, acc + 0.01, f'{acc:.3f}', ha='center')

# Top-right: parameter count per model, with the value written above each bar
param_ax = axes[0, 1]
param_ax.bar(models, param_counts, color=colors)
param_ax.set_title('Model Complexity (Parameters)')
param_ax.set_ylabel('Number of Parameters')
for bar_idx, count in enumerate(param_counts):
    param_ax.text(bar_idx, count + 50, f'{count:,}', ha='center')

# Bottom row: validation accuracy (left) and validation loss (right) per epoch
histories = [simple_history, lstm_history, gru_history]
curve_panels = (
    (axes[1, 0], 'val_accuracy', 'Validation Accuracy During Training', 'Validation Accuracy'),
    (axes[1, 1], 'val_loss', 'Validation Loss During Training', 'Validation Loss'),
)
for curve_ax, history_key, title, ylabel in curve_panels:
    for model_name, history in zip(models, histories):
        curve_ax.plot(history.history[history_key], label=model_name, linewidth=2)
    curve_ax.set_title(title)
    curve_ax.set_xlabel('Epoch')
    curve_ax.set_ylabel(ylabel)
    curve_ax.legend()
    curve_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
