# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility (NumPy and TensorFlow use separate generators)
np.random.seed(42)
tf.random.set_seed(42)

print("TensorFlow version:", tf.__version__)
print("Long-term Dependencies Analysis initialized!")

# Set plotting style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only know the un-suffixed name. Pick whichever one this
# installation provides instead of failing on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette("husl")

# 1. Long-term Dependency Task Generator
class LongTermDependencyTasks:
    """
    Generate various tasks to test long-term dependency capabilities.

    ``copy_task`` and ``adding_task`` draw from NumPy's global random state
    (seed it with ``np.random.seed`` for reproducible data).
    ``permuted_sequential_mnist`` uses its own private generator and leaves
    the global state untouched.
    """

    def __init__(self, vocab_size=10):
        # Copy-task symbols are drawn from 1 .. vocab_size - 1; 0 marks "blank".
        self.vocab_size = vocab_size

    def copy_task(self, seq_length=100, gap_length=50, num_samples=1000, info_length=5):
        """
        Copy task: remember information from beginning and reproduce after gap.

        Each input row holds ``info_length`` random symbols followed by zeros.
        The target row is zero everywhere except for a copy of those symbols
        starting at index ``info_length + gap_length``.

        NOTE: if that copy would run past ``seq_length`` the targets are left
        all-zero (no error is raised). Callers that need a meaningful target
        must make sure ``2 * info_length + gap_length <= seq_length``.

        Args:
            seq_length: Number of time steps per sequence.
            gap_length: Number of blank steps between the symbols and the copy.
            num_samples: Number of sequences to generate.
            info_length: Number of symbols to memorise (default 5).

        Returns:
            Tuple ``(X, y)`` of float arrays, each of shape
            ``(num_samples, seq_length)``.

        Raises:
            ValueError: If ``info_length`` or ``gap_length`` is negative, or
                ``info_length`` exceeds ``seq_length``.
        """
        if info_length < 0 or gap_length < 0:
            raise ValueError("info_length and gap_length must be non-negative")
        if info_length > seq_length:
            raise ValueError(
                f"seq_length ({seq_length}) must be at least info_length ({info_length})"
            )

        X = np.zeros((num_samples, seq_length))
        y = np.zeros((num_samples, seq_length))

        # One randint call per sample keeps the random stream identical to
        # drawing each sequence on its own.
        for row in range(num_samples):
            X[row, :info_length] = np.random.randint(1, self.vocab_size, info_length)

        # Positions info_length .. start_reproduce - 1 stay zero (the gap).
        start_reproduce = info_length + gap_length
        if start_reproduce + info_length <= seq_length:
            y[:, start_reproduce:start_reproduce + info_length] = X[:, :info_length]

        return X, y

    def adding_task(self, seq_length=100, num_samples=1000):
        """
        Adding task: sum two marked numbers in a long sequence.

        Channel 0 holds uniform random values in [0, 1). Channel 1 is a 0/1
        marker with exactly two ones. Both marked positions are drawn from the
        first half of the sequence (indices ``0 .. seq_length // 2 - 1``).

        Args:
            seq_length: Number of time steps per sequence (at least 4, so the
                first half contains two distinct positions).
            num_samples: Number of sequences to generate.

        Returns:
            Tuple ``(X, y)`` with ``X`` of shape ``(num_samples, seq_length, 2)``
            and ``y`` of shape ``(num_samples,)`` holding the sum of the two
            marked values.

        Raises:
            ValueError: If ``seq_length`` is smaller than 4.
        """
        if seq_length // 2 < 2:
            raise ValueError("seq_length must be at least 4 to mark two distinct positions")

        X = np.empty((num_samples, seq_length, 2))
        y = np.empty(num_samples)

        for row in range(num_samples):
            # Both columns are drawn (then column 1 is overwritten) so the
            # random stream matches a per-sample (seq_length, 2) draw.
            sequence = np.random.uniform(0, 1, (seq_length, 2))

            # Mark two distinct positions in the first half
            mark_positions = np.random.choice(seq_length // 2, 2, replace=False)
            sequence[:, 1] = 0
            sequence[mark_positions, 1] = 1

            X[row] = sequence
            y[row] = np.sum(sequence[mark_positions, 0])

        return X, y

    def permuted_sequential_mnist(self, permutation_seed=42):
        """
        Permuted Sequential MNIST: classify digit from permuted pixel sequence.

        Loads MNIST through Keras, scales pixels to [0, 1], flattens each image
        and applies one fixed pixel permutation to the train and test sets.

        The permutation comes from a private ``RandomState`` seeded with
        ``permutation_seed``. This yields the same permutation as seeding the
        global generator with that value, without resetting the global random
        state for the rest of the program.

        Args:
            permutation_seed: Seed that determines the pixel permutation.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` where the ``x`` arrays
            have shape ``(samples, pixels, 1)`` and the labels are returned as
            loaded.
        """
        # Load MNIST data
        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

        # Normalize and flatten images to pixel sequences
        x_train = (x_train.astype('float32') / 255.0).reshape(x_train.shape[0], -1)
        x_test = (x_test.astype('float32') / 255.0).reshape(x_test.shape[0], -1)

        # Create fixed permutation from an isolated generator
        num_pixels = x_train.shape[1]
        permutation = np.random.RandomState(permutation_seed).permutation(num_pixels)

        # Apply permutation, then reshape for RNN input (samples, timesteps, features)
        x_train = x_train[:, permutation].reshape(x_train.shape[0], num_pixels, 1)
        x_test = x_test[:, permutation].reshape(x_test.shape[0], num_pixels, 1)

        return (x_train, y_train), (x_test, y_test)

# 2. Gradient Flow Analyzer
class GradientFlowAnalyzer:
    """
    Analyze gradient flow for long-term dependencies.

    Provides three independent helpers: building a small recurrent model,
    measuring per-variable gradient norms on individual samples, and
    inspecting the eigenvalues of recurrent weight matrices.
    """

    # Recurrent layer classes (looked up by name on ``keras.layers``) that
    # ``create_gradient_tracking_model`` knows how to build.
    SUPPORTED_MODEL_TYPES = ('SimpleRNN', 'LSTM', 'GRU')

    def __init__(self):
        self.gradient_history = {}

    def create_gradient_tracking_model(self, model_type='SimpleRNN', units=50, seq_length=100):
        """
        Create a recurrent model that emits one value per time step.

        Args:
            model_type: One of ``'SimpleRNN'``, ``'LSTM'`` or ``'GRU'``.
            units: Number of recurrent units.
            seq_length: Number of time steps; the input shape is
                ``(seq_length, 1)``.

        Returns:
            An uncompiled ``keras.Sequential`` model: the recurrent layer with
            ``return_sequences=True`` followed by ``Dense(1)``.

        Raises:
            ValueError: If ``model_type`` is not supported.
        """
        if model_type not in self.SUPPORTED_MODEL_TYPES:
            raise ValueError(
                f"Unsupported model_type {model_type!r}; "
                f"expected one of {self.SUPPORTED_MODEL_TYPES}"
            )

        # The supported names match the Keras layer class names one-to-one.
        recurrent_layer = getattr(layers, model_type)
        return keras.Sequential([
            recurrent_layer(units, return_sequences=True, input_shape=(seq_length, 1)),
            layers.Dense(1)
        ])

    def analyze_gradient_flow(self, model, X, y, num_batches=5):
        """
        Measure gradient norms of an MSE loss on the first few samples.

        Each "batch" is a single sample ``X[i:i+1]``. A 2-D ``X`` of shape
        ``(samples, timesteps)`` gets a trailing feature axis. Targets with as
        many elements as the prediction are reshaped to the prediction's shape
        so the loss is element-wise; without that, ``(1, T, 1)`` predictions
        minus ``(1, T)`` targets would broadcast to ``(1, T, T)``. Targets of
        any other size are left as they are and broadcast.

        Args:
            model: Callable Keras model with ``trainable_variables``.
            X: Inputs, shape ``(samples, timesteps)`` or
                ``(samples, timesteps, features)``.
            y: Targets, indexed along the first axis like ``X``.
            num_batches: Maximum number of samples to analyse; capped at
                ``len(X)``.

        Returns:
            Tuple ``(gradient_norms, loss_values)``: an array with one row per
            analysed sample and one column per trainable variable (0.0 where a
            variable received no gradient), and an array of the loss values.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 2:
            X = X[..., np.newaxis]

        gradient_norms = []
        loss_values = []

        # Never index past the available samples (empty slices give NaN losses).
        for i in range(min(num_batches, len(X))):
            with tf.GradientTape() as tape:
                predictions = model(X[i:i+1], training=True)
                targets = self._align_targets(y[i:i+1], predictions)
                loss = tf.reduce_mean(tf.square(predictions - targets))

            gradients = tape.gradient(loss, model.trainable_variables)

            # One norm per trainable variable, in model.trainable_variables order
            layer_norms = [
                tf.norm(grad).numpy() if grad is not None else 0.0
                for grad in gradients
            ]

            gradient_norms.append(layer_norms)
            loss_values.append(loss.numpy())

        return np.array(gradient_norms), np.array(loss_values)

    @staticmethod
    def _align_targets(targets, predictions):
        """Cast targets to the prediction dtype and, if sizes match, its shape."""
        targets = np.asarray(targets, dtype=predictions.dtype.as_numpy_dtype)
        prediction_shape = tuple(predictions.shape)
        if targets.size == int(np.prod(prediction_shape)):
            targets = targets.reshape(prediction_shape)
        return targets

    @staticmethod
    def _recurrent_eigenvalues(W_rec):
        """Return eigenvalues of a recurrent kernel, block by block if it is wide."""
        if W_rec.ndim == 2:
            rows, cols = W_rec.shape
            # Gated layers (LSTM/GRU in Keras) are expected to store one square
            # block per gate side by side, e.g. (units, 4 * units). eigvals
            # needs square input, so analyse each block separately.
            if rows and cols != rows and cols % rows == 0:
                blocks = np.split(W_rec, cols // rows, axis=1)
                return np.concatenate([np.linalg.eigvals(block) for block in blocks])
        return np.linalg.eigvals(W_rec)

    def eigenvalue_analysis(self, model, seq_length=100):
        """
        Analyze eigenvalues of recurrent weight matrices.

        Args:
            model: Object with a ``layers`` iterable. Layers exposing a
                ``recurrent_kernel`` (directly or on their ``cell``) are
                analysed; all other layers are skipped.
            seq_length: Unused; kept for interface compatibility.

        Returns:
            Dict keyed by layer name. Each value holds ``'eigenvalues'`` (for
            wide kernels, the eigenvalues of every square block concatenated),
            plus ``'max_eigenvalue'`` and ``'spectral_radius'``, which are both
            the largest eigenvalue magnitude.
        """
        eigenvalues = {}

        for layer in model.layers:
            kernel = getattr(layer, 'recurrent_kernel', None)
            if kernel is None:
                # Some Keras versions only expose the kernel on the wrapped cell.
                kernel = getattr(getattr(layer, 'cell', None), 'recurrent_kernel', None)
            if kernel is None:
                continue

            eigvals = self._recurrent_eigenvalues(np.asarray(kernel.numpy()))
            spectral_radius = np.max(np.abs(eigvals))
            eigenvalues[layer.name] = {
                'eigenvalues': eigvals,
                'max_eigenvalue': spectral_radius,
                'spectral_radius': spectral_radius
            }

        return eigenvalues

# 3. Performance Degradation Analyzer
class PerformanceDegradationAnalyzer:
    """
    Analyze how performance degrades with sequence length.

    The results of the most recent ``test_sequence_lengths`` run are kept in
    ``self.results``.
    """

    # Recurrent layer classes (looked up by name on ``keras.layers``) that can
    # be benchmarked.
    SUPPORTED_MODEL_TYPES = ('SimpleRNN', 'LSTM', 'GRU')

    def __init__(self):
        self.results = {}

    def test_sequence_lengths(self, task_generator, lengths=(10, 25, 50, 100, 200),
                              model_types=('SimpleRNN', 'LSTM', 'GRU')):
        """
        Test performance across different sequence lengths.

        For every model type and length, 500 copy-task samples are generated
        with ``gap_length = length // 2``. A 32-unit recurrent model with a
        ``Dense(length)`` head is then trained for 20 epochs on the first 80%
        and validated on the remaining 20%.

        Args:
            task_generator: Object with a
                ``copy_task(seq_length=, gap_length=, num_samples=)`` method
                returning ``(X, y)`` arrays of shape ``(samples, length)``.
            lengths: Sequence lengths to evaluate.
            model_types: Names from ``SUPPORTED_MODEL_TYPES``.

        Returns:
            Nested dict ``results[model_type][length]`` with keys
            ``'val_loss'``, ``'val_mae'`` (final-epoch values) and
            ``'history'`` (the full Keras history dict). The same dict is
            stored in ``self.results``.

        Raises:
            ValueError: If ``model_types`` contains an unsupported name.
            TypeError: If ``task_generator`` has no callable ``copy_task``.
        """
        # Fail fast with a clear message instead of hitting an unbound local
        # variable part-way through a long training run.
        unknown = [name for name in model_types if name not in self.SUPPORTED_MODEL_TYPES]
        if unknown:
            raise ValueError(
                f"Unsupported model type(s) {unknown}; "
                f"expected a subset of {self.SUPPORTED_MODEL_TYPES}"
            )
        if not callable(getattr(task_generator, 'copy_task', None)):
            raise TypeError("task_generator must provide a callable copy_task method")

        results = {}

        for model_type in model_types:
            results[model_type] = {}

            for length in lengths:
                print(f"Testing {model_type} with sequence length {length}...")

                # Generate task data
                X, y = task_generator.copy_task(seq_length=length,
                                                gap_length=length // 2,
                                                num_samples=500)

                # All-zero targets make the task trivial (predict zero), so the
                # reported metrics would say nothing about memory capacity.
                if not np.any(y):
                    print(f"  Warning: copy-task targets are all zero for sequence "
                          f"length {length}; metrics for this length are not meaningful.")

                model = self._build_model(model_type, length)

                # Reshape data for RNN: (samples, timesteps, 1 feature)
                X_reshaped = X.reshape(X.shape[0], X.shape[1], 1)

                # Split data: first 80% for training, the rest for validation
                split_idx = int(0.8 * len(X_reshaped))
                X_train, X_val = X_reshaped[:split_idx], X_reshaped[split_idx:]
                y_train, y_val = y[:split_idx], y[split_idx:]

                # Train model
                history = model.fit(X_train, y_train,
                                    validation_data=(X_val, y_val),
                                    epochs=20, batch_size=32, verbose=0)

                # Store results (final-epoch validation metrics plus full curves)
                results[model_type][length] = {
                    'val_loss': history.history['val_loss'][-1],
                    'val_mae': history.history['val_mae'][-1],
                    'history': history.history
                }

        self.results = results
        return results

    @staticmethod
    def _build_model(model_type, length):
        """Build and compile the 32-unit recurrent model with a Dense(length) head."""
        # The supported names match the Keras layer class names one-to-one.
        recurrent_layer = getattr(layers, model_type)
        model = keras.Sequential([
            recurrent_layer(32, input_shape=(length, 1)),
            layers.Dense(length, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        return model

# Instantiate the task generator and both analyzers
task_generator = LongTermDependencyTasks(vocab_size=8)
gradient_analyzer = GradientFlowAnalyzer()
degradation_analyzer = PerformanceDegradationAnalyzer()

print("Long-term dependency analyzers initialized!")
print("Ready to test sequence modeling capabilities across different architectures!")

# Build a few small example datasets for a first look at the tasks
print("\nGenerating sample long-term dependency tasks...")

# Copy task: one short-gap and one long-gap variant (100 samples each)
copy_X_short, copy_y_short = task_generator.copy_task(
    seq_length=50, gap_length=20, num_samples=100
)
copy_X_long, copy_y_long = task_generator.copy_task(
    seq_length=150, gap_length=100, num_samples=100
)

# Adding task
add_X, add_y = task_generator.adding_task(seq_length=100, num_samples=100)

# Report the array shapes of every generated dataset
for _label, _inputs, _targets in (
    ("Copy task (short)", copy_X_short, copy_y_short),
    ("Copy task (long)", copy_X_long, copy_y_long),
    ("Adding task", add_X, add_y),
):
    print(f"{_label}: X shape = {_inputs.shape}, y shape = {_targets.shape}")

# Plot one example per task plus three summary bar charts on a 2 x 3 grid
plt.figure(figsize=(16, 10))
sample_idx = 0

# Panels 1-2: copy-task input vs. target for the short and the long gap
for _panel, (_inputs, _targets, _title) in enumerate(
    [
        (copy_X_short, copy_y_short, 'Copy Task (Short Gap)'),
        (copy_X_long, copy_y_long, 'Copy Task (Long Gap)'),
    ],
    start=1,
):
    plt.subplot(2, 3, _panel)
    plt.plot(_inputs[sample_idx], 'b-', label='Input', alpha=0.7)
    plt.plot(_targets[sample_idx], 'r--', label='Target', alpha=0.7)
    plt.title(_title)
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.legend()

# Panel 3: adding task (marker channel scaled by 5 before plotting)
plt.subplot(2, 3, 3)
_values = add_X[sample_idx, :, 0]
_markers = add_X[sample_idx, :, 1] * 5
plt.plot(_values, 'b-', label='Values', alpha=0.7)
plt.plot(_markers, 'ro', label='Markers', alpha=0.7, markersize=3)
plt.title(f'Adding Task (Target: {add_y[sample_idx]:.3f})')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.legend()

# Hand-picked summary numbers shown in the bottom row
tasks = ['Copy Short', 'Copy Long', 'Adding']
gaps = [20, 100, 50]  # Effective gap lengths
info_bits = [5, 5, 2]  # Items to remember per task (plotted under a "(bits)" title)

# Panels 4-5: task complexity and information requirements
for _panel, _heights, _color, _title, _ylabel in (
    (4, gaps, 'skyblue', 'Task Complexity (Gap Length)', 'Gap Length'),
    (5, info_bits, 'lightcoral', 'Information to Remember (bits)', 'Information Content'),
):
    plt.subplot(2, 3, _panel)
    plt.bar(tasks, _heights, alpha=0.7, color=_color)
    plt.title(_title)
    plt.ylabel(_ylabel)
    plt.xticks(rotation=45)

# Panel 6: expected difficulty, with each bar labelled by its value
plt.subplot(2, 3, 6)
difficulty = [2, 4, 3]  # Subjective difficulty ranking
colors = ['green', 'red', 'orange']
bars = plt.bar(tasks, difficulty, alpha=0.7, color=colors)
plt.title('Expected Difficulty')
plt.ylabel('Difficulty Level')
plt.xticks(rotation=45)

for bar, diff in zip(bars, difficulty):
    _x_center = bar.get_x() + bar.get_width()/2
    _y_above = bar.get_height() + 0.1
    plt.text(_x_center, _y_above, f'{diff}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

print("\nLong-term Dependency Tasks Generated!")
print("Ready to analyze gradient flow and performance degradation!")
print("Tasks cover a range of dependency lengths and complexity levels!")
