In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
# np.random.seed seeds NumPy's global generator; tf.random.set_seed sets
# TensorFlow's global seed.
np.random.seed(42)
tf.random.set_seed(42)

print("TensorFlow version:", tf.__version__)
print("Short-term memory analysis initialized!")

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only register the un-suffixed name, so pick whichever one this
# installation actually provides instead of failing on the missing style.
_SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_SEABORN_STYLE)
sns.set_palette("husl")


In [None]:
# 1. Memory Capacity Testing Framework
class MemoryCapacityTester:
    """
    A framework for testing memory capacity of RNN architectures.

    Builds synthetic integer-token datasets in which the target at late time
    steps depends on tokens shown at early time steps. Token ``0`` is used as
    the blank/padding symbol; payload tokens are drawn from
    ``1 .. vocab_size - 1``. All randomness comes from NumPy's global
    generator (``np.random``), so seed it beforehand for reproducible data.
    """

    # Task names accepted by generate_memory_task.
    TASK_TYPES = ("copy", "reverse", "selective")

    # Number of payload positions flagged per sample in the "selective" task.
    NUM_IMPORTANT = 3

    def __init__(self, sequence_length=50, vocab_size=10):
        # Number of time steps in every generated sample.
        self.sequence_length = sequence_length
        # Exclusive upper bound for payload tokens (0 is reserved as blank).
        self.vocab_size = vocab_size

    def generate_memory_task(self, task_type="copy", delay=10, num_samples=1000):
        """
        Generate a dataset for one of the delayed-recall memory tasks.

        Every sample spans exactly ``sequence_length`` steps::

            input : [payload][ blanks (0) ............ ]
            target: [ blanks (0) ............ ][recall ]

        The payload holds ``(sequence_length - delay) // 2`` tokens so that the
        payload, a gap of at least ``delay`` blank steps, and the recall window
        all fit. Sizing the payload at half the sequence regardless of
        ``delay`` would push the end of the recall window past
        ``sequence_length`` and silently drop those target tokens.

        Task types:
            * ``"copy"``: recall equals the payload.
            * ``"reverse"``: recall is the payload in reverse order.
            * ``"selective"``: up to ``NUM_IMPORTANT`` payload positions are
              flagged by adding ``vocab_size`` to their token in the input
              (so inputs range up to ``2 * vocab_size - 1``); the recall holds
              the original token at flagged positions and 0 elsewhere.

        Args:
            task_type: One of ``TASK_TYPES``.
            delay: Minimum number of blank steps between payload and recall.
            num_samples: Number of samples to generate.

        Returns:
            Tuple ``(X, y)`` of integer arrays, each of shape
            ``(num_samples, sequence_length)``.

        Raises:
            ValueError: If ``task_type`` is unknown, ``delay`` is negative, or
                ``delay`` leaves no room for at least one payload token.
                NumPy also raises ValueError when ``vocab_size < 2`` because
                no payload token can be drawn.
        """
        if task_type not in self.TASK_TYPES:
            raise ValueError(
                f"Unknown task_type {task_type!r}; expected one of {self.TASK_TYPES}"
            )
        if delay < 0:
            raise ValueError(f"delay must be non-negative, got {delay}")

        payload_len = (self.sequence_length - delay) // 2
        if payload_len < 1:
            raise ValueError(
                f"delay={delay} leaves no room for a payload in a sequence of "
                f"length {self.sequence_length}"
            )

        # Pre-filled with the blank token, so only payload/recall are written.
        X = np.zeros((num_samples, self.sequence_length), dtype=int)
        y = np.zeros_like(X)
        recall_start = self.sequence_length - payload_len

        for i in range(num_samples):
            payload = np.random.randint(1, self.vocab_size, size=payload_len)

            if task_type == "copy":
                recall = payload
            elif task_type == "reverse":
                recall = payload[::-1]
            else:  # "selective"
                # Very short payloads cannot hold NUM_IMPORTANT distinct slots.
                num_flagged = min(self.NUM_IMPORTANT, payload_len)
                flagged = np.random.choice(
                    payload_len, size=num_flagged, replace=False
                )
                # Capture the clean tokens before marking them in the input.
                recall = np.zeros_like(payload)
                recall[flagged] = payload[flagged]
                payload[flagged] += self.vocab_size

            X[i, :payload_len] = payload
            y[i, recall_start:] = recall

        return X, y

    def create_long_dependency_task(self, dependency_length=20, num_samples=1000):
        """
        Create a task whose final target depends on a much earlier input.

        Each input is a sequence of random tokens in ``1 .. vocab_size - 1``.
        The target is 0 everywhere except the final step, which must equal the
        input token shown ``dependency_length`` steps earlier (the cue). When
        ``dependency_length`` exceeds ``sequence_length - 1`` the cue is the
        first step, i.e. the longest dependency the sequence can express.

        Args:
            dependency_length: Number of steps between the cue and the final
                step where it must be reproduced.
            num_samples: Number of samples to generate.

        Returns:
            Tuple ``(X, y)`` of integer arrays, each of shape
            ``(num_samples, sequence_length)``.

        Raises:
            ValueError: If ``dependency_length`` is negative.
        """
        if dependency_length < 0:
            raise ValueError(
                f"dependency_length must be non-negative, got {dependency_length}"
            )

        final_step = self.sequence_length - 1
        cue_step = max(0, final_step - dependency_length)

        # Every position is a random distractor; the token that happens to sit
        # at cue_step is the one the model has to carry to the end.
        X = np.random.randint(
            1, self.vocab_size, size=(num_samples, self.sequence_length)
        )
        y = np.zeros_like(X)
        y[:, final_step] = X[:, cue_step]

        return X, y

# 2. Gradient Flow Analysis
class GradientFlowAnalyzer:
    """
    Analyze gradient flow through RNN architectures.

    Wraps a model and measures, per trainable variable, the norm of the
    gradient of a sparse categorical cross-entropy loss on mini-batches.
    """

    def __init__(self, model):
        # Model under analysis; it is called as model(X, training=True) and
        # must expose `trainable_variables`.
        self.model = model
        # Not written by any method of this class.
        self.gradients = []

    def compute_gradients(self, X, y):
        """
        Compute gradients for a batch of data.

        Args:
            X: Batch of model inputs.
            y: Integer class targets for the batch.
               NOTE(review): the loss is called with its defaults, so the
               model presumably outputs probabilities, not logits; confirm.

        Returns:
            Tuple ``(gradients, loss)``: ``gradients`` is aligned with
            ``self.model.trainable_variables`` and ``loss`` is the mean
            sparse categorical cross-entropy over the batch.
        """
        with tf.GradientTape() as tape:
            predictions = self.model(X, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y, predictions)
            loss = tf.reduce_mean(loss)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        return gradients, loss

    def analyze_gradient_norms(self, X, y, num_batches=10, batch_size=32):
        """
        Analyze gradient norms across different layers.

        Walks ``X``/``y`` in consecutive, non-shuffled batches and records the
        gradient norm of every trainable variable for each batch. Iteration
        stops early once the data is exhausted, so empty batches are never
        fed to the model; the last batch may be smaller than ``batch_size``.

        Args:
            X: Inputs; must support ``len()`` and slicing along axis 0.
            y: Targets aligned with ``X``.
            num_batches: Maximum number of batches to evaluate.
            batch_size: Number of samples per batch.

        Returns:
            Array with one row per evaluated batch and one column per
            trainable variable. Variables with no gradient contribute 0.0.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        num_available = len(X)
        gradient_norms = []

        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            if start >= num_available:
                # Out of data: an empty slice would only add a meaningless row.
                break
            stop = start + batch_size

            gradients, _ = self.compute_gradients(X[start:stop], y[start:stop])

            # tape.gradient yields None for variables unconnected to the loss.
            gradient_norms.append([
                0.0 if grad is None else tf.norm(grad).numpy()
                for grad in gradients
            ])

        return np.array(gradient_norms)

# Shared generator for every dataset built below
memory_tester = MemoryCapacityTester(vocab_size=8, sequence_length=40)

# Build one (inputs, targets) pair per memory task
print("Generating memory tasks...")
copy_X, copy_y = memory_tester.generate_memory_task(
    task_type="copy", num_samples=500, delay=10
)
reverse_X, reverse_y = memory_tester.generate_memory_task(
    task_type="reverse", num_samples=500, delay=5
)
selective_X, selective_y = memory_tester.generate_memory_task(
    task_type="selective", num_samples=500, delay=8
)
long_dep_X, long_dep_y = memory_tester.create_long_dependency_task(
    dependency_length=30
)

# Report the array shapes of each dataset
print(f"Copy task shapes: X={copy_X.shape}, y={copy_y.shape}")
print(f"Reverse task shapes: X={reverse_X.shape}, y={reverse_y.shape}")
print(f"Selective task shapes: X={selective_X.shape}, y={selective_y.shape}")
print(f"Long dependency task shapes: X={long_dep_X.shape}, y={long_dep_y.shape}")

# Visualize memory tasks on a 2x4 grid: one example per task on the top row,
# dataset-level summaries on the bottom row.
plt.figure(figsize=(16, 10))
sample_idx = 0


def _plot_task_example(position, inputs, targets, title):
    """Overlay one sample's input and target series in grid cell `position`."""
    plt.subplot(2, 4, position)
    plt.plot(inputs, 'b-', label='Input', alpha=0.7)
    plt.plot(targets, 'r--', label='Target', alpha=0.7)
    plt.title(title)
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.legend()


# Top row: the same sample index from each task
_plot_task_example(1, copy_X[sample_idx], copy_y[sample_idx], 'Copy Task Example')
_plot_task_example(2, reverse_X[sample_idx], reverse_y[sample_idx], 'Reverse Task Example')
_plot_task_example(3, selective_X[sample_idx], selective_y[sample_idx], 'Selective Memory Task')
_plot_task_example(4, long_dep_X[sample_idx], long_dep_y[sample_idx], 'Long Dependency Task')

# Task complexity analysis: spread of all input values per task
plt.subplot(2, 4, 5)
tasks = ['Copy', 'Reverse', 'Selective', 'Long Dep']
input_complexity = [
    np.std(task_inputs.flatten())
    for task_inputs in (copy_X, reverse_X, selective_X, long_dep_X)
]
plt.bar(tasks, input_complexity, color='skyblue', alpha=0.7)
plt.title('Input Complexity (Std Dev)')
plt.ylabel('Standard Deviation')
plt.xticks(rotation=45)

# Memory requirement analysis: hand-picked estimates, not computed from data
plt.subplot(2, 4, 6)
memory_requirements = [10, 15, 12, 35]
plt.bar(tasks, memory_requirements, color='lightcoral', alpha=0.7)
plt.title('Estimated Memory Requirements')
plt.ylabel('Memory Steps')
plt.xticks(rotation=45)

# Information flow analysis: select the panel that the entropy bars are
# drawn into further down
plt.subplot(2, 4, 7)
# Calculate information flow (entropy-based measure)
def calculate_entropy(data):
    """Return the Shannon entropy, in bits, of the values in *data*.

    The input is flattened and the empirical distribution of its distinct
    values is used: ``H = sum(p * log2(1 / p))``.

    Args:
        data: Array-like of any shape.

    Returns:
        Non-negative entropy in bits; 0.0 for constant or empty input.
    """
    values = np.asarray(data).ravel()
    _, counts = np.unique(values, return_counts=True)
    probs = counts / counts.sum()
    # Every count from np.unique is >= 1, so probs are strictly positive and
    # the log needs no epsilon; adding one would bias the result and make the
    # entropy of constant data slightly negative.
    return np.sum(probs * np.log2(1.0 / probs))

# Entropy of the input tokens of each task, drawn into the current panel
entropies = [
    calculate_entropy(task_inputs)
    for task_inputs in (copy_X, reverse_X, selective_X, long_dep_X)
]

plt.bar(tasks, entropies, color='lightgreen', alpha=0.7)
plt.title('Information Content (Entropy)')
plt.ylabel('Entropy (bits)')
plt.xticks(rotation=45)

# Task difficulty ranking
plt.subplot(2, 4, 8)
difficulty_scores = [2, 3, 4, 5]  # Subjective difficulty ranking
colors = ['green', 'yellow', 'orange', 'red']
bars = plt.bar(tasks, difficulty_scores, color=colors, alpha=0.7)
plt.title('Estimated Task Difficulty')
plt.ylabel('Difficulty Score')
plt.xticks(rotation=45)

# Add difficulty labels just above each bar
for bar, score in zip(bars, difficulty_scores):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
             f'{score}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Report the real size of every dataset: the tasks are generated with
# different sample counts, so one shared number would be wrong.
sample_counts = ", ".join(
    f"{task_name}={len(task_inputs)}"
    for task_name, task_inputs in zip(
        tasks, (copy_X, reverse_X, selective_X, long_dep_X)
    )
)

print("\nMemory Task Analysis Complete!")
print(f"Generated samples per task: {sample_counts}")
print("Tasks range from simple copy to complex long-term dependencies")
print("Ready to test RNN memory capabilities!")
