# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import warnings
warnings.filterwarnings('ignore')

# Pin the NumPy and TensorFlow random generators to the same seed so that
# repeated runs draw the same random numbers
np.random.seed(42)
tf.random.set_seed(42)

# Startup banner
print(f"TensorFlow version: {tf.__version__}")
print("GRU Text Generation Systems initialized!")

# Figure appearance: matplotlib's seaborn-derived style sheet plus the
# seaborn "husl" colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# 1. GRU Text Generator with Performance Optimization
class OptimizedGRUGenerator:
    """
    Factory for GRU-based models that output a softmax distribution over the vocabulary.

    The constructor only records hyperparameters. A Keras model is created by
    ``build_model`` or ``build_bidirectional_model``; each call stores the new
    model on ``self.model`` (replacing any previous one) and also returns it.
    """
    
    def __init__(self, vocab_size, embedding_dim=128, gru_units=256, num_layers=2):
        """
        Record the hyperparameters used by the builder methods.

        Args:
            vocab_size: Number of token ids; sizes both the embedding table
                and the softmax output layer.
            embedding_dim: Width of the token embedding vectors.
            gru_units: Hidden size of each GRU in ``build_model``;
                ``build_bidirectional_model`` uses ``gru_units // 2`` per GRU.
            num_layers: Number of stacked GRU layers in ``build_model``.
                Not read by ``build_bidirectional_model``.
        """
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.gru_units = gru_units
        self.num_layers = num_layers
        self.model = None
        
    def build_model(self, sequence_length):
        """
        Build a stack of ``num_layers`` unidirectional GRU layers.

        Args:
            sequence_length: Number of token ids in each input sequence.

        Returns:
            The uncompiled ``keras.Sequential`` model (also stored on ``self.model``).
        """
        model = keras.Sequential([
            # An explicit Input replaces Embedding's `input_length` argument,
            # which is deprecated in Keras 3. Declaring the input shape up
            # front also builds the model immediately, so count_params() and
            # summary() work before the first fit/predict call.
            keras.Input(shape=(sequence_length,), dtype="int32"),
            layers.Embedding(self.vocab_size, self.embedding_dim),
            layers.Dropout(0.2)
        ])
        
        # Add GRU layers
        for i in range(self.num_layers):
            # Every GRU except the last must emit the full sequence so the
            # next GRU has per-timestep input; the last one emits only its
            # final state, which feeds the Dense classifier.
            return_sequences = (i < self.num_layers - 1)
            model.add(layers.GRU(
                self.gru_units,
                return_sequences=return_sequences,
                dropout=0.2,
                recurrent_dropout=0.2
            ))
            
            # Extra dropout between stacked GRUs (not after the last one)
            if return_sequences:
                model.add(layers.Dropout(0.2))
        
        # One probability per vocabulary entry
        model.add(layers.Dense(self.vocab_size, activation='softmax'))
        
        self.model = model
        return model
    
    def build_bidirectional_model(self, sequence_length):
        """
        Build a fixed two-layer model: a bidirectional GRU followed by a plain GRU.

        The architecture is hard-coded; ``self.num_layers`` is not consulted.

        Args:
            sequence_length: Number of token ids in each input sequence.

        Returns:
            The uncompiled ``keras.Sequential`` model (also stored on ``self.model``).
        """
        model = keras.Sequential([
            # Explicit Input instead of the deprecated `input_length` argument;
            # see build_model for the rationale.
            keras.Input(shape=(sequence_length,), dtype="int32"),
            layers.Embedding(self.vocab_size, self.embedding_dim),
            layers.Dropout(0.2),
            layers.Bidirectional(layers.GRU(
                self.gru_units//2,  # Halve units since bidirectional doubles them
                return_sequences=True,
                dropout=0.2,
                recurrent_dropout=0.2
            )),
            layers.Dropout(0.3),
            # Collapses the sequence to its final state for the classifier
            layers.GRU(self.gru_units//2, dropout=0.2, recurrent_dropout=0.2),
            layers.Dense(self.vocab_size, activation='softmax')
        ])
        
        self.model = model
        return model

# 2. Performance Benchmark System
class GenerationBenchmark:
    """
    Times training and inference for a set of Keras models and plots the results.

    The most recent ``benchmark_models`` run is kept on ``self.results`` as a
    dict mapping model name to its measurements.
    """
    
    def __init__(self):
        # name -> {'training_time', 'inference_time', 'param_count',
        #          'final_accuracy', 'final_loss'}
        self.results = {}
    
    def benchmark_models(self, models_dict, test_data, sequence_length=50):
        """
        Train each model briefly and record timing, size and final metrics.

        Each model is compiled with Adam and sparse categorical cross-entropy,
        fitted for 3 epochs on the first 100 samples, then asked to predict
        the first 10 samples.

        Args:
            models_dict: Mapping of display name to an uncompiled Keras model.
            test_data: ``(X, y)`` pair; ``y`` must be integer class targets
                accepted by ``sparse_categorical_crossentropy`` for the
                models' output shape.
            sequence_length: Unused; kept so existing callers keep working.

        Returns:
            Dict mapping each model name to a dict with keys
            ``training_time``, ``inference_time`` (both seconds),
            ``param_count``, ``final_accuracy`` and ``final_loss``.
            The same dict is stored on ``self.results``.
        """
        # Every model sees the same data, so unpack once outside the loop
        X, y = test_data
        results = {}
        
        for name, model in models_dict.items():
            print(f"Benchmarking {name}...")
            
            # Compile before starting the clock so the training figure
            # measures fit() only
            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
            
            # Simple training (reduced for demo). perf_counter is the
            # monotonic, high-resolution clock intended for measuring
            # elapsed intervals.
            start_time = time.perf_counter()
            history = model.fit(X[:100], y[:100], epochs=3, batch_size=16, verbose=0)
            training_time = time.perf_counter() - start_time
            
            # Measure inference time; the predictions themselves are not needed.
            # NOTE(review): the first predict() call presumably includes
            # one-off setup overhead -- confirm whether a warm-up call is wanted.
            start_time = time.perf_counter()
            model.predict(X[:10], verbose=0)
            inference_time = time.perf_counter() - start_time
            
            # Calculate model size
            param_count = model.count_params()
            
            # Store results
            results[name] = {
                'training_time': training_time,
                'inference_time': inference_time,
                'param_count': param_count,
                'final_accuracy': history.history['accuracy'][-1],
                'final_loss': history.history['loss'][-1]
            }
            
            print(f"  Training time: {training_time:.2f}s")
            print(f"  Inference time: {inference_time:.4f}s")
            print(f"  Parameters: {param_count:,}")
        
        self.results = results
        return results
    
    def visualize_benchmark(self):
        """
        Show a 2x3 grid of charts for the stored benchmark results.

        Five bar charts (training time, inference time, parameter count,
        final accuracy, accuracy per 1K parameters) and one scatter plot of
        training time against accuracy. Prints a message and returns without
        plotting when no results have been recorded.
        """
        if not self.results:
            print("No benchmark results to visualize")
            return
        
        _fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        
        names = list(self.results)
        
        # Pull each metric out as a list aligned with `names`
        training_times = [self.results[name]['training_time'] for name in names]
        inference_times = [self.results[name]['inference_time'] for name in names]
        param_counts = [self.results[name]['param_count'] for name in names]
        accuracies = [self.results[name]['final_accuracy'] for name in names]
        
        # Efficiency analysis (accuracy per parameter)
        efficiency = [acc / (params / 1000) for acc, params in zip(accuracies, param_counts)]
        
        # (axis, values, title, y-label, colour) for every bar chart
        bar_panels = [
            (axes[0, 0], training_times, 'Training Time Comparison', 'Time (seconds)', 'skyblue'),
            (axes[0, 1], inference_times, 'Inference Time Comparison', 'Time (seconds)', 'lightcoral'),
            (axes[0, 2], param_counts, 'Parameter Count Comparison', 'Number of Parameters', 'lightgreen'),
            (axes[1, 0], accuracies, 'Final Accuracy Comparison', 'Accuracy', 'orange'),
            (axes[1, 1], efficiency, 'Efficiency (Accuracy per 1K Params)', 'Efficiency Score', 'purple'),
        ]
        for ax, values, title, ylabel, color in bar_panels:
            ax.bar(names, values, alpha=0.7, color=color)
            ax.set_title(title)
            ax.set_ylabel(ylabel)
            # Model names are long; rotate so they do not overlap
            ax.tick_params(axis='x', rotation=45)
        
        # Speed vs Accuracy trade-off
        tradeoff_ax = axes[1, 2]
        tradeoff_ax.scatter(training_times, accuracies, s=100, alpha=0.7)
        for i, name in enumerate(names):
            tradeoff_ax.annotate(name, (training_times[i], accuracies[i]), 
                                 xytext=(5, 5), textcoords='offset points')
        tradeoff_ax.set_xlabel('Training Time (s)')
        tradeoff_ax.set_ylabel('Final Accuracy')
        tradeoff_ax.set_title('Speed vs Accuracy Trade-off')
        
        plt.tight_layout()
        plt.show()

# 3. Create sample data and models for comparison
def create_sample_text_data(vocab_size=50, seq_length=30, num_samples=200):
    """
    Create random token data for benchmarking next-token models.

    Args:
        vocab_size: Exclusive upper bound for generated token ids.
        seq_length: Number of token ids per input sequence.
        num_samples: Number of (sequence, target) pairs to generate.

    Returns:
        Tuple ``(X, y)`` of integer arrays:
        ``X`` has shape ``(num_samples, seq_length)`` with ids in
        ``[1, vocab_size)``; ``y`` has shape ``(num_samples,)`` with ids in
        ``[0, vocab_size)``.
    """
    # Generate random sequences that simulate text.
    # Inputs start at 1 -- presumably id 0 is reserved (e.g. padding); confirm.
    X = np.random.randint(1, vocab_size, size=(num_samples, seq_length))
    # One target token per sequence: models whose last recurrent layer returns
    # only its final state produce a (batch, vocab_size) output, and
    # sparse_categorical_crossentropy then needs exactly one label per sample.
    # Per-timestep targets of shape (num_samples, seq_length) make fit() fail
    # with a shape mismatch.
    y = np.random.randint(0, vocab_size, size=(num_samples,))
    return X, y

# Generate sample data
vocab_size = 50
sequence_length = 30
X_sample, y_sample = create_sample_text_data(vocab_size, sequence_length, 200)

print(f"Sample data shape: X={X_sample.shape}, y={y_sample.shape}")

# Create different GRU model variants
gru_generator = OptimizedGRUGenerator(vocab_size, embedding_dim=64, gru_units=128, num_layers=2)

# Display name -> uncompiled Keras model; the benchmark compiles them itself
models_to_compare = {}

# Standard GRU
models_to_compare['Standard_GRU'] = gru_generator.build_model(sequence_length)

# Bidirectional GRU
gru_generator_bi = OptimizedGRUGenerator(vocab_size, embedding_dim=64, gru_units=128, num_layers=1)
models_to_compare['Bidirectional_GRU'] = gru_generator_bi.build_bidirectional_model(sequence_length)

# LSTM for comparison.
# The input shape is declared with keras.Input rather than Embedding's
# `input_length` argument (deprecated in Keras 3); this also builds the model
# immediately so count_params() below works before any training.
lstm_model = keras.Sequential([
    keras.Input(shape=(sequence_length,), dtype="int32"),
    layers.Embedding(vocab_size, 64),
    layers.Dropout(0.2),
    layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    layers.Dense(vocab_size, activation='softmax')
])
models_to_compare['LSTM_Baseline'] = lstm_model

# SimpleRNN for comparison (same explicit-Input construction as the LSTM)
rnn_model = keras.Sequential([
    keras.Input(shape=(sequence_length,), dtype="int32"),
    layers.Embedding(vocab_size, 64),
    layers.Dropout(0.2),
    layers.SimpleRNN(128, dropout=0.2),
    layers.Dense(vocab_size, activation='softmax')
])
models_to_compare['SimpleRNN_Baseline'] = rnn_model

print(f"Created {len(models_to_compare)} models for comparison:")
for name, model in models_to_compare.items():
    print(f"  {name}: {model.count_params():,} parameters")

# Benchmark every candidate model on the shared sample dataset
benchmark = GenerationBenchmark()
print("\nRunning performance benchmark...")
benchmark_results = benchmark.benchmark_models(
    models_to_compare,
    (X_sample, y_sample),
)

# Plot the collected measurements
print("\nGenerating benchmark visualization...")
benchmark.visualize_benchmark()


def _efficiency(stats):
    """Return accuracy per thousand parameters for one benchmark entry."""
    return stats['final_accuracy'] / (stats['param_count'] / 1000)


# Per-model report
print("\nGRU vs LSTM Performance Analysis:")
print("=" * 50)

for name, result in benchmark_results.items():
    print(
        f"\n{name}:",
        f"  Training Time: {result['training_time']:.3f}s",
        f"  Inference Time: {result['inference_time']:.4f}s",
        f"  Parameters: {result['param_count']:,}",
        f"  Final Accuracy: {result['final_accuracy']:.4f}",
        f"  Efficiency: {_efficiency(result):.4f}",
        sep="\n",
    )

# Pick the winner of each category by model name
best_speed = min(
    benchmark_results,
    key=lambda model_name: benchmark_results[model_name]['training_time'],
)
best_accuracy = max(
    benchmark_results,
    key=lambda model_name: benchmark_results[model_name]['final_accuracy'],
)
most_efficient = max(
    benchmark_results,
    key=lambda model_name: _efficiency(benchmark_results[model_name]),
)

print(
    "\nPerformance Leaders:",
    f"Fastest Training: {best_speed}",
    f"Highest Accuracy: {best_accuracy}",
    f"Most Efficient: {most_efficient}",
    sep="\n",
)

# General takeaways about the compared architectures
print(
    "\nKey Insights:",
    "- GRU typically offers good balance of speed and performance",
    "- Bidirectional models provide better context but with computational cost",
    "- LSTM may achieve slightly higher accuracy but requires more parameters",
    "- SimpleRNN is fastest but may struggle with longer dependencies",
    sep="\n",
)

# Performance optimization tips
print(
    "\nGRU Optimization Tips:",
    "1. Use smaller embedding dimensions for faster training",
    "2. Bidirectional GRUs for better context understanding",
    "3. Gradient clipping for training stability",
    "4. Mixed precision training for inference speed",
    "5. Model quantization for deployment optimization",
    sep="\n",
)

# Closing banner
print(
    "\nGRU Text Generation Systems Analysis Complete!",
    "Ready for hybrid models and advanced optimization techniques!",
    sep="\n",
)
