# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')

# Seed both NumPy and TensorFlow with the same value so repeated runs are comparable
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print(f"TensorFlow version: {tf.__version__}")
print("LSTM Variations Comparison initialized!")

# Plot appearance: seaborn-flavoured matplotlib style sheet plus the "husl" palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# 1. LSTM Variant Architectures
def create_lstm_variants(input_shape, units=64, num_classes=3):
    """
    Create different LSTM variant architectures

    Every variant is a ``keras.Sequential`` classifier made of a recurrent
    encoder followed by the same head:
    Dense(32, relu) -> Dropout(0.3) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single sample without the batch axis (the
            caller in this file passes ``X_train.shape[1:]``). It is declared
            via ``keras.Input`` so the models are built on creation and
            ``count_params()`` can be called before training. Pass ``None``
            to skip the input declaration and defer building until the model
            first sees data.
        units: Width of the unidirectional recurrent layers. Bidirectional
            variants use ``units // 2`` per direction, and the second layer
            of the stacked variant also uses ``units // 2``.
        num_classes: Size of the softmax output layer.

    Returns:
        dict mapping the variant name to its (uncompiled) model, in the order
        Standard_LSTM, Bidirectional_LSTM, Stacked_LSTM, GRU,
        Bidirectional_GRU.
    """
    def build(name, *recurrent_layers):
        # Assemble one variant: optional input declaration, encoder, shared head.
        stack = []
        if input_shape is not None:
            # Without a declared input the Sequential model has no weights
            # yet, and count_params() raises until the first fit/call.
            stack.append(keras.Input(shape=tuple(input_shape)))
        stack.extend(recurrent_layers)
        # Fresh layer instances per model so no weights are shared between variants.
        stack.extend([
            layers.Dense(32, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(num_classes, activation='softmax'),
        ])
        return keras.Sequential(stack, name=name)

    half_units = units // 2

    return {
        # Standard LSTM
        'Standard_LSTM': build(
            'Standard_LSTM',
            layers.LSTM(units, return_sequences=False),
        ),
        # Bidirectional LSTM
        'Bidirectional_LSTM': build(
            'Bidirectional_LSTM',
            layers.Bidirectional(layers.LSTM(half_units, return_sequences=False)),
        ),
        # Stacked LSTM: the first layer must emit the full sequence for the second
        'Stacked_LSTM': build(
            'Stacked_LSTM',
            layers.LSTM(units, return_sequences=True),
            layers.LSTM(half_units, return_sequences=False),
        ),
        # GRU for comparison
        'GRU': build(
            'GRU',
            layers.GRU(units, return_sequences=False),
        ),
        # Bidirectional GRU
        'Bidirectional_GRU': build(
            'Bidirectional_GRU',
            layers.Bidirectional(layers.GRU(half_units, return_sequences=False)),
        ),
    }

# 2. Performance Analysis Framework
class LSTMVariantAnalyzer:
    """
    Analyze and compare different LSTM variants

    Typical use: call ``compare_architectures`` to train every model and
    collect its metrics, then ``visualize_comparison`` to plot them.
    """

    def __init__(self):
        # name -> result dict from the most recent compare_architectures() call
        self.results = {}

    def compare_architectures(self, models, X_train, y_train, X_val, y_val, epochs=10):
        """
        Train and compare different LSTM variants

        Each model is compiled in place (Adam, sparse categorical
        cross-entropy, accuracy metric) and fitted with batch size 32.

        Args:
            models: Mapping of variant name to a Keras-style model exposing
                ``compile``, ``fit`` and ``count_params``.
            X_train, y_train: Training inputs and integer class labels.
            X_val, y_val: Validation inputs and labels, evaluated every epoch.
            epochs: Number of training epochs per model.

        Returns:
            dict mapping each name to a dict with keys ``'model'``,
            ``'history'`` (per-epoch metric lists), ``'params'``,
            ``'final_val_acc'`` and ``'final_val_loss'``. The same dict is
            stored on ``self.results``, replacing any previous results.
        """
        results = {}

        for name, model in models.items():
            print(f"\nTraining {name}...")

            # Compile model
            model.compile(
                optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )

            # Train model (verbose=0: only the summary line below is printed)
            history = model.fit(
                X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=epochs,
                batch_size=32,
                verbose=0
            )

            # "Final" metrics are those of the last epoch, not the best epoch
            final_val_acc = history.history['val_accuracy'][-1]

            # Store results
            results[name] = {
                'model': model,
                'history': history.history,
                'params': model.count_params(),
                'final_val_acc': final_val_acc,
                'final_val_loss': history.history['val_loss'][-1]
            }

            print(f"Final validation accuracy: {final_val_acc:.4f}")

        self.results = results
        return results

    def _plot_history(self, ax, key, title, ylabel):
        """Draw one line per model for the per-epoch metric ``key`` on ``ax``."""
        ax.set_title(title)
        for name, result in self.results.items():
            ax.plot(result['history'][key], label=name, alpha=0.7)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True, alpha=0.3)

    @staticmethod
    def _plot_bars(ax, names, values, title, ylabel, color=None):
        """Draw one bar per model on ``ax`` and return the bar container."""
        positions = range(len(names))
        # Only pass color when requested so the default colour cycle still applies
        extra = {} if color is None else {'color': color}
        bars = ax.bar(positions, values, alpha=0.7, **extra)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xticks(positions)
        ax.set_xticklabels(names, rotation=45, ha='right')
        return bars

    def visualize_comparison(self):
        """
        Create comprehensive comparison visualization

        Shows a 2x3 grid: training accuracy, validation accuracy and final
        validation accuracy on the top row; parameter count, efficiency
        (final validation accuracy per 1K parameters) and training loss on
        the bottom row. Prints a notice and returns without plotting when no
        results are available.
        """
        if not self.results:
            print("No results to visualize. Run comparison first.")
            return

        _, axes = plt.subplots(2, 3, figsize=(18, 12))

        names = list(self.results.keys())
        final_accs = [self.results[name]['final_val_acc'] for name in names]
        param_counts = [self.results[name]['params'] for name in names]
        # Efficiency analysis (accuracy per parameter, scaled to thousands)
        efficiency = [acc / (params / 1000) for acc, params in zip(final_accs, param_counts)]

        # Training / validation accuracy curves
        self._plot_history(axes[0, 0], 'accuracy', 'Training Accuracy', 'Accuracy')
        self._plot_history(axes[0, 1], 'val_accuracy', 'Validation Accuracy', 'Accuracy')

        # Final performance comparison
        bars = self._plot_bars(axes[0, 2], names, final_accs,
                               'Final Validation Accuracy', 'Accuracy')

        # Add value labels just above each bar
        for bar, acc in zip(bars, final_accs):
            axes[0, 2].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005,
                           f'{acc:.3f}', ha='center', va='bottom')

        # Parameter count comparison
        self._plot_bars(axes[1, 0], names, param_counts,
                        'Parameter Count', 'Number of Parameters', color='orange')

        # Efficiency comparison
        self._plot_bars(axes[1, 1], names, efficiency,
                        'Efficiency (Accuracy per 1K Parameters)', 'Efficiency Score',
                        color='green')

        # Training loss comparison
        self._plot_history(axes[1, 2], 'loss', 'Training Loss', 'Loss')

        plt.tight_layout()
        plt.show()

# Create sample data for comparison
def create_sample_sequence_data(num_samples=1000, seq_length=50, features=10):
    """
    Build a synthetic three-class sequence dataset for the variant comparison.

    Sample ``i`` gets label ``i % 3``. Each sample is standard-normal noise of
    shape ``(seq_length, features)`` plus a per-timestep offset that is the
    same for every feature:
        label 0: linear ramp from 0 up to 2
        label 1: linear ramp from 2 down to 0
        label 2: sine wave over 0..4*pi (two full periods)

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the stacked sequences and
        their integer labels.
    """
    # The offsets are identical for every sample of a class, so build them
    # once as column vectors that broadcast across the feature axis.
    timestep_offsets = (
        np.linspace(0, 2, seq_length).reshape(-1, 1),
        np.linspace(2, 0, seq_length).reshape(-1, 1),
        np.sin(np.linspace(0, 4*np.pi, seq_length)).reshape(-1, 1),
    )

    labels = [idx % 3 for idx in range(num_samples)]
    # Noise is drawn one sample at a time, in order, from NumPy's global RNG.
    sequences = [
        np.random.randn(seq_length, features) + timestep_offsets[label]
        for label in labels
    ]

    return np.array(sequences), np.array(labels)

# Generate sample data
print("Generating sample sequence data...")
X, y = create_sample_sequence_data(num_samples=800, seq_length=30, features=8)

# Split data: the first 80% of samples train, the remainder validates (no shuffling)
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]

print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")

# Create LSTM variants; the per-sample shape drops the leading sample axis
input_shape = X_train.shape[1:]
lstm_variants = create_lstm_variants(input_shape, units=64, num_classes=3)

print(f"\nCreated {len(lstm_variants)} LSTM variants:")
for name, model in lstm_variants.items():
    print(f"- {name}: {model.count_params():,} parameters")

# Initialize analyzer and run comparison
analyzer = LSTMVariantAnalyzer()
print("\nRunning LSTM variant comparison...")

# Compare architectures
results = analyzer.compare_architectures(
    lstm_variants, X_train, y_train, X_val, y_val, epochs=15
)

# Visualize results
print("\nGenerating comparison visualization...")
analyzer.visualize_comparison()

# Print detailed analysis
print("\nLSTM Variant Analysis Summary:")
print("=" * 50)

# Running winners; a model only replaces the current one on a strictly higher score
best_accuracy, best_model = 0, ""
best_efficiency, most_efficient = 0, ""

for name, result in results.items():
    acc, params = result['final_val_acc'], result['params']
    # Efficiency = final validation accuracy per 1K parameters
    efficiency = acc / (params / 1000)

    print(f"\n{name}:")
    print(f"  Final Accuracy: {acc:.4f}")
    print(f"  Parameters: {params:,}")
    print(f"  Efficiency: {efficiency:.4f}")

    if acc > best_accuracy:
        best_model, best_accuracy = name, acc

    if efficiency > best_efficiency:
        most_efficient, best_efficiency = name, efficiency

print(f"\nBest performing model: {best_model} ({best_accuracy:.4f} accuracy)")
print(f"Most efficient model: {most_efficient} ({best_efficiency:.4f} efficiency)")

print("\nKey Insights:")
for insight in (
    "- Bidirectional models often perform better but use more parameters",
    "- GRU can be more efficient than LSTM with similar performance",
    "- Stacked architectures provide hierarchical learning capabilities",
    "- Model choice depends on task complexity and computational constraints",
):
    print(insight)

print("\nLSTM Variations Comparison Complete!")
print("Ready for attention fusion with LSTM architectures!")
