In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import re
import string
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: seed NumPy's and TensorFlow's global random generators
# with one shared value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Report the TensorFlow release in use so results can be tied to it.
print("TensorFlow version:", tf.__version__)
print("Libraries imported successfully!")

# Plot appearance: seaborn-flavoured matplotlib style plus the "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


In [None]:
# Rebuild the sentiment dataset used in the fundamentals notebook
def create_sentiment_dataset():
    """Return the shuffled toy review dataset as a two-column DataFrame.

    The frame has a ``text`` column with the review sentences and a
    ``sentiment`` column holding ``'positive'``, ``'negative'`` or
    ``'neutral'``.  Mixed-opinion reviews are labelled ``'neutral'``.
    Rows are shuffled with ``random_state=42`` and re-indexed from 0.
    """
    # (label, reviews) pairs, in the order the rows are laid out before the
    # shuffle: positive, negative, neutral, then mixed (labelled neutral).
    labelled_groups = [
        ('positive', [
            "I absolutely love this product! It exceeded all my expectations.",
            "Amazing quality and fast delivery. Highly recommend!",
            "This is the best purchase I've made in years. Perfect!",
            "Outstanding customer service and excellent product quality.",
            "I'm so happy with this purchase. Will definitely buy again!",
            "Fantastic! Everything arrived on time and in perfect condition.",
            "Great value for money. The product works perfectly.",
            "Superb quality and amazing features. Love it!",
            "Excellent product! Exactly what I was looking for.",
            "Perfect fit and great design. Very satisfied!",
            "This product is incredible! Exceeds expectations.",
            "Beautiful design and excellent functionality. Recommended!",
            "Top quality! Fast shipping and great packaging.",
            "Love the features and ease of use. Perfect!",
            "Outstanding value and performance. Very happy!",
            "Brilliant product! Works exactly as described.",
            "Impressive quality and attention to detail.",
            "Wonderful experience from start to finish!",
            "This is exactly what I needed. Perfect solution!",
            "Excellent craftsmanship and great customer support.",
            "Amazing results! Better than expected.",
            "High quality materials and excellent build.",
            "Perfect for my needs. Great functionality!",
            "Superb performance and reliable operation.",
            "Love the innovation and thoughtful design.",
        ]),
        ('negative', [
            "Terrible product! Complete waste of money.",
            "Poor quality and disappointing performance. Avoid!",
            "This is the worst purchase I've ever made.",
            "Awful customer service and defective product.",
            "I hate this product. Nothing works as advertised.",
            "Horrible experience. Product broke immediately.",
            "Overpriced and poor quality. Very disappointed.",
            "Useless product with terrible design flaws.",
            "Don't buy this! It's a complete scam.",
            "Worst quality I've ever seen. Returned immediately.",
            "Broken on arrival and no customer support.",
            "This product is garbage. Save your money!",
            "Terrible build quality and false advertising.",
            "Complete disappointment. Nothing works properly.",
            "Poor materials and shoddy construction.",
            "Defective product with misleading description.",
            "Nightmare experience with this purchase.",
            "Cheap quality and overpriced. Avoid at all costs!",
            "Broken after one day. Terrible reliability.",
            "Worst customer service experience ever.",
            "Product failed completely. Total waste.",
            "Inferior quality and poor performance.",
            "Disappointing results and bad value.",
            "Unreliable and poorly designed product.",
            "Frustrated with poor quality and service.",
        ]),
        ('neutral', [
            "The product is okay. Nothing special but works.",
            "Average quality for the price. Could be better.",
            "It's fine. Does what it's supposed to do.",
            "Decent product with standard features.",
            "Regular quality item. Nothing extraordinary.",
            "Standard product with basic functionality.",
            "It works as described. No surprises.",
            "Average performance and typical quality.",
            "Basic product that meets minimum requirements.",
            "Normal quality for this price range.",
            "The product is acceptable but not outstanding.",
            "Standard features and regular performance.",
            "It's an ordinary product with basic design.",
            "Typical quality and standard delivery.",
            "Regular item with expected functionality.",
            "Average build quality and normal features.",
            "Standard product that works adequately.",
            "Basic design with typical performance.",
            "It's a regular product with normal quality.",
            "Standard functionality and average materials.",
            "Ordinary product with expected features.",
            "Normal quality and standard performance.",
            "Basic item that meets requirements.",
            "Average product with typical characteristics.",
            "Standard quality and regular functionality.",
        ]),
        # Mixed-opinion reviews are folded into the neutral class
        ('neutral', [
            "Good product but expensive for what you get.",
            "Fast delivery but product quality could be better.",
            "Great customer service but the product is average.",
            "Love the design but functionality is limited.",
            "Excellent packaging but disappointing content.",
            "Quick shipping but poor build quality.",
            "Nice features but overpriced for the value.",
            "Good concept but poor execution.",
            "Beautiful appearance but lacks durability.",
            "Helpful support but defective product.",
        ]),
    ]

    # Flatten the groups into two parallel columns
    texts = [review for _, reviews in labelled_groups for review in reviews]
    labels = [label for label, reviews in labelled_groups for _ in reviews]

    frame = pd.DataFrame({'text': texts, 'sentiment': labels})
    shuffled = frame.sample(frac=1, random_state=42)
    return shuffled.reset_index(drop=True)

# Build the dataset once and report its size and per-class counts
sentiment_df = create_sentiment_dataset()
class_counts = sentiment_df['sentiment'].value_counts()
print(f"Dataset created with {len(sentiment_df)} samples")
print(f"Class distribution:\n{class_counts}")

# Light-touch text cleaning for the RNN pipeline
def preprocess_for_rnn(text):
    """Lower-case *text* and strip it down to words plus ``. ! ?``.

    The input is converted with ``str()``, lower-cased, stripped of every
    character that is not a word character, whitespace, or one of ``. ! ?``,
    and then has whitespace runs collapsed to a single space with the ends
    trimmed.  Word order is left untouched.
    """
    lowered = str(text).lower()
    # Drop everything except word characters, whitespace and . ! ?
    kept = re.sub(r'[^\w\s\.\!\?]', '', lowered)
    # Collapse whitespace runs, then trim the ends
    return re.sub(r'\s+', ' ', kept).strip()

# Clean every review with the RNN preprocessing function
processed_texts = list(map(preprocess_for_rnn, sentiment_df['text']))

# Turn the string sentiments into integer class ids
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(sentiment_df['sentiment'])
num_classes = len(label_encoder.classes_)

print(f"\nLabel encoding:")
for class_id, class_label in enumerate(label_encoder.classes_):
    print(f"{class_id}: {class_label}")

print(f"Number of classes: {num_classes}")

# Stratified 80/20 train/test split with a fixed random_state
split_parts = train_test_split(
    processed_texts,
    encoded_labels,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels,
)
X_train_text, X_test_text, y_train, y_test = split_parts

print(f"\nData split:")
print(f"Training samples: {len(X_train_text)}")
print(f"Test samples: {len(X_test_text)}")

# Tokenisation settings: vocabulary cap and fixed padded length
MAX_VOCAB_SIZE = 5000
MAX_SEQUENCE_LENGTH = 50

# Fit the tokenizer on the training texts only; unseen words map to <OOV>
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train_text)

# Texts -> lists of integer token ids
X_train_sequences = tokenizer.texts_to_sequences(X_train_text)
X_test_sequences = tokenizer.texts_to_sequences(X_test_text)

# Pad every sequence at the end up to the fixed length
pad_options = {'maxlen': MAX_SEQUENCE_LENGTH, 'padding': 'post'}
X_train_padded = pad_sequences(X_train_sequences, **pad_options)
X_test_padded = pad_sequences(X_test_sequences, **pad_options)

# Effective vocabulary size: the word index plus one, capped at MAX_VOCAB_SIZE
vocab_size = min(len(tokenizer.word_index) + 1, MAX_VOCAB_SIZE)

print(f"\nSequence preparation:")
print(f"Vocabulary size: {vocab_size}")
print(f"Max sequence length: {MAX_SEQUENCE_LENGTH}")
print(f"Training sequences shape: {X_train_padded.shape}")
print(f"Test sequences shape: {X_test_padded.shape}")

# Length statistics of the training sequences before padding
original_lengths = list(map(len, X_train_sequences))
print(f"\nSequence length statistics:")
print(f"Mean: {np.mean(original_lengths):.1f}")
print(f"Median: {np.median(original_lengths):.1f}")
print(f"Max: {np.max(original_lengths)}")
print(f"Min: {np.min(original_lengths)}")

# Data-preparation overview figure (2x3 grid); this part draws panels 1-4
plt.figure(figsize=(15, 8))

# Panel 1: histogram of un-padded sequence lengths with the padded length marked
plt.subplot(2, 3, 1)
plt.hist(original_lengths, bins=20, alpha=0.7, edgecolor='black')
plt.axvline(
    MAX_SEQUENCE_LENGTH,
    color='red',
    linestyle='--',
    label=f'Max Length: {MAX_SEQUENCE_LENGTH}',
)
plt.title('Original Sequence Length Distribution')
plt.xlabel('Sequence Length')
plt.ylabel('Frequency')
plt.legend()

# Panel 2: counts of the 20 most frequent token ids in the training sequences
plt.subplot(2, 3, 2)
word_freq = Counter(token_id for sequence in X_train_sequences for token_id in sequence)
top_words = word_freq.most_common(20)
words, freqs = zip(*top_words)
rank_positions = range(len(words))
plt.bar(rank_positions, freqs)
plt.title('Top 20 Word Frequencies')
plt.xlabel('Word Index')
plt.ylabel('Frequency')
plt.xticks(rank_positions[::2])  # label every second bar

# Panel 3: per-class sample counts, train and test side by side
plt.subplot(2, 3, 3)
class_names = label_encoder.classes_
train_class_dist = np.bincount(y_train)
test_class_dist = np.bincount(y_test)

x = np.arange(len(class_names))
width = 0.35
half_width = width / 2

for offset, counts, split_name in (
    (-half_width, train_class_dist, 'Train'),
    (half_width, test_class_dist, 'Test'),
):
    plt.bar(x + offset, counts, width, label=split_name, alpha=0.7)
plt.title('Class Distribution: Train vs Test')
plt.xlabel('Class')
plt.ylabel('Count')
plt.xticks(x, class_names, rotation=45)
plt.legend()

# Panel 4: stacked bars of non-zero vs. zero entries for the first five padded rows
plt.subplot(2, 3, 4)
sample_indices = [0, 1, 2, 3, 4]
for bar_pos, row_idx in enumerate(sample_indices):
    padded_row = X_train_padded[row_idx]
    token_count = np.count_nonzero(padded_row)
    pad_count = len(padded_row) - token_count
    # Only the first pair of bars carries legend labels
    is_first = bar_pos == 0
    plt.bar(bar_pos, token_count, label='Text' if is_first else "", color='skyblue')
    plt.bar(bar_pos, pad_count, bottom=token_count,
            label='Padding' if is_first else "", color='lightcoral')

plt.title('Padding Effect on Sample Sequences')
plt.xlabel('Sample Index')
plt.ylabel('Sequence Length')
plt.legend()

# Word coverage analysis
plt.subplot(2, 3, 5)
# Split the training texts with the tokenizer's own rules.  The preprocessed
# texts still contain ". ! ?", which the tokenizer replaces with its split
# character before building word_index; a plain text.split() would leave
# tokens such as "product!" that can never match word_index and would be
# miscounted as out-of-vocabulary.
filter_table = str.maketrans({char: tokenizer.split for char in tokenizer.filters})
all_words = [
    word
    for text in X_train_text
    for word in (text.lower() if tokenizer.lower else text)
    .translate(filter_table)
    .split(tokenizer.split)
    if word
]
unique_words = set(all_words)
# A word is covered when it has an index below the vocabulary cap
covered_words = {
    word for word in unique_words
    if tokenizer.word_index.get(word, MAX_VOCAB_SIZE) < MAX_VOCAB_SIZE
}

# Guard against an empty training set so the percentage stays defined
coverage = len(covered_words) / len(unique_words) * 100 if unique_words else 0.0
oov_rate = 100 - coverage

plt.pie([coverage, oov_rate], labels=['Covered', 'OOV'], autopct='%1.1f%%', colors=['lightgreen', 'lightcoral'])
plt.title(f'Vocabulary Coverage\n({len(covered_words)}/{len(unique_words)} words)')

# Sequence padding statistics
plt.subplot(2, 3, 6)
# Count the zero entries of every padded row in one vectorised pass.
# NOTE(review): zeros are treated as padding; this assumes the tokenizer
# never emits id 0 for a real token -- confirm against the Keras Tokenizer docs.
padding_stats = np.sum(X_train_padded == 0, axis=1)
mean_padding = np.mean(padding_stats)

plt.hist(padding_stats, bins=20, alpha=0.7, edgecolor='black')
plt.xlabel('Number of Padding Tokens')
plt.ylabel('Frequency')
plt.title('Padding Distribution in Sequences')
plt.axvline(mean_padding, color='red', linestyle='--', label=f'Mean: {mean_padding:.1f}')
plt.legend()

plt.tight_layout()
plt.show()

print(f"\nData preparation complete!")
print(f"Ready to build RNN models for sentiment classification.")
# Uniform random guessing over num_classes classes is the accuracy floor
print(f"Target performance to beat: {1/num_classes:.3f} (random baseline)")


In [None]:
# RNN Architecture Factory
def create_rnn_models(vocab_size, max_length, num_classes, embedding_dim=100, rnn_units=64):
    """
    Create different RNN architectures for sentiment classification.

    Every model shares the same frame: an Embedding layer with
    ``mask_zero=True``, one or more recurrent layers, ``Dense(32, relu)``,
    ``Dropout(0.3)`` and a softmax output with ``num_classes`` units.  Only
    the recurrent part differs between architectures, so the frame is built
    in one place instead of being repeated per model.

    Args:
        vocab_size: input dimension of the embedding layer.
        max_length: length of the padded input sequences.
        num_classes: number of output classes.
        embedding_dim: width of the embedding vectors.
        rnn_units: units of the main recurrent layer.  The bidirectional
            models use ``rnn_units // 2`` per direction and the stacked
            model uses ``rnn_units // 2`` for its second LSTM.

    Returns:
        dict mapping architecture name to an uncompiled ``keras.Sequential``,
        in the order SimpleRNN, LSTM, GRU, BiLSTM, BiGRU, StackedLSTM.
    """
    # Dropout settings shared by every recurrent layer
    rnn_dropout = {'dropout': 0.2, 'recurrent_dropout': 0.2}
    half_units = rnn_units // 2

    def _assemble(name, make_recurrent_layers):
        """Wrap the recurrent layers between the shared embedding and classifier head."""
        # The recurrent layers are created by a factory call inside the list
        # so layers are instantiated in stack order (embedding first).
        return keras.Sequential([
            layers.Embedding(vocab_size, embedding_dim, input_length=max_length, mask_zero=True),
            *make_recurrent_layers(),
            layers.Dense(32, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(num_classes, activation='softmax'),
        ], name=name)

    # Architecture name -> factory for its recurrent layers
    recurrent_blocks = {
        # 1. Simple RNN
        'SimpleRNN': lambda: [layers.SimpleRNN(rnn_units, **rnn_dropout)],
        # 2. LSTM
        'LSTM': lambda: [layers.LSTM(rnn_units, **rnn_dropout)],
        # 3. GRU
        'GRU': lambda: [layers.GRU(rnn_units, **rnn_dropout)],
        # 4. Bidirectional LSTM
        'BiLSTM': lambda: [layers.Bidirectional(layers.LSTM(half_units, **rnn_dropout))],
        # 5. Bidirectional GRU
        'BiGRU': lambda: [layers.Bidirectional(layers.GRU(half_units, **rnn_dropout))],
        # 6. Stacked LSTM: the first layer returns full sequences for the second
        'StackedLSTM': lambda: [
            layers.LSTM(rnn_units, return_sequences=True, **rnn_dropout),
            layers.LSTM(half_units, **rnn_dropout),
        ],
    }

    return {name: _assemble(name, make) for name, make in recurrent_blocks.items()}

# Instantiate every architecture with the notebook-wide settings
model_config = {
    'vocab_size': vocab_size,
    'max_length': MAX_SEQUENCE_LENGTH,
    'num_classes': num_classes,
    'embedding_dim': 64,  # Smaller embedding for our vocabulary
    'rnn_units': 64,
}
rnn_models = create_rnn_models(**model_config)

# Print the parameter count and Keras summary of each model
print("RNN Model Architectures:")
print("=" * 50)

for arch_name, arch_model in rnn_models.items():
    total_params = arch_model.count_params()
    print(f"\n{arch_name}:")
    print(f"Parameters: {total_params:,}")
    arch_model.summary()
    print("-" * 60)

# Model complexity figure (2x3 grid); this part draws panels 1-2
plt.figure(figsize=(15, 8))

model_names = list(rnn_models.keys())
param_counts = [model.count_params() for model in rnn_models.values()]
colors = ['skyblue', 'lightcoral', 'lightgreen', 'orange', 'pink', 'yellow']

# Panel 1: trainable + non-trainable parameter count per architecture
plt.subplot(2, 3, 1)
bars = plt.bar(model_names, param_counts, color=colors)
plt.title('Model Complexity (Parameters)')
plt.ylabel('Number of Parameters')
plt.xticks(rotation=45)

# Write each count just above its bar, rotated so long numbers fit
label_offset = max(param_counts)*0.01
for bar, count in zip(bars, param_counts):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + label_offset
    plt.text(label_x, label_y, f'{count:,}', ha='center', va='bottom', rotation=90)

# Panel 2: rough weight-storage estimate at 4 bytes per float32 parameter
plt.subplot(2, 3, 2)
bytes_per_mb = 1024 * 1024
memory_mb = [count * 4 / bytes_per_mb for count in param_counts]
plt.bar(model_names, memory_mb, color=colors)
plt.title('Estimated Memory Usage')
plt.ylabel('Memory (MB)')
plt.xticks(rotation=45)

# Layer count analysis
plt.subplot(2, 3, 3)
# Identify recurrent layers by class rather than by matching substrings of
# str(type(layer)), which depends on module paths and would also match any
# unrelated class whose name merely contains "LSTM" or "GRU".
recurrent_types = (layers.SimpleRNN, layers.LSTM, layers.GRU, layers.Bidirectional)

# Total top-level layers, and how many of them are recurrent, per model.
# A Bidirectional wrapper counts as one recurrent layer.
layer_counts = [len(model.layers) for model in rnn_models.values()]
rnn_layer_counts = [
    sum(isinstance(layer, recurrent_types) for layer in model.layers)
    for model in rnn_models.values()
]

x = np.arange(len(model_names))
width = 0.35

# Grouped bars: total layers on the left, recurrent layers on the right
plt.bar(x - width/2, layer_counts, width, label='Total Layers', alpha=0.7)
plt.bar(x + width/2, rnn_layer_counts, width, label='RNN Layers', alpha=0.7)
plt.xlabel('Model')
plt.ylabel('Layer Count')
plt.title('Layer Analysis')
plt.xticks(x, model_names, rotation=45)
plt.legend()

# Panel 4: hand-assigned relative cost per architecture (SimpleRNN = 1.0),
# listed in the same order as model_names
plt.subplot(2, 3, 4)
complexity_scores = [1.0, 4.0, 3.0, 8.0, 6.0, 8.0]  # Relative to SimpleRNN
plt.bar(model_names, complexity_scores, color=colors)
plt.title('Relative Computational Complexity')
plt.ylabel('Complexity Score')
plt.xticks(rotation=45)

# Panel 5: theoretical training-time proxy = parameters x complexity / 1000
plt.subplot(2, 3, 5)
training_time_est = [
    count * score / 1000
    for count, score in zip(param_counts, complexity_scores)
]
plt.bar(model_names, training_time_est, color=colors)
plt.title('Estimated Training Time (Relative)')
plt.ylabel('Relative Time')
plt.xticks(rotation=45)

# Panel 6: the same numbers as a table, one row per architecture
plt.subplot(2, 3, 6)
plt.axis('off')
table_data = [
    [arch, f"{count:,}", f"{total}", f"{recurrent}", f"{score:.1f}"]
    for arch, count, total, recurrent, score in zip(
        model_names, param_counts, layer_counts, rnn_layer_counts, complexity_scores
    )
]
column_headers = ['Model', 'Parameters', 'Total\nLayers', 'RNN\nLayers', 'Complexity']

table = plt.table(
    cellText=table_data,
    colLabels=column_headers,
    cellLoc='center',
    loc='center',
)
# Fixed font size and a slightly enlarged cell grid for readability
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1.2, 1.5)
plt.title('Model Comparison Summary')

plt.tight_layout()
plt.show()

print(f"\nModel Creation Complete!")
print(f"Created {len(rnn_models)} different RNN architectures")
print(f"Ready for training and evaluation")


In [None]:
# Training and Evaluation Pipeline
def train_and_evaluate_models(models, X_train, y_train, X_test, y_test, epochs=20, batch_size=32):
    """
    Compile, fit and test each model in *models*, one after another.

    Each model is compiled with Adam and sparse categorical cross-entropy,
    fitted with ``validation_split=0.2`` under early stopping and
    learning-rate reduction (both watching ``val_accuracy``), and then
    evaluated on the test data.

    Args:
        models: dict mapping a model name to a Keras model.
        X_train, y_train: training inputs and integer class labels.
        X_test, y_test: test inputs and integer class labels.
        epochs: maximum number of training epochs per model.
        batch_size: mini-batch size passed to ``fit``.

    Returns:
        ``(results, histories)``: ``results[name]`` holds the model, its test
        accuracy and loss, predicted class ids (``y_pred``), predicted
        probabilities (``y_pred_proba``) and the Keras ``History`` object;
        ``histories[name]`` is that object's ``history`` dict.
    """
    results, histories = {}, {}

    for model_name, net in models.items():
        print(f"\nTraining {model_name}...")
        print("-" * 40)

        net.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        # Fresh callback instances for every model so no state is shared
        stop_early = keras.callbacks.EarlyStopping(
            monitor='val_accuracy', patience=5, restore_best_weights=True
        )
        shrink_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_accuracy', patience=3, factor=0.5, min_lr=1e-6
        )

        fit_log = net.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            callbacks=[stop_early, shrink_lr],
            verbose=1,
        )

        # Held-out performance and per-sample predictions
        test_loss, test_accuracy = net.evaluate(X_test, y_test, verbose=0)
        probabilities = net.predict(X_test, verbose=0)
        predicted_classes = np.argmax(probabilities, axis=1)

        results[model_name] = {
            'model': net,
            'test_accuracy': test_accuracy,
            'test_loss': test_loss,
            'y_pred': predicted_classes,
            'y_pred_proba': probabilities,
            'history': fit_log,
        }
        histories[model_name] = fit_log.history

        print(f"Final test accuracy: {test_accuracy:.4f}")

    return results, histories

# Train a three-model subset for demonstration
selected_models = {
    arch: rnn_models[arch] for arch in ('SimpleRNN', 'LSTM', 'BiLSTM')
}

print("Training RNN models for sentiment classification...")
print("This may take a few minutes...")

# Fit and evaluate the subset; 15 epochs keeps the demonstration short
training_options = {'epochs': 15, 'batch_size': 16}
model_results, training_histories = train_and_evaluate_models(
    selected_models,
    X_train_padded, y_train,
    X_test_padded, y_test,
    **training_options,
)

# Analyze results
print(f"\nTraining Results Summary:")
print("=" * 50)

# One line per model: test accuracy and test loss
for name, result in model_results.items():
    accuracy = result['test_accuracy']
    loss = result['test_loss']
    print(f"{name:12s}: Accuracy={accuracy:.4f}, Loss={loss:.4f}")

# Pick the winner with max() instead of a running comparison seeded with 0:
# the seeded loop left best_model as "" when no model scored above 0, which
# made the lookup below fail with a KeyError.  max() returns the first model
# among ties, matching the previous strict ">" comparison.
best_model = max(model_results, key=lambda name: model_results[name]['test_accuracy'])
best_result = model_results[best_model]
best_accuracy = best_result['test_accuracy']

print(f"\nBest performing model: {best_model} with {best_accuracy:.4f} accuracy")

# Detailed analysis of best model: per-class precision / recall / F1
print(f"\nDetailed Results for {best_model}:")
print(classification_report(y_test, best_result['y_pred'], 
                          target_names=label_encoder.classes_))

# Results figure (3x4 grid); this part draws panels 1-4
plt.figure(figsize=(18, 12))

# Panels 1-2: train (solid) and validation (dashed) curves for every model
curve_panels = (
    (1, 'loss', 'val_loss', 'Training and Validation Loss', 'Loss'),
    (2, 'accuracy', 'val_accuracy', 'Training and Validation Accuracy', 'Accuracy'),
)
for panel, train_key, val_key, panel_title, axis_label in curve_panels:
    plt.subplot(3, 4, panel)
    for name, history in training_histories.items():
        plt.plot(history[train_key], label=f'{name} (train)', linestyle='-')
        plt.plot(history[val_key], label=f'{name} (val)', linestyle='--')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(axis_label)
    plt.legend()
    plt.grid(True, alpha=0.3)

# Panel 3: test accuracy per model, value printed above each bar
model_names = list(model_results.keys())
accuracies = [result['test_accuracy'] for result in model_results.values()]
colors = ['skyblue', 'lightcoral', 'lightgreen']

plt.subplot(3, 4, 3)
bars = plt.bar(model_names, accuracies, color=colors)
plt.title('Test Accuracy Comparison')
plt.ylabel('Accuracy')
plt.ylim(0, 1)

for bar, acc in zip(bars, accuracies):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    plt.text(label_x, label_y, f'{acc:.3f}', ha='center', va='bottom')

# Panel 4: confusion matrix of the best model (rows = actual, columns = predicted)
plt.subplot(3, 4, 4)
cm = confusion_matrix(y_test, best_result['y_pred'])
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_encoder.classes_,
    yticklabels=label_encoder.classes_,
)
plt.title(f'Confusion Matrix - {best_model}')
plt.xlabel('Predicted')
plt.ylabel('Actual')

# Panel 5: number of epochs each model actually trained for
plt.subplot(3, 4, 5)
training_epochs = [len(history['loss']) for history in training_histories.values()]
plt.bar(model_names, training_epochs, color=colors)
plt.title('Training Epochs (Early Stopping)')
plt.ylabel('Epochs')

# Panel 6: gap between last-epoch training and validation accuracy;
# bars above the dashed zero line mean training accuracy was higher
plt.subplot(3, 4, 6)
for position, (name, history) in enumerate(training_histories.items()):
    accuracy_gap = history['accuracy'][-1] - history['val_accuracy'][-1]
    plt.bar(name, accuracy_gap, color=colors[position])

plt.title('Overfitting Analysis (Train - Val Acc)')
plt.ylabel('Overfitting Score')
plt.axhline(y=0, color='red', linestyle='--', alpha=0.7)

# Panel 7: per-class accuracy of the best model on the test set
plt.subplot(3, 4, 7)
best_predictions = best_result['y_pred']
class_accuracies = []
for class_id in range(len(label_encoder.classes_)):
    in_class = y_test == class_id
    if not np.sum(in_class) > 0:
        # No test samples of this class: report 0 instead of dividing by zero
        class_accuracies.append(0)
        continue
    hits = best_predictions[in_class] == y_test[in_class]
    class_accuracies.append(np.mean(hits))

plt.bar(label_encoder.classes_, class_accuracies, color=['lightgreen', 'lightcoral', 'lightblue'])
plt.title(f'Class-wise Accuracy - {best_model}')
plt.ylabel('Accuracy')
plt.xticks(rotation=45)

# Panel 8: distribution of the top predicted probability, split by whether
# the prediction was right or wrong
plt.subplot(3, 4, 8)
prediction_confidences = np.max(best_result['y_pred_proba'], axis=1)
correct_predictions = best_result['y_pred'] == y_test

for outcome_mask, outcome_label in (
    (correct_predictions, 'Correct'),
    (~correct_predictions, 'Incorrect'),
):
    plt.hist(prediction_confidences[outcome_mask], alpha=0.7, label=outcome_label, bins=20)
plt.title('Prediction Confidence Distribution')
plt.xlabel('Confidence')
plt.ylabel('Frequency')
plt.legend()

# Panel 9: test loss per model
plt.subplot(3, 4, 9)
losses = [result['test_loss'] for result in model_results.values()]
plt.bar(model_names, losses, color=colors)
plt.title('Test Loss Comparison')
plt.ylabel('Loss')

# Panel 10: test accuracy per thousand parameters
plt.subplot(3, 4, 10)
param_counts = [selected_models[name].count_params() for name in model_names]
efficiency_scores = [
    acc / (count / 1000) for acc, count in zip(accuracies, param_counts)
]
plt.bar(model_names, efficiency_scores, color=colors)
plt.title('Parameter Efficiency (Acc/1K Params)')
plt.ylabel('Efficiency Score')

# Panel 11: first epoch (1-based) whose validation accuracy reaches 90% of
# the last recorded validation accuracy
plt.subplot(3, 4, 11)
convergence_epochs = []
for history in training_histories.values():
    val_curve = history['val_accuracy']
    threshold = 0.9 * val_curve[-1]
    for epoch_idx, val_acc in enumerate(val_curve):
        if val_acc >= threshold:
            break
    else:
        # Threshold never reached: fall back to the number of epochs
        epoch_idx = len(val_curve)
    convergence_epochs.append(epoch_idx + 1)

plt.bar(model_names, convergence_epochs, color=colors)
plt.title('Convergence Speed (Epochs to 90% Final Acc)')
plt.ylabel('Epochs')

# Panel 12: summary table, one row per trained model
plt.subplot(3, 4, 12)
plt.axis('off')
summary_data = [
    [
        name,
        f"{model_results[name]['test_accuracy']:.3f}",
        f"{model_results[name]['test_loss']:.3f}",
        f"{epoch_count}",
        f"{count:,}",
    ]
    for name, epoch_count, count in zip(model_names, training_epochs, param_counts)
]

table = plt.table(
    cellText=summary_data,
    colLabels=['Model', 'Accuracy', 'Loss', 'Epochs', 'Parameters'],
    cellLoc='center',
    loc='center',
)
# Fixed font size and a slightly enlarged cell grid for readability
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1.2, 1.5)
plt.title('Performance Summary')

plt.tight_layout()
plt.show()

# Sample predictions analysis
print(f"\nSample Predictions from Best Model ({best_model}):")
print("-" * 60)

# Draw up to five distinct test examples.  Sampling without replacement
# fails when more samples are requested than exist, so cap the request at
# the test-set size.
sample_count = min(5, len(X_test_text))
sample_indices = np.random.choice(len(X_test_text), sample_count, replace=False)

for i, idx in enumerate(sample_indices):
    text = X_test_text[idx]
    true_label = label_encoder.classes_[y_test[idx]]
    pred_label = label_encoder.classes_[best_result['y_pred'][idx]]
    # Confidence = highest class probability for this example
    confidence = np.max(best_result['y_pred_proba'][idx])

    # Only mark the text as truncated when it really was cut at 80 characters
    preview = text if len(text) <= 80 else f"{text[:80]}..."

    print(f"\nExample {i+1}:")
    print(f"Text: '{preview}'")
    print(f"True: {true_label}, Predicted: {pred_label}")
    print(f"Confidence: {confidence:.3f}")
    print(f"Correct: {'✓' if true_label == pred_label else '✗'}")

print(f"\nRNN-based Sentiment Classification Complete!")
print(f"Best model achieved {best_accuracy:.4f} accuracy")
print(f"Ready for advanced techniques and attention mechanisms!")
