In [None]:
# Import necessary libraries and reproduce data from Part 1
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
import string
import re
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow from a single constant so both generators
# start from the same, easily tunable value on every fresh run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report the environment so readers can tell which TensorFlow build ran.
print("TensorFlow version:", tf.__version__)
print("Libraries imported successfully!")


In [None]:
# Recreate data from Part 1 (compact version)
# Demo corpus held as one triple-quoted string: an excerpt of the
# "To be or not to be" soliloquy followed by six modern sentences.
# The literal begins and ends with a newline; preprocess_text() is applied
# to it below before the vocabulary is built.
sample_text = """
To be or not to be, that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them. To die—to sleep,
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to: 'tis a consummation
Devoutly to be wish'd. To die, to sleep;
To sleep, perchance to dream—ay, there's the rub:
For in that sleep of death what dreams may come,
When we have shuffled off this mortal coil,
Must give us pause. There's the respect
That makes calamity of so long life.

The quick brown fox jumps over the lazy dog.
Machine learning is revolutionizing the way we process data.
Deep neural networks can learn complex patterns from large datasets.
Artificial intelligence will transform various industries in the coming years.
Natural language processing enables computers to understand human language.
The future of technology lies in the intersection of AI and human creativity.
"""

# Preprocess text
def preprocess_text(text):
    """
    Normalise raw text for character-level modelling.

    Steps, in order:
      1. lower-case the text;
      2. map Unicode dashes (U+2010..U+2015, e.g. the em dash) to an ASCII
         hyphen so that "die—to" becomes "die-to" instead of "dieto";
      3. drop every character that is not a word character, whitespace or
         one of . , ; : ! ? - ' ";
      4. collapse each whitespace run into a single space and strip the ends.

    Parameters:
    text: raw input string

    Returns:
    The cleaned string (possibly empty).
    """
    text = text.lower()
    # Dashes are not in the allowed set below; without this mapping they
    # would be deleted and the two neighbouring words would be fused.
    text = re.sub(r'[\u2010-\u2015]', '-', text)
    text = re.sub(r'[^\w\s\.\,\;\:\!\?\-\'\"]', '', text)
    # Collapse whitespace only after symbol removal: deleting a symbol that
    # sat between two spaces would otherwise leave a double space behind.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

processed_text = preprocess_text(sample_text)

# Vocabulary: every distinct character, in sorted order, with lookup
# tables in both directions (index -> char and char -> index).
chars = sorted(set(processed_text))
idx_to_char = dict(enumerate(chars))
char_to_idx = {symbol: position for position, symbol in idx_to_char.items()}
vocab_size = len(chars)

print(f"Text length: {len(processed_text)} characters")
print(f"Vocabulary size: {vocab_size} unique characters")
print(f"Characters: {chars}")

# Sliding-window settings: each example is `sequence_length` characters,
# and consecutive windows start `step` characters apart.
sequence_length = 40
step = 3

# Integer-encode the whole corpus once
encoded_text = [char_to_idx[symbol] for symbol in processed_text]

# Every window start that still leaves one character after the window
# to serve as the prediction target.
window_starts = range(0, len(encoded_text) - sequence_length, step)
sequences = [encoded_text[start:start + sequence_length] for start in window_starts]
targets = [encoded_text[start + sequence_length] for start in window_starts]

X = np.array(sequences)
y = np.array(targets)

print(f"Training sequences shape: {X.shape}")
print(f"Training targets shape: {y.shape}")
print(f"Number of training examples: {len(X)}")


In [None]:
# Text Generation Model Architectures
def create_simple_rnn_generator(vocab_size, sequence_length, embedding_dim=256, rnn_units=512):
    """
    Create a simple RNN-based text generator

    Parameters:
    vocab_size: number of distinct characters (embedding rows and output classes)
    sequence_length: number of character indices in each input window
    embedding_dim: size of each character embedding vector
    rnn_units: number of units in the SimpleRNN layer

    Returns:
    An uncompiled keras.Sequential model whose output is a softmax
    distribution over the vocabulary for the next character.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input layer instead of the
        # deprecated Embedding(input_length=...) argument. This builds the
        # model immediately, so count_params()/summary() work right away.
        keras.Input(shape=(sequence_length,)),
        keras.layers.Embedding(vocab_size, embedding_dim),
        keras.layers.SimpleRNN(rnn_units, dropout=0.2, recurrent_dropout=0.2),
        keras.layers.Dense(vocab_size, activation='softmax')
    ])
    return model

def create_lstm_generator(vocab_size, sequence_length, embedding_dim=256, lstm_units=512):
    """
    Create an LSTM-based text generator

    Parameters:
    vocab_size: number of distinct characters (embedding rows and output classes)
    sequence_length: number of character indices in each input window
    embedding_dim: size of each character embedding vector
    lstm_units: number of units in the LSTM layer

    Returns:
    An uncompiled keras.Sequential model whose output is a softmax
    distribution over the vocabulary for the next character.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input layer instead of the
        # deprecated Embedding(input_length=...) argument. This builds the
        # model immediately, so count_params()/summary() work right away.
        keras.Input(shape=(sequence_length,)),
        keras.layers.Embedding(vocab_size, embedding_dim),
        keras.layers.LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2),
        keras.layers.Dense(vocab_size, activation='softmax')
    ])
    return model

def create_gru_generator(vocab_size, sequence_length, embedding_dim=256, gru_units=512):
    """
    Create a GRU-based text generator

    Parameters:
    vocab_size: number of distinct characters (embedding rows and output classes)
    sequence_length: number of character indices in each input window
    embedding_dim: size of each character embedding vector
    gru_units: number of units in the GRU layer

    Returns:
    An uncompiled keras.Sequential model whose output is a softmax
    distribution over the vocabulary for the next character.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input layer instead of the
        # deprecated Embedding(input_length=...) argument. This builds the
        # model immediately, so count_params()/summary() work right away.
        keras.Input(shape=(sequence_length,)),
        keras.layers.Embedding(vocab_size, embedding_dim),
        keras.layers.GRU(gru_units, dropout=0.2, recurrent_dropout=0.2),
        keras.layers.Dense(vocab_size, activation='softmax')
    ])
    return model

def create_stacked_lstm_generator(vocab_size, sequence_length, embedding_dim=256, lstm_units=512):
    """
    Create a stacked LSTM text generator

    Parameters:
    vocab_size: number of distinct characters (embedding rows and output classes)
    sequence_length: number of character indices in each input window
    embedding_dim: size of each character embedding vector
    lstm_units: number of units in each of the two LSTM layers

    Returns:
    An uncompiled keras.Sequential model whose output is a softmax
    distribution over the vocabulary for the next character.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input layer instead of the
        # deprecated Embedding(input_length=...) argument. This builds the
        # model immediately, so count_params()/summary() work right away.
        keras.Input(shape=(sequence_length,)),
        keras.layers.Embedding(vocab_size, embedding_dim),
        # The first LSTM returns the full sequence so the second LSTM
        # receives one vector per time step.
        keras.layers.LSTM(lstm_units, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
        keras.layers.LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2),
        keras.layers.Dense(vocab_size, activation='softmax')
    ])
    return model

# Create different model architectures
print("Creating different RNN architectures...")

# All four variants share the same embedding width and recurrent width so
# that the comparison below reflects the cell type / depth only.
models = {
    'Simple RNN': create_simple_rnn_generator(vocab_size, sequence_length, embedding_dim=128, rnn_units=256),
    'LSTM': create_lstm_generator(vocab_size, sequence_length, embedding_dim=128, lstm_units=256),
    'GRU': create_gru_generator(vocab_size, sequence_length, embedding_dim=128, gru_units=256),
    'Stacked LSTM': create_stacked_lstm_generator(vocab_size, sequence_length, embedding_dim=128, lstm_units=256)
}

# Display model architectures
for name, model in models.items():
    print(f"\n{name} Architecture:")
    print(f"Parameters: {model.count_params():,}")
    model.summary()
    print("-" * 60)

# Compare model complexities
plt.figure(figsize=(12, 8))

model_names = list(models.keys())
param_counts = [model.count_params() for model in models.values()]
colors = ['skyblue', 'lightcoral', 'lightgreen', 'orange']

# Parameter comparison
plt.subplot(2, 2, 1)
bars = plt.bar(model_names, param_counts, color=colors)
plt.title('Model Complexity (Parameters)')
plt.ylabel('Number of Parameters')
plt.xticks(rotation=45)

# Add value labels on bars, lifted by 1% of the tallest bar so the text
# does not touch the bar top.
label_offset = max(param_counts) * 0.01
for bar, count in zip(bars, param_counts):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + label_offset,
             f'{count:,}', ha='center', va='bottom')

# Memory usage estimation
plt.subplot(2, 2, 2)
# Rough estimation: 4 bytes per parameter (float32)
memory_mb = [params * 4 / (1024 * 1024) for params in param_counts]
plt.bar(model_names, memory_mb, color=colors)
plt.title('Estimated Memory Usage')
plt.ylabel('Memory (MB)')
plt.xticks(rotation=45)

# Training speed comparison (theoretical)
plt.subplot(2, 2, 3)
# Hand-assigned illustrative scores relative to Simple RNN; these are
# literals, not measurements, and are listed in the same order as `models`.
complexity_scores = [1.0, 1.5, 1.2, 2.0]  # Relative to Simple RNN
plt.bar(model_names, complexity_scores, color=colors)
plt.title('Relative Training Complexity')
plt.ylabel('Complexity Score')
plt.xticks(rotation=45)

# Architecture visualization
plt.subplot(2, 2, 4)
# Count recurrent layers by type rather than by substring-matching the
# auto-generated layer names, which breaks as soon as a layer is renamed.
layer_counts = [
    sum(isinstance(layer, keras.layers.RNN) for layer in model.layers)
    for model in models.values()
]
plt.bar(model_names, layer_counts, color=colors)
plt.title('Number of Recurrent Layers')
plt.ylabel('Layer Count')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Choose LSTM for detailed training (good balance of performance and stability)
# The name is kept in one variable so the lookup and the printed label
# cannot drift apart.
selected_model_name = 'LSTM'
selected_model = models[selected_model_name]
print(f"\nSelected model: {selected_model_name}")
print(f"Total parameters: {selected_model.count_params():,}")


In [None]:
# Model compilation and training
print("Compiling model...")
# Targets are integer class indices, hence the sparse cross-entropy loss.
adam_optimizer = keras.optimizers.Adam(learning_rate=0.001)
selected_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy'],
)

# Hold out the last 20% of the windows (in corpus order) for validation;
# no shuffling is applied before the cut.
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Validation set: {X_val.shape}, {y_val.shape}")

# Training configuration
epochs = 50
batch_size = 64

# Custom callback to generate text during training
class TextGenerationCallback(keras.callbacks.Callback):
    """
    Keras callback that prints a greedily generated text sample during training.

    A sample is produced at the end of every epoch whose zero-based index
    is a multiple of 10 (i.e. after the 1st, 11th, 21st, ... epoch).

    Relies on the notebook-level `char_to_idx`, `idx_to_char` and
    `sequence_length` defined in the data-preparation cell.

    Parameters:
    seed_text: text the generated sample starts from
    generate_length: number of characters to append to the seed
    """

    def __init__(self, seed_text="to be or not to be", generate_length=100):
        # Initialise the base Callback state before adding our own attributes.
        super().__init__()
        self.seed_text = seed_text
        self.generate_length = generate_length

    def on_epoch_end(self, epoch, logs=None):
        """Print a generated sample every 10 epochs (`epoch` is zero-based)."""
        if epoch % 10 == 0:  # Generate text every 10 epochs
            generated = self.generate_text()
            print(f"\nEpoch {epoch} - Generated text:")
            print(f"'{generated}'")
            print("-" * 50)

    def generate_text(self):
        """
        Greedily extend `seed_text` by `generate_length` characters.

        Returns:
        The seed text followed by the generated characters.
        """
        # Encode the tail of the seed; characters missing from the
        # vocabulary fall back to index 0.
        current_sequence = [char_to_idx.get(c, 0) for c in self.seed_text[-sequence_length:]]
        # Left-pad short seeds with index 0 up to the model's window size.
        if len(current_sequence) < sequence_length:
            current_sequence = [0] * (sequence_length - len(current_sequence)) + current_sequence

        pieces = [self.seed_text]

        for _ in range(self.generate_length):
            # Predict next character (batch of one window)
            x_pred = np.array([current_sequence])
            pred = self.model.predict(x_pred, verbose=0)
            # Greedy choice; cast to a plain int so the rolling window
            # stays a list of Python integers.
            next_char_idx = int(np.argmax(pred[0]))
            pieces.append(idx_to_char.get(next_char_idx, ''))

            # Slide the window: drop the oldest index, append the new one
            current_sequence = current_sequence[1:] + [next_char_idx]

        return ''.join(pieces)

# Create callbacks: periodic text samples, early stopping that keeps the
# best weights, and learning-rate halving when progress stalls.
early_stopping = keras.callbacks.EarlyStopping(restore_best_weights=True, patience=10)
reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6)
text_callback = TextGenerationCallback(generate_length=50, seed_text="to be or not")
training_callbacks = [text_callback, early_stopping, reduce_lr]

print("Starting training...")
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=training_callbacks,
    verbose=1,
)
history = selected_model.fit(X_train, y_train, **fit_options)

print("Training completed!")


In [None]:
# Advanced text generation with temperature sampling
def generate_text_with_temperature(model, seed_text, length=200, temperature=1.0,
                                   char_index=None, index_char=None, seq_len=None):
    """
    Generate text using temperature sampling

    The model is expected to return a probability distribution (softmax
    output) per input row. Temperature is therefore applied in log space:
    p_i ** (1 / temperature), renormalised. Applying exp() directly to the
    probabilities would flatten them towards a uniform distribution.

    Parameters:
    model: trained RNN model exposing predict(x, verbose=0)
    seed_text: starting text
    length: number of characters to generate
    temperature: sampling temperature (higher = more random); 0 selects
        greedy (argmax) decoding
    char_index: optional char -> index mapping; defaults to the notebook's
        `char_to_idx`
    index_char: optional index -> char mapping; defaults to the notebook's
        `idx_to_char`
    seq_len: optional input window length; defaults to the notebook's
        `sequence_length`

    Returns:
    seed_text followed by the generated characters.

    Raises:
    ValueError: if temperature is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")

    # Fall back to the notebook globals only when no override is supplied.
    char_index = char_to_idx if char_index is None else char_index
    index_char = idx_to_char if index_char is None else index_char
    seq_len = sequence_length if seq_len is None else seq_len

    # Prepare seed sequence: encode the tail of the seed (unknown characters
    # map to index 0) and left-pad with index 0 up to the window size.
    current_sequence = [char_index.get(c, 0) for c in seed_text[-seq_len:]]
    if len(current_sequence) < seq_len:
        current_sequence = [0] * (seq_len - len(current_sequence)) + current_sequence

    pieces = [seed_text]

    for _ in range(length):
        # Get prediction for a batch of one window. float64 keeps the
        # renormalised vector within np.random.choice's sum-to-one tolerance.
        x_pred = np.array([current_sequence])
        probabilities = np.asarray(model.predict(x_pred, verbose=0)[0], dtype=np.float64)

        if temperature == 0:
            # Greedy sampling
            next_char_idx = int(np.argmax(probabilities))
        else:
            # Temperature sampling in log space; log(0) = -inf is fine and
            # yields an exact zero probability after exp().
            with np.errstate(divide='ignore'):
                scaled_logits = np.log(probabilities) / temperature
            # Subtract the max before exp() for numerical stability.
            scaled_logits -= np.max(scaled_logits)
            weights = np.exp(scaled_logits)
            weights /= weights.sum()
            next_char_idx = int(np.random.choice(len(weights), p=weights))

        pieces.append(index_char.get(next_char_idx, ''))

        # Update sequence: drop the oldest index, append the new one
        current_sequence = current_sequence[1:] + [next_char_idx]

    return ''.join(pieces)

# Visualize training progress
plt.figure(figsize=(15, 10))

# Training and validation loss
plt.subplot(2, 3, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

# Training and validation accuracy
plt.subplot(2, 3, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

# Learning rate (if available)
plt.subplot(2, 3, 3)
# The history key for the learning rate differs between Keras versions
# ('lr' in older releases, 'learning_rate' in newer ones), so accept both.
lr_key = next((key for key in ('lr', 'learning_rate') if key in history.history), None)
if lr_key is not None:
    plt.plot(history.history[lr_key])
    plt.title('Learning Rate Schedule')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.yscale('log')
else:
    plt.text(0.5, 0.5, 'Learning Rate\nSchedule\n(Not recorded)', 
             ha='center', va='center', transform=plt.gca().transAxes)
    plt.title('Learning Rate Schedule')
plt.grid(True, alpha=0.3)

# Generate text with different temperatures
temperatures = [0.2, 0.5, 1.0, 1.2]
seed = "to be or not to be"

plt.subplot(2, 3, 4)
temp_samples = []
for temp in temperatures:
    sample = generate_text_with_temperature(selected_model, seed, length=50, temperature=temp)
    temp_samples.append(len(set(sample)))  # Unique characters as diversity measure

plt.bar(range(len(temperatures)), temp_samples, color='lightcoral')
plt.xlabel('Temperature')
plt.ylabel('Unique Characters')
plt.title('Text Diversity vs Temperature')
plt.xticks(range(len(temperatures)), temperatures)

# Character frequency in generated text
plt.subplot(2, 3, 5)
# Use dedicated names here: reusing `sample_text` / `chars` would silently
# overwrite the training corpus and the vocabulary list built earlier.
generated_sample = generate_text_with_temperature(selected_model, seed, length=200, temperature=1.0)
char_freq = Counter(generated_sample)
common_chars = char_freq.most_common(10)
top_chars, top_freqs = zip(*common_chars)
plt.bar(range(len(top_chars)), top_freqs)
plt.xlabel('Characters')
plt.ylabel('Frequency')
plt.title('Character Frequency in Generated Text')
# repr() keeps whitespace characters (typically the most frequent one)
# visible as tick labels, e.g. ' ' instead of an empty label.
plt.xticks(range(len(top_chars)), [repr(ch) for ch in top_chars])

# Model performance summary
plt.subplot(2, 3, 6)
# These are the metrics of the last epoch that ran.
# NOTE(review): EarlyStopping was created with restore_best_weights=True, so
# the weights in `selected_model` may come from an earlier epoch - confirm
# whether best-epoch metrics should be reported instead.
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]

metrics = ['Train Acc', 'Val Acc', 'Train Loss', 'Val Loss']
values = [final_train_acc, final_val_acc, final_train_loss, final_val_loss]
# Separate palette name so the per-model `colors` list from the
# architecture comparison is not overwritten.
metric_colors = ['green', 'lightgreen', 'red', 'lightcoral']

plt.bar(metrics, values, color=metric_colors)
plt.title('Final Model Performance')
plt.ylabel('Value')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Generate and display text samples with different temperatures
print("Text Generation Examples with Different Temperatures:")
print("=" * 80)

for temp in [0.2, 0.7, 1.0, 1.5]:
    print(f"\nTemperature: {temp}")
    print("-" * 40)
    generated = generate_text_with_temperature(selected_model, "the future of", length=150, temperature=temp)
    print(f"'{generated}'")

# Analyze model performance
print(f"\nModel Performance Summary:")
print(f"Final Training Accuracy: {final_train_acc:.4f}")
print(f"Final Validation Accuracy: {final_val_acc:.4f}")
print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Epochs Trained: {len(history.history['loss'])}")

# Generation quality analysis
print(f"\nGeneration Quality Analysis:")
sample_gen = generate_text_with_temperature(selected_model, "machine learning", length=200, temperature=1.0)
# The two-word seed is part of `sample_gen`, so `words` is never empty and
# the ratio below cannot divide by zero.
words = sample_gen.split()
avg_word_length = np.mean([len(word) for word in words])
unique_words = len(set(words))
total_words = len(words)

print(f"Generated text length: {len(sample_gen)} characters")
print(f"Average word length: {avg_word_length:.2f}")
print(f"Vocabulary diversity: {unique_words}/{total_words} = {unique_words/total_words:.2f}")
print(f"Sample: '{sample_gen[:100]}...'")

print("\nText generation model training and evaluation complete!")
