In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import random
from torch.nn.utils.rnn import pad_sequence

# Reproducibility: seed every RNG this notebook imports (PyTorch, NumPy and
# the stdlib `random` module) from one shared, named value.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Plot styling. Matplotlib 3.6 renamed its bundled seaborn style sheet to
# 'seaborn-v0_8'; older releases only ship it as 'seaborn'. Pick whichever
# name this installation knows instead of raising OSError on an unknown
# style. If neither is present the default Matplotlib style is kept.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")


In [None]:
# Demonstrate teacher forcing vs free running
def compare_training_modes(input_sentence="Hello", target_sentence="Bonjour", max_steps=4):
    """Print a character-level comparison of teacher forcing and free running.

    The decoder sequences are derived from ``target_sentence`` so that the
    teacher-forcing input (``[SOS]`` followed by the target characters) and
    the teacher-forcing target (the target characters followed by ``[EOS]``)
    always have the same number of tokens.

    Args:
        input_sentence: Source sentence shown in the header. Defaults to
            "Hello".
        target_sentence: Sentence the decoder should produce; it is split
            into single characters. Defaults to "Bonjour".
        max_steps: How many free-running steps to print. ``None`` prints
            every step up to and including the one that emits ``[EOS]``.
            Defaults to 4.

    Returns:
        None. Everything is written to standard output.
    """
    target_chars = list(target_sentence)
    # Teacher forcing feeds the target shifted right by one position.
    decoder_inputs = ["[SOS]"] + target_chars
    decoder_targets = target_chars + ["[EOS]"]
    spaced_chars = "  ".join(target_chars)

    print("TEACHER FORCING vs FREE RUNNING COMPARISON")
    print("=" * 50)

    print(f"Input: {input_sentence}")
    print(f"Target: {target_sentence}")
    print()

    # Teacher forcing mode (training)
    print("TEACHER FORCING (Training Mode):")
    # filter(None, ...) drops the character part when the target is empty,
    # so no stray separator is printed next to the marker token.
    print("Decoder Input:  " + " ".join(filter(None, ["[SOS]", spaced_chars])))
    print("Decoder Target: " + " ".join(filter(None, [spaced_chars, "[EOS]"])))
    print("- Uses ground truth as input at each step")
    print("- Fast and stable training")
    print("- No error accumulation during training")
    print()

    # Free running mode (inference)
    print("FREE RUNNING (Inference Mode):")
    steps = list(zip(decoder_inputs, decoder_targets))
    if max_steps is not None:
        # Clamp so a negative value prints no steps instead of slicing
        # from the end of the list.
        steps = steps[:max(max_steps, 0)]
    for step_number, (fed_token, produced_token) in enumerate(steps, start=1):
        # Width 9 keeps the arrows in one column for "[SOS]" and single
        # characters alike.
        print(f"Step {step_number}: Input={fed_token:<9} -> Output={produced_token}")
    print("- Uses own predictions as input")
    print("- Errors can accumulate")
    print("- Real-world usage scenario")

# Print the teacher-forcing vs free-running comparison as this cell's output.
compare_training_modes()


In [None]:
# Demonstrate sequence padding for batch processing
def demonstrate_sequence_padding():
    """Pad a small batch of tokenised sentences to a common length.

    Prints the original token lists with their lengths, the longest length
    in the batch, and the padded token lists.

    Returns:
        A list with one token list per sentence, each extended on the right
        with "<PAD>" tokens up to the length of the longest sentence.
    """
    # Example batch whose sentences deliberately differ in length.
    batch = [
        ["I", "love", "AI"],
        ["Machine", "learning", "is", "fascinating"],
        ["Deep", "neural", "networks", "are", "powerful", "tools"],
        ["NLP"],
    ]

    print("SEQUENCE PADDING DEMONSTRATION")
    print("=" * 40)

    # Stage 1: the unpadded input.
    print("Original sequences:")
    for number, tokens in enumerate(batch, start=1):
        print(f"  {number}: {tokens} (length: {len(tokens)})")

    # Stage 2: the longest sentence sets the width of the whole batch.
    longest = max(map(len, batch))
    print(f"\nMaximum length: {longest}")

    # Stage 3: append as many "<PAD>" tokens as each sentence is short by.
    padded_batch = [
        tokens + ["<PAD>"] * (longest - len(tokens))
        for tokens in batch
    ]

    print("\nPadded sequences:")
    for number, tokens in enumerate(padded_batch, start=1):
        print(f"  {number}: {tokens}")

    return padded_batch

# Run the demonstration (prints each stage) and keep the returned padded
# token lists in `padded_seqs`.
padded_seqs = demonstrate_sequence_padding()
