# Text Generation Demo

This notebook demonstrates how to use the text generation models implemented in this project. It includes loading the models, preprocessing input data, and generating text samples.

In [None]:
# Import required libraries
import sys
sys.path.append('..')

import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from utils.text_preprocessing import TextPreprocessor
from models.lstm_model import LSTMTextGenerator, TextDataset, train_lstm_model
from models.gpt_model import GPTTextGenerator
from torch.utils.data import DataLoader

# Set the plotting style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only know the un-prefixed "seaborn" name and raise OSError
# for the new one, so fall back instead of failing in the first cell.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")

# Report the environment so a missing or CPU-only PyTorch install is visible
# before any model code runs.
print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

In [None]:
# Load sample data
def load_sample_data():
    """Return the built-in demo corpus as a new list of five paragraphs.

    Each paragraph is a three-sentence string on one topic (technology,
    climate, space, education, healthcare). A fresh list is built on
    every call.
    """
    technology = (
        "Technology is rapidly evolving and changing our daily lives. "
        "Artificial intelligence and machine learning are becoming integral parts of modern society. "
        "Smart devices connect us globally while automation increases efficiency in various industries."
    )
    climate = (
        "Climate change represents one of the most pressing challenges of our time. "
        "Rising temperatures affect weather patterns worldwide. "
        "Sustainable energy solutions and environmental conservation efforts are crucial for future generations."
    )
    space = (
        "Space exploration continues to fascinate humanity and drive scientific advancement. "
        "Recent missions to Mars have provided valuable insights about our neighboring planet. "
        "Private companies are now contributing significantly to space research and development."
    )
    education = (
        "Education systems worldwide are adapting to digital transformation. "
        "Online learning platforms provide accessible education to students globally. "
        "Interactive technologies enhance traditional teaching methods and improve learning outcomes."
    )
    healthcare = (
        "Healthcare innovation saves lives and improves quality of life for millions. "
        "Medical research leads to breakthrough treatments for various diseases. "
        "Personalized medicine and genetic therapies represent the future of healthcare."
    )
    return [technology, climate, space, education, healthcare]

# Materialise the demo corpus once; the preprocessing cell reads `texts`.
texts = load_sample_data()

# Corpus summary: paragraph count and mean length in characters.
print(
    "Sample Data Loaded:",
    f"Number of texts: {len(texts)}",
    f"Average text length: {np.mean(list(map(len, texts))):.1f} characters",
    sep="\n",
)

# Show the first paragraph in full, preceded by a blank line.
print("\nFirst text sample:", texts[0], sep="\n")

In [None]:
# Create the preprocessor shared by the remaining cells.
preprocessor = TextPreprocessor()

# Build the vocabulary from the sample texts (min_freq=1 is passed through
# to build_vocabulary).
vocab = preprocessor.build_vocabulary(texts, min_freq=1)

print("Vocabulary size:", len(vocab))
print("Sample vocabulary items:", list(vocab.items())[:10])

# Turn the texts into training sequences with sequence_length=15; the
# dataset cell passes the same length to TextDataset.
sequences = preprocessor.create_sequences(texts, sequence_length=15)
print("Number of training sequences:", len(sequences))

# Inspect the first sequence, both raw and converted back through
# sequence_to_text.
sample_seq = sequences[0]
print(f"\nSample sequence (indices): {sample_seq}")
print(f"Sample sequence (words): {preprocessor.sequence_to_text(sample_seq)}")

In [None]:
# Wrap the sequences in a dataset and serve them in shuffled batches of 16.
dataset = TextDataset(sequences, sequence_length=15)
dataloader = DataLoader(dataset, shuffle=True, batch_size=16)

# Build the LSTM generator, sized to the preprocessor's vocabulary.
lstm_model = LSTMTextGenerator(
    vocab_size=preprocessor.vocab_size,
    embedding_dim=64, hidden_dim=128,
    num_layers=2, dropout=0.2,
)

print("LSTM Model Architecture:")
print(lstm_model)

# Fit the model; the returned values are plotted below as the loss curve
# (presumably one value per epoch -- confirm against train_lstm_model).
print("\nTraining LSTM model...")
losses = train_lstm_model(
    lstm_model,
    dataloader,
    num_epochs=10,
    learning_rate=0.01,
)

# Draw the loss curve.
plt.figure(figsize=(12, 6))
plt.plot(losses, marker='o', lw=2, ms=6)
plt.title('LSTM Training Loss Over Time', fontsize=16, fontweight='bold')
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print(f"Final training loss: {losses[-1]:.4f}")

In [None]:
# Seed prompts for the LSTM, one per topic in the sample corpus.
prompts = ["Technology", "Climate change", "Space", "Education", "Healthcare innovation"]

print("LSTM Generated Text:", "=" * 50, sep="\n")

# Generate one continuation per prompt and print it as a numbered entry.
for i, prompt in enumerate(prompts, start=1):
    generated = lstm_model.generate_text(
        preprocessor, start_text=prompt, max_length=40, temperature=0.8
    )

    print(f"{i}. Prompt: '{prompt}'")
    print(f"   Generated: {generated}")
    print("-" * 50)

In [None]:
# Construct the GPT generator from the 'gpt2' identifier.
print("Loading GPT-2 model...")
gpt_generator = GPTTextGenerator('gpt2')

# Section banner for the GPT samples.
print("\nGPT-2 Generated Text:", "=" * 50, sep="\n")

# Sentence openers for GPT-2 to continue.
gpt_prompts = [
    "Technology is revolutionizing",
    "Climate change impacts our",
    "Space exploration reveals new",
    "Modern education systems are",
    "Healthcare innovations provide",
]

# Request a single sequence per prompt and print its first (only) entry.
for i, prompt in enumerate(gpt_prompts, start=1):
    generated_texts = gpt_generator.generate_text(
        prompt=prompt, max_length=60, temperature=0.7, num_return_sequences=1
    )

    print(f"{i}. Prompt: '{prompt}'")
    print(f"   Generated: {generated_texts[0]}")
    print("-" * 50)

In [None]:
# Run the LSTM and GPT generators side by side on identical prompts.
comparison_prompts = ["Technology", "Climate", "Space"]

print("Model Comparison:", "=" * 60, sep="\n")

for prompt in comparison_prompts:
    print(f"\nPrompt: '{prompt}'")
    print("-" * 30)

    # LSTM output first, then GPT; both run before either result is printed.
    lstm_generated = lstm_model.generate_text(
        preprocessor, start_text=prompt, max_length=30, temperature=0.8
    )
    gpt_outputs = gpt_generator.generate_text(
        prompt=prompt, max_length=50, temperature=0.8
    )
    # Only the first returned GPT sequence is shown.
    gpt_generated = gpt_outputs[0]

    print(f"LSTM: {lstm_generated}")
    print(f"GPT:  {gpt_generated}")
    print("=" * 60)

In [None]:
def interactive_text_generation():
    """Run a console loop that generates text for user-supplied prompts.

    Repeatedly asks for a prompt and a model choice, then prints the output
    of the notebook-level ``lstm_model`` and/or ``gpt_generator``
    (``preprocessor`` is passed to the LSTM). Blank prompts are skipped, and
    typing ``stop`` in any letter case ends the loop. A model choice other
    than 1, 2 or 3 is reported to the user instead of being silently
    ignored.

    Returns:
        None. All results are printed.
    """
    valid_choices = ('1', '2', '3')

    print("Interactive Text Generation")
    print("Enter your prompts below (type 'stop' to end)")

    while True:
        user_prompt = input("\nEnter your prompt: ").strip()

        if user_prompt.lower() == 'stop':
            break

        # Nothing to generate from; ask again.
        if not user_prompt:
            continue

        print("\nChoose model: 1) LSTM  2) GPT  3) Both")
        model_choice = input("Enter choice (1, 2, or 3): ").strip()

        # Without this check a typo produced no output and no explanation,
        # which looks as if generation had failed.
        if model_choice not in valid_choices:
            print(f"\nInvalid choice: '{model_choice}'. Please enter 1, 2, or 3.")
            continue

        if model_choice in ('1', '3'):
            lstm_result = lstm_model.generate_text(
                preprocessor,
                start_text=user_prompt,
                max_length=40,
                temperature=0.8
            )
            print(f"\nLSTM Result: {lstm_result}")

        if model_choice in ('2', '3'):
            # generate_text returns an indexable result; only the first
            # entry is shown.
            gpt_result = gpt_generator.generate_text(
                prompt=user_prompt,
                max_length=60,
                temperature=0.8
            )[0]
            print(f"\nGPT Result: {gpt_result}")

# Uncomment the line below to run interactive generation
# interactive_text_generation()