# In [3]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Circle
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.io as pio
# Select "notebook" as plotly's default renderer for this session.
# NOTE(review): plotly is not used anywhere else in the visible part of this
# file — confirm this setting is still needed.
pio.renderers.default = "notebook"

class LLMPipelineInteractive:
    def __init__(self) -> None:
        """Build the color palette and the ordered step list, then create the widgets.

        Sets three attributes before delegating to ``setup_widgets``:

        * ``self.colors`` - mapping from a semantic key (stage name, 'highlight',
          'inactive', ...) to a hex color string.
        * ``self.steps`` - ordered list of step dictionaries (see the comment
          above the list for the meaning of each key).
        * ``self.current_step`` - initialised to 0, i.e. the 'Overview' entry.
        """
        # Define colors
        self.colors = {
            'data': '#FF6B6B',           # Red - Raw data
            'processing': '#4ECDC4',      # Teal - Processing
            'pretraining': '#FFE66D',     # Yellow - Pre-training
            'alignment': '#95E77E',       # Green - Alignment
            'evaluation': '#B19CD9',      # Purple - Evaluation
            'deployment': '#FFA07A',      # Light coral - Deployment
            'infrastructure': '#D3D3D3',  # Light gray - Infrastructure
            'feedback': '#FF69B4',        # Hot pink - Human feedback
            'highlight': '#FFD700',       # Gold - Highlight
            'inactive': '#E8E8E8'         # Light gray - Inactive
        }

        # Define pipeline steps with detailed, intuitive descriptions
        #
        # Each entry is a dict with these keys:
        #   'name'        - short title; the first entry's name seeds the step label
        #                   in setup_widgets.
        #   'stage'       - stage identifier compared against literal stage names in
        #                   draw_pipeline; 'all' draws every box in its own color.
        #   'highlight'   - list of (x, y) box centers; draw_pipeline paints a box in
        #                   the highlight color when its center is within 0.1 of one
        #                   of these points on both axes, so the values must match
        #                   the coordinates hard-coded in draw_pipeline.
        #   'description' - HTML fragment; the first entry's description is the
        #                   initial value of an HTML widget in setup_widgets.
        self.steps = [
            {
                'name': 'Overview',
                'stage': 'all',
                'highlight': [],
                'description': """
                <h2>🚀 Complete LLM Pipeline</h2>
                <p>This visualization shows the entire process of developing a modern Large Language Model.</p>
                <b>Key Facts:</b>
                <ul>
                <li>📅 Timeline: 6-18 months from start to deployment</li>
                <li>💰 Cost: $50M - $200M+ for training alone</li>
                <li>🖥️ Compute: 1000s of GPUs working in parallel</li>
                <li>📊 Data: Terabytes of text processed</li>
                </ul>
                <p style="background: #e8f4fd; padding: 10px; border-radius: 5px;">
                <b>💡 Tip:</b> Use the navigation buttons to explore each step in detail. Each stage builds upon the previous one!
                </p>
                """
            },
            {
                'name': '1. Web Crawling',
                'stage': 'data',
                'highlight': [(1.5, 11)],
                'description': """
                <h2>🌐 Web Crawling: Building a Digital Library</h2>
                <p>Think of web crawling as sending out thousands of digital librarians to read and collect text from across the internet.</p>
                <b>What Actually Happens:</b>
                <ul>
                <li>🕷️ Automated programs visit billions of web pages</li>
                <li>📝 They extract and save the text content</li>
                <li>🌍 Common Crawl provides 250+ billion pages of data</li>
                <li>🔍 This creates a snapshot of human knowledge online</li>
                </ul>
                <b>The Challenge:</b>
                <p>Not all internet text is useful! The system filters out:</p>
                <ul>
                <li>Spam and advertisements (lots of junk online!)</li>
                <li>Duplicate content (same article on 100 sites)</li>
                <li>Personal information that shouldn't be used</li>
                <li>Low-quality or nonsensical text</li>
                </ul>
                """
            },
            {
                'name': '2. Books & Articles',
                'stage': 'data',
                'highlight': [(3.5, 11)],
                'description': """
                <h2>📚 Books & Academic Articles: Quality Knowledge</h2>
                <p>While the web provides quantity, published works provide quality. Books and papers teach the AI formal writing, complex reasoning, and verified facts.</p>
                <b>Why Books Matter:</b>
                <ul>
                <li>📖 Professional editing = higher quality writing</li>
                <li>🎓 Academic papers = rigorous, peer-reviewed facts</li>
                <li>📚 Literature = creative expression and storytelling</li>
                <li>📘 Textbooks = structured educational content</li>
                </ul>
                <b>Real Sources Used:</b>
                <ul>
                <li>Project Gutenberg: 70,000+ free classic books</li>
                <li>ArXiv: 2+ million scientific papers</li>
                <li>PubMed: 35+ million medical research articles</li>
                <li>Publisher partnerships for modern content</li>
                </ul>
                <p>This helps the AI learn to write professionally and understand complex topics deeply.</p>
                """
            },
            {
                'name': '3. Code Repositories',
                'stage': 'data',
                'highlight': [(5.5, 11)],
                'description': """
                <h2>💻 Code Repositories: Learning to Program</h2>
                <p>Modern LLMs are powerful coding assistants. They learn by studying millions of real programming examples.</p>
                <b>How AI Learns Programming:</b>
                <ul>
                <li>🐙 Studies millions of GitHub projects</li>
                <li>💡 Learns from code comments that explain logic</li>
                <li>🔍 Understands patterns across 100+ languages</li>
                <li>📋 Connects code with documentation</li>
                </ul>
                <b>Why Code Training Matters:</b>
                <p>Programming teaches the AI:</p>
                <ul>
                <li>Logical thinking and problem-solving</li>
                <li>Following precise syntax rules</li>
                <li>Breaking complex problems into steps</li>
                <li>Debugging and finding errors</li>
                </ul>
                <p>Result: AI that can help write, explain, and debug code!</p>
                """
            },
            {
                'name': '4. Data Cleaning',
                'stage': 'processing',
                'highlight': [(2.5, 9.5)],
                'description': """
                <h2>🧹 Data Cleaning: Quality Control</h2>
                <p>Raw internet data is messy! This step is like washing and preparing ingredients before cooking a meal.</p>
                <b>What Gets Cleaned:</b>
                <ul>
                <li>🏷️ <b>HTML tags removed:</b> Strips away web code, keeping only text</li>
                <li>🔧 <b>Character encoding fixed:</b> Ensures émojis and spëcial characters work</li>
                <li>🚫 <b>Toxic content filtered:</b> Removes harmful or inappropriate text</li>
                <li>📏 <b>Format standardized:</b> Makes everything consistent</li>
                </ul>
                <b>Deduplication - Why It Matters:</b>
                <p>The same news article might appear on 100 websites. Without deduplication:</p>
                <ul>
                <li>AI would memorize repeated content</li>
                <li>Training would be inefficient</li>
                <li>Model would be biased toward duplicated text</li>
                </ul>
                <p>Smart algorithms find and remove duplicates, keeping only the best version. This typically removes 30-40% of data!</p>
                """
            },
            {
                'name': '5. Quality Filtering',
                'stage': 'processing',
                'highlight': [(5, 9.5)],
                'description': """
                <h2>✅ Quality & Safety Filtering</h2>
                <p>Not all text should be used for training. This critical step ensures the AI learns from appropriate, high-quality content.</p>
                <b>Quality Checks - What Makes Good Training Data:</b>
                <ul>
                <li>📝 <b>Coherent writing:</b> Text must make logical sense</li>
                <li>📊 <b>Information density:</b> Content should be educational</li>
                <li>🌍 <b>Language verification:</b> Correctly identified languages</li>
                <li>📚 <b>Educational value:</b> Helps the AI learn something useful</li>
                </ul>
                <b>Privacy & Safety Protection:</b>
                <ul>
                <li>🔒 <b>PII removal:</b> Attempts to remove sensitive information</li>
                <li>⚖️ <b> Bias Reduction:</b> Balances perspectives tries to avoid unfair stereotypes</li>
                <li>🚫 <b>Harmful content removal:</b> Tries to remove hate speech and extreme content</li>
                <li>©️ <b>Copyright check:</b> Tries to exclude known copyrighted material</li>
                </ul>
                <p>Despite these attempts, the privacy and safety protection efficacy remains debatable!</p>
                """
            },
            {
                'name': '6. Tokenization',
                'stage': 'processing',
                'highlight': [(7.5, 9.5)],
                'description': """
                <h2>🔤 Tokenization: Teaching Computers to Read</h2>
                <p>Computers only understand numbers, not words! Tokenization converts text into numerical codes the AI can process.</p>
                <b>How It Works - Simple Example:</b>
                <ul>
                <li>Text: "Hello, world!"</li>
                <li>Tokens: [15496, 11, 995, 0]</li>
                <li>Where: "Hello"=15496, ","=11, "world"=995, "!"=0</li>
                </ul>
                <b>Smart Tokenization:</b>
                <p>Common words = 1 token, Rare words = multiple tokens</p>
                <ul>
                <li>"cat" → [9246] (1 token - common word)</li>
                <li>"cryptocurrency" → [23919, 16353] (2 tokens - less common)</li>
                <li>"🤖" → [129302] (emojis get their own tokens!)</li>
                </ul>
                <b>Why This Matters:</b>
                <ul>
                <li>Handles any word, even made-up ones</li>
                <li>Works across all languages seamlessly</li>
                <li>Efficient processing of billions of words</li>
                <li>Preserves meaning while enabling computation</li>
                </ul>
                """
            },
            {
                'name': '7. Transformer Architecture',
                'stage': 'pretraining',
                'highlight': [(1.5, 7.5), (3.5, 7.5), (5.5, 7.5)],
                'description': """
                <h2>🏗️ Transformer Architecture: The AI's Brain Structure</h2>
                <p>The Transformer is the breakthrough design that made ChatGPT, Claude, and other modern AI possible.</p>
                <b>Key Components Explained Simply:</b>
                <ul>
                <li>👁️ <b>Attention Mechanism:</b> Like human focus - the AI can "pay attention" to relevant words. When reading "The cat sat on the mat", it knows "sat" relates to "cat"</li>
                <li>🔄 <b>Multi-Head Attention:</b> Like having multiple experts each looking for different patterns (grammar expert, meaning expert, context expert)</li>
                <li>🔀 <b>Feed-Forward Networks:</b> Processes and refines understanding, like digesting information</li>
                <li>📍 <b>Positional Encoding:</b> Understands word order - "Dog bites man" vs "Man bites dog"</li>
                </ul>
                <b>The Incredible Scale:</b>
                <ul>
                <li>GPT-3: 175 billion parameters (like 175 billion adjustable knobs)</li>
                <li>GPT-4: ~1.76 trillion parameters (10x bigger!)</li>
                <li>96-120 layers deep (like a 120-story building of processing)</li>
                </ul>
                <p>Each layer adds deeper understanding, from basic grammar to complex reasoning!</p>
                """
            },
            {
                'name': '8. Pre-training',
                'stage': 'pretraining',
                'highlight': [(2.5, 6), (6, 6)],
                'description': """
                <h2>🎯 Pre-training: Learning Language Patterns</h2>
                <p>The AI learns by playing a massive "guess the next word" game billions of times.</p>
                <b>The Learning Game:</b>
                <p>Given: "The capital of France is..."</p>
                <p>AI tries to predict: "Paris"</p>
                <p>If wrong, it adjusts. If right, it reinforces that pattern.</p>
                <b>What Emerges From This Simple Task:</b>
                <ul>
                <li>Grammar and writing styles</li>
                <li>Facts about the world</li>
                <li>Logical reasoning abilities</li>
                <li>Creative expression</li>
                <li>Problem-solving skills</li>
                </ul>
                <b>The Mind-Blowing Scale:</b>
                <ul>
                <li>⚡ <b>Computing power:</b> 1000+ top-tier GPUs running 24/7</li>
                <li>⏱️ <b>Training duration:</b> 3-6 months non-stop</li>
                <li>📊 <b>Data processed:</b> Trillions of word predictions</li>
                <li>💰 <b>Cost:</b> $10-50 million just in electricity and compute</li>
                </ul>
                <p>Like teaching a child to read by having them read every book in existence!</p>
                """
            },
            {
                'name': '9. Supervised Fine-Tuning',
                'stage': 'alignment',
                'highlight': [(10, 7.5), (12.5, 7.5)],
                'description': """
                <h2>🎓 Supervised Fine-Tuning: Teaching Good Behavior</h2>
                <p>Pre-training creates a genius that doesn't know how to be helpful. Fine-tuning teaches it to be a useful assistant.</p>
                <b>The Transformation:</b>
                <p>Before: AI that can complete text but might write anything</p>
                <p>After: AI that follows instructions and helps users</p>
                <b>How It's Taught:</b>
                <ul>
                <li>🎯 <b>Following instructions:</b> "Write a poem" → AI writes a poem (not a recipe!)</li>
                <li>❓ <b>Answering questions:</b> Provides helpful, accurate responses</li>
                <li>🚫 <b>Refusing bad requests:</b> Politely declines harmful instructions</li>
                <li>💬 <b>Conversation skills:</b> Maintains context and coherent dialogue</li>
                </ul>
                <b>The Training Data:</b>
                <ul>
                <li>~100,000 carefully crafted examples (quality over quantity)</li>
                <li>Human experts write ideal responses</li>
                <li>Covers diverse scenarios and use cases</li>
                <li>Emphasizes being helpful, harmless, and honest</li>
                </ul>
                <p>Like the difference between knowing a language and knowing how to use it professionally!</p>
                """
            },
            {
                'name': '10. Human Feedback',
                'stage': 'alignment',
                'highlight': [(15, 7.5), (10, 6)],
                'description': """
                <h2>👥 Human Feedback: Learning Human Values</h2>
                <p>AI doesn't inherently know what humans consider good or bad. This stage teaches human preferences through direct feedback.</p>
                <b>How Humans Teach the AI:</b>
                <ul>
                <li>⭐ <b>Rating responses:</b> Humans score outputs from excellent to poor</li>
                <li>🥇 <b>Ranking comparisons:</b> "Is response A or B better?" teaches nuanced preferences</li>
                <li>✅ <b>Safety checks:</b> Identifying potentially harmful outputs</li>
                <li>💡 <b>Helpfulness scoring:</b> How well does it actually help the user?</li>
                </ul>
                <b>Building a Reward Model - An AI Preference Predictor:</b>
                <p>From thousands of human ratings, a separate AI learns to predict what humans like:</p>
                <ul>
                <li>Acts as an automated quality checker</li>
                <li>Can evaluate millions of responses quickly</li>
                <li>Guides the main AI toward better behavior</li>
                <li>Like having an AI teaching assistant that knows what the professor wants</li>
                </ul>
                <p>This bridges the gap between raw capability and human-aligned helpfulness!</p>
                """
            },
            {
                'name': '11. RLHF Training',
                'stage': 'alignment',
                'highlight': [(13, 6), (16, 6)],
                'description': """
                <h2>🔄 RLHF: Learning Through Reinforcement</h2>
                <p>RLHF (Reinforcement Learning from Human Feedback) is like training with a coach who rewards good performance.</p>
                <b>The Training Loop - How It Works:</b>
                <ol>
                <li>💬 AI generates a response to a prompt</li>
                <li>⭐ Reward model scores it (predicting human preference)</li>
                <li>📈 AI adjusts to maximize reward scores</li>
                <li>🔄 Repeat millions of times</li>
                </ol>
                <b>Key Methods Explained Simply:</b>
                <ul>
                <li>🎯 <b>PPO (Proximal Policy Optimization):</b> Like carefully adjusting your golf swing - small improvements each time rather than drastic changes that might make things worse</li>
                <li>📊 <b>DPO (Direct Preference Optimization):</b> Learning directly from "A is better than B" comparisons - simpler but effective</li>
                <li>📜 <b>Constitutional AI:</b> Teaching AI to follow a set of principles (like a constitution) for self-improvement</li>
                <li>⚖️ <b>KL Divergence Penalty:</b> Prevents the AI from changing too much from its original training - maintains stability while improving</li>
                </ul>
                <p>Result: AI that naturally produces helpful, accurate, and safe responses!</p>
                """
            },
            {
                'name': '12. Evaluation',
                'stage': 'evaluation',
                'highlight': [(9.5, 4), (11.5, 4), (13.5, 4), (15.5, 4)],
                'description': """
                <h2>📊 Comprehensive Evaluation: Testing Everything</h2>
                <p>Before release, the model faces rigorous testing - like a final exam covering every possible subject!</p>
                <b>Academic Benchmarks - Testing Intelligence:</b>
                <ul>
                <li>🧠 <b>MMLU (Massive Multitask Language Understanding):</b> Tests knowledge across 57 subjects - like taking every AP exam at once</li>
                <li>💡 <b>HellaSwag:</b> Tests common sense - can the AI complete everyday scenarios logically?</li>
                <li>💻 <b>HumanEval:</b> Tests programming ability - can it write working code?</li>
                <li>✅ <b>TruthfulQA:</b> Tests honesty - does it make things up or stick to facts?</li>
                </ul>
                <b>Safety Testing - Finding Weaknesses:</b>
                <ul>
                <li>🔴 <b>Red Team Attacks:</b> Security experts try to "break" the AI - attempting to make it say harmful things to find vulnerabilities</li>
                <li>⚖️ <b>Bias Testing:</b> Checking for unfair treatment - does it show prejudice against any groups?</li>
                <li>🚫 <b>Toxicity Detection:</b> Ensuring no harmful content - even in edge cases</li>
                <li>🔓 <b>Jailbreak Resistance:</b> Can users trick it into ignoring safety rules? Testing various clever workarounds</li>
                </ul>
                <p>Only models passing all tests move to deployment!</p>
                """
            },
            {
                'name': '13. Deployment',
                'stage': 'deployment',
                'highlight': [(1.5, 4), (3.5, 4), (5.5, 4)],
                'description': """
                <h2>🚢 Model Deployment: Going Live</h2>
                <p>The model is ready! Now it needs optimization to serve millions of users efficiently and affordably.</p>
                <b>Making It Fast - Optimization Techniques:</b>
                <ul>
                <li>📉 <b>Quantization:</b> Like compressing a photo - reduces precision slightly but saves huge amounts of memory</li>
                <li>🗜️ <b>Model Distillation:</b> Creating a smaller "student" model that learns from the big model - 10x smaller, nearly as smart</li>
                <li>⚡ <b>Flash Attention:</b> Mathematical tricks to process long texts faster</li>
                <li>🚀 <b>Tensor Parallelism:</b> Splitting the model across multiple GPUs - like having multiple brains work together</li>
                </ul>
                <b>Infrastructure at Scale:</b>
                <ul>
                <li>🔌 <b>API Endpoints:</b> Secure connections allowing apps to use the AI</li>
                <li>⚖️ <b>Load Balancing:</b> Distributing millions of requests across thousands of servers</li>
                <li>💾 <b>Smart Caching:</b> Remembering common questions to answer instantly</li>
                <li>🌍 <b>Geographic Distribution:</b> Servers worldwide for fast, local responses</li>
                </ul>
                <p>Now capable of having millions of conversations simultaneously!</p>
                """
            },
            {
                'name': '14. Monitoring',
                'stage': 'deployment',
                'highlight': [(2.5, 2.8), (5.5, 2.8)],
                'description': """
                <h2>📈 Continuous Monitoring: Always Improving</h2>
                <p>Deployment isn't the end - it's the beginning of continuous learning from real-world use.</p>
                <b>What Gets Monitored:</b>
                <ul>
                <li>📊 <b>Usage Patterns:</b> What do people actually ask? What confuses the AI?</li>
                <li>⭐ <b>Quality Metrics:</b> Response accuracy, user satisfaction ratings</li>
                <li>❌ <b>Error Analysis:</b> When and why does the AI make mistakes?</li>
                <li>⚡ <b>Performance:</b> Response speed, server health, uptime</li>
                </ul>
                <b>The Improvement Cycle:</b>
                <ul>
                <li>💬 <b>User Feedback:</b> Thumbs up/down, bug reports, feature requests</li>
                <li>📈 <b>Pattern Detection:</b> Finding common issues or opportunities</li>
                <li>🔄 <b>Regular Updates:</b> New versions with improvements</li>
                <li>🧪 <b>A/B Testing:</b> Comparing different approaches with real users</li>
                </ul>
                <p>Like a restaurant that constantly refines recipes based on customer feedback - the AI keeps getting better!</p>
                """
            }
        ]
        # Index 0 is the 'Overview' entry.
        # Presumably the index of the step currently displayed — confirm against the widget callbacks.
        self.current_step = 0
        # Must run after self.steps is populated: setup_widgets reads len(self.steps)
        # and the first step's name and description.
        self.setup_widgets()

    def create_box(self, ax, x, y, width, height, text, color, fontsize=9, alpha=0.8):
        """Draw a rounded, labelled rectangle centred on (x, y).

        Args:
            ax: Matplotlib axes that receives the patch and the label.
            x, y: Centre of the box in data coordinates.
            width, height: Box size in data units (before the 0.1 rounding pad).
            text: Label drawn centred inside the box.
            color: Fill color of the box.
            fontsize: Font size of the label.
            alpha: Opacity of the box patch (the label is not affected).

        Returns:
            The FancyBboxPatch that was added to ``ax``.
        """
        outline = '#2C3E50'

        # FancyBboxPatch is anchored at its lower-left corner, so shift by half
        # the size to centre it on (x, y).
        lower_left = (x - width/2, y - height/2)
        patch_options = dict(
            boxstyle="round,pad=0.1",
            facecolor=color,
            edgecolor=outline,
            linewidth=2,
            alpha=alpha,
        )
        rounded = FancyBboxPatch(lower_left, width, height, **patch_options)
        ax.add_patch(rounded)

        label_options = dict(
            ha='center',
            va='center',
            fontsize=fontsize,
            fontweight='bold',
            color=outline,
        )
        ax.text(x, y, text, **label_options)
        return rounded

    def create_arrow(self, ax, x1, y1, x2, y2, text='', curved=False, color='#34495E', alpha=0.8):
        """Draw an arrow from (x1, y1) to (x2, y2), optionally with a label.

        Args:
            ax: Matplotlib axes that receives the arrow and the label.
            x1, y1: Tail of the arrow in data coordinates.
            x2, y2: Head of the arrow in data coordinates.
            text: Optional label; nothing is drawn when it is empty.
            curved: Use an arc (rad=0.3) instead of a straight line.
            color: Color of both the arrow and its label.
            alpha: Opacity of the arrow (the label keeps its own opacity).
        """
        connection_style = "arc3,rad=0.3" if curved else "arc3,rad=0"
        tail, head = (x1, y1), (x2, y2)
        ax.add_patch(FancyArrowPatch(
            tail, head,
            connectionstyle=connection_style,
            arrowstyle='->,head_width=0.3,head_length=0.3',
            linewidth=2.5,
            color=color,
            alpha=alpha,
        ))

        if not text:
            return

        # The label sits at the midpoint of the straight segment between the
        # end points ...
        label_x = (x1 + x2) / 2
        label_y = (y1 + y2) / 2
        if curved:
            # ... and is shifted vertically by 0.3 for curved arrows: upwards
            # when the arrow rises, downwards otherwise.
            label_y = label_y + (0.3 if y1 < y2 else -0.3)

        label_background = dict(boxstyle="round,pad=0.2", facecolor='white', alpha=0.9)
        ax.text(label_x, label_y, text,
                ha='center', va='center',
                fontsize=8, fontweight='bold', color=color,
                bbox=label_background)

    def draw_pipeline(self, step_index):
        """Render the whole pipeline diagram with one step emphasised.

        Boxes whose centre matches one of the step's 'highlight' coordinates
        are drawn in the highlight color; all other boxes are greyed out.
        A step whose stage is 'all' draws every box in its normal color.

        Args:
            step_index: Index into ``self.steps`` of the step to emphasise.

        Returns:
            The newly created matplotlib figure. It is neither shown nor
            closed here.
        """
        ink = '#2C3E50'
        palette = self.colors

        fig, ax = plt.subplots(1, 1, figsize=(14, 10))
        ax.set_xlim(0, 20)
        ax.set_ylim(0, 14)
        ax.axis('off')

        # Step being displayed and the box centres it wants emphasised
        current = self.steps[step_index]
        targets = current.get('highlight', [])

        # Diagram title
        ax.text(10, 13.2, 'Modern Large Language Model Development Pipeline',
                fontsize=18, fontweight='bold', ha='center', color=ink)

        stage = current['stage']

        def resolve_style(x, y, default_color):
            """Return (fill color, alpha) for the box centred on (x, y)."""
            if stage == 'all':
                return default_color, 0.8
            for hx, hy in targets:
                # Tolerance instead of equality because the coordinates are floats
                if abs(x - hx) < 0.1 and abs(y - hy) < 0.1:
                    return palette['highlight'], 1.0
            return palette['inactive'], 0.3

        def stage_banner(x, y, title, active_stages, color_key):
            """Draw a stage heading, tinted only while one of its stages is active."""
            tint = palette[color_key] if stage in active_stages else palette['inactive']
            ax.text(x, y, title,
                    fontsize=11, fontweight='bold', ha='center', color=ink,
                    bbox=dict(boxstyle="round,pad=0.3", facecolor=tint, alpha=0.3))

        def box_row(entries, y, color_key, width, height):
            """Draw a horizontal row of equally sized boxes at height y."""
            for x, label in entries:
                fill, opacity = resolve_style(x, y, palette[color_key])
                self.create_box(ax, x, y, width, height, label, fill, 8, alpha=opacity)

        # ---- STAGE 1: DATA COLLECTION & PROCESSING ----
        stage_banner(3, 12.2, 'STAGE 1: DATA COLLECTION & PROCESSING',
                     ('all', 'data', 'processing'), 'data')

        # Raw data sources
        box_row([
            (1.5, 'Web Crawling\n(Common Crawl)'),
            (3.5, 'Books &\nArticles'),
            (5.5, 'Code\nRepositories'),
            (7.5, 'Scientific\nPapers'),
        ], 11, 'data', 1.8, 0.8)

        # Data processing
        box_row([
            (2.5, 'Data Cleaning\n& Deduplication'),
            (5, 'Quality Filtering\n& Safety'),
            (7.5, 'Tokenization\n(BPE/SentencePiece)'),
        ], 9.5, 'processing', 2, 0.8)

        # ---- STAGE 2: PRE-TRAINING ----
        stage_banner(3, 8.2, 'STAGE 2: PRE-TRAINING', ('all', 'pretraining'), 'pretraining')

        # Architecture components
        box_row([
            (1.5, 'Transformer\nArchitecture'),
            (3.5, 'Multi-Head\nAttention'),
            (5.5, 'Feed Forward\nNetworks'),
        ], 7.5, 'pretraining', 1.8, 0.8)

        # Training process: these two boxes differ in size, color and font size
        for x, width, height, label, color_key, size in [
            (2.5, 2.5, 1, 'Next Token Prediction\n(Autoregressive)', 'pretraining', 9),
            (6, 2, 0.8, 'Distributed Training\n(1000s GPUs)', 'infrastructure', 8),
        ]:
            fill, opacity = resolve_style(x, 6, palette[color_key])
            self.create_box(ax, x, 6, width, height, label, fill, size, alpha=opacity)

        # ---- STAGE 3: ALIGNMENT & FINE-TUNING ----
        stage_banner(12, 8.2, 'STAGE 3: ALIGNMENT & FINE-TUNING',
                     ('all', 'alignment'), 'alignment')

        # Fine-tuning components (two rows, individual colors)
        alignment_boxes = [
            (10, 7.5, 'Supervised\nFine-Tuning', 'alignment'),
            (12.5, 7.5, 'Instruction\nDatasets', 'alignment'),
            (15, 7.5, 'Human Feedback\nCollection', 'feedback'),
            (10, 6, 'Reward Model\nTraining', 'alignment'),
            (13, 6, 'PPO/DPO\n(RLHF)', 'alignment'),
            (16, 6, 'Constitutional AI\n(CAI)', 'alignment'),
        ]
        for x, y, label, color_key in alignment_boxes:
            fill, opacity = resolve_style(x, y, palette[color_key])
            # The PPO/DPO box is drawn slightly wider than its neighbours
            box_width = 2.5 if 'PPO' in label else 2.2
            self.create_box(ax, x, y, box_width, 0.8, label, fill, 8, alpha=opacity)

        # ---- STAGE 4: EVALUATION & SAFETY ----
        # Banner sits at y=4.9 so the 'Aligned Model' arrow does not cross it
        stage_banner(12.5, 4.9, 'STAGE 4: EVALUATION & SAFETY',
                     ('all', 'evaluation'), 'evaluation')
        box_row([
            (9.5, 'Benchmark\nTesting'),
            (11.5, 'Safety\nEvaluations'),
            (13.5, 'Bias\nTesting'),
            (15.5, 'Red Team\nTesting'),
        ], 4, 'evaluation', 1.8, 0.7)

        # ---- STAGE 5: DEPLOYMENT & MONITORING ----
        stage_banner(3, 4.7, 'STAGE 5: DEPLOYMENT & MONITORING',
                     ('all', 'deployment'), 'deployment')
        box_row([
            (1.5, 'Model\nOptimization'),
            (3.5, 'API\nDeployment'),
            (5.5, 'Scaling\nInfrastructure'),
        ], 4, 'deployment', 1.8, 0.7)
        box_row([
            (2.5, 'Usage Monitoring'),
            (5.5, 'Continuous Learning'),
        ], 2.8, 'deployment', 2.2, 0.8)

        # ---- Infrastructure layer (wide band along the bottom) ----
        band_alpha = 0.3 if stage == 'all' else 0.1
        ax.add_patch(FancyBboxPatch(
            (0.5, 0.5), 19, 1.5,
            boxstyle="round,pad=0.1",
            facecolor=palette['infrastructure'],
            edgecolor=ink,
            linewidth=2,
            alpha=band_alpha,
        ))
        ax.text(10, 1.5, 'INFRASTRUCTURE LAYER', fontsize=10, fontweight='bold',
                ha='center', color=ink, alpha=0.7)

        # ---- Arrows ----
        # Source -> processing arrows are always drawn; they are only faded
        # when the current step belongs to a later stage.
        feed_alpha = 0.8 if stage in ('all', 'data', 'processing') else 0.2
        for top_x, bottom_x in [(1.5, 2.0), (3.5, 3.0), (5.5, 5.0), (7.5, 7.5)]:
            self.create_arrow(ax, top_x, 10.6, bottom_x, 9.9,
                              color='#E74C3C', alpha=feed_alpha)

        # Stage-to-stage arrows appear only when one of the two stages they
        # connect is active (or in the overview).
        hand_offs = [
            (('all', 'processing', 'pretraining'),
             (7.5, 9.1, 2.5, 7.9), 'Tokenized Data', True, '#F1C40F'),
            (('all', 'pretraining', 'alignment'),
             (6.5, 6, 9, 7.1), 'Base Model', True, '#27AE60'),
            # Routed to stay clear of the Stage 4 banner
            (('all', 'alignment', 'evaluation'),
             (13, 5.6, 11.5, 4.5), 'Aligned Model', True, '#9B59B6'),
            # Straight arrow, placed to stay clear of the Benchmark Testing box
            (('all', 'evaluation', 'deployment'),
             (8.5, 3.8, 6.5, 3.8), 'Validated Model', False, '#9B59B6'),
        ]
        for visible_in, (sx, sy, ex, ey), label, bend, hue in hand_offs:
            if stage in visible_in:
                self.create_arrow(ax, sx, sy, ex, ey, label,
                                  curved=bend, color=hue, alpha=0.8)

        plt.tight_layout()
        return fig

    def setup_widgets(self):
        """Build all control widgets and connect the navigation callbacks.

        Creates the step slider, step label, previous/next buttons, the
        "Jump to" dropdown, the description panel and the figure output
        area, storing each on ``self``. The callbacks keep
        ``self.current_step`` and the slider in sync; the slider's change
        event is what triggers ``update_display``. Ends by rendering the
        initial state.
        """
        first_step = self.steps[0]

        def nav_button(caption):
            # Build a fresh Layout per button so the two buttons never share one.
            return widgets.Button(
                description=caption,
                button_style='primary',
                layout=widgets.Layout(width='100px', height='35px'),
            )

        # --- Widgets -------------------------------------------------------
        self.step_slider = widgets.IntSlider(
            value=0,
            min=0,
            max=len(self.steps) - 1,
            step=1,
            description='',
            continuous_update=False,
            orientation='horizontal',
            readout=False,
            layout=widgets.Layout(width='350px'),
        )

        self.step_label = widgets.HTML(
            value=f"<h3 style='margin: 0;'>{first_step['name']}</h3>",
            layout=widgets.Layout(width='350px'),
        )

        self.prev_button = nav_button('◀ Previous')
        self.next_button = nav_button('Next ▶')

        jump_targets = ['Overview', *(f'Step {i}' for i in range(1, len(self.steps)))]
        self.speed_dropdown = widgets.Dropdown(
            options=jump_targets,
            value='Overview',
            description='Jump to:',
            layout=widgets.Layout(width='200px'),
        )

        description_layout = widgets.Layout(
            width='400px',
            height='600px',
            padding='20px',
            overflow_y='auto',
        )
        self.description_output = widgets.HTML(
            value=first_step['description'],
            layout=description_layout,
        )

        self.figure_output = widgets.Output(
            layout=widgets.Layout(width='900px', height='700px')
        )

        # --- Callbacks -----------------------------------------------------
        def slider_moved(event):
            # The slider is the single trigger for a redraw.
            self.current_step = event['new']
            self.update_display()

        def go_back(_button):
            if self.current_step <= 0:
                return
            self.current_step -= 1
            self.step_slider.value = self.current_step

        def go_forward(_button):
            if self.current_step >= len(self.steps) - 1:
                return
            self.current_step += 1
            self.step_slider.value = self.current_step

        def jump_selected(event):
            choice = event['new']
            # Labels other than 'Overview' look like 'Step <n>'; take the number.
            self.current_step = 0 if choice == 'Overview' else int(choice.split()[1])
            self.step_slider.value = self.current_step

        # --- Wiring --------------------------------------------------------
        self.step_slider.observe(slider_moved, names='value')
        self.prev_button.on_click(go_back)
        self.next_button.on_click(go_forward)
        self.speed_dropdown.observe(jump_selected, names='value')

        # Render the starting state.
        self.update_display()

    def update_display(self) -> None:
        """Refresh every widget so it reflects ``self.current_step``.

        Updates the step label, re-renders the styled description panel,
        disables the previous/next buttons at the first/last step, syncs the
        "Jump to" dropdown, and redraws the pipeline figure inside
        ``self.figure_output``.
        """
        step = self.steps[self.current_step]
        self.step_label.value = f"<h3 style='margin: 5px 0; color: #2C3E50;'>{step['name']}</h3>"
        
        # Style the description with better formatting
        styled_description = f"""
        <div style='
            background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
            padding: 20px;
            border-radius: 10px;
            height: 580px;
            overflow-y: auto;
            box-shadow: inset 0 2px 4px rgba(0,0,0,0.1);
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
        '>
            {step['description']}
        </div>
        """
        self.description_output.value = styled_description

        # Update buttons: nothing to go back to on the first step, nothing
        # to advance to on the last.
        self.prev_button.disabled = (self.current_step == 0)
        self.next_button.disabled = (self.current_step == len(self.steps) - 1)
        
        # Update dropdown: step 0 is labelled 'Overview', the rest 'Step <n>'.
        if self.current_step == 0:
            self.speed_dropdown.value = 'Overview'
        else:
            self.speed_dropdown.value = f'Step {self.current_step}'

        # Close the figure left over from the previous redraw. The inline
        # backend already closes figures on show() by default, but backends
        # that do not would otherwise accumulate one open figure per
        # navigation step. Closing an already-closed figure is a no-op.
        stale_fig = getattr(self, '_displayed_fig', None)
        if stale_fig is not None:
            plt.close(stale_fig)

        # Redraw figure
        with self.figure_output:
            clear_output(wait=True)
            self._displayed_fig = self.draw_pipeline(self.current_step)
            plt.show()

    def display(self):
        """Assemble the widgets into the page layout and show it.

        The figure output and the control column are placed side by side in
        a horizontal box (figure first), underneath a centered page title.
        The control column stacks the step label, the slider, the navigation
        row and the description panel.
        """
        # Navigation buttons and the jump dropdown share a single row.
        nav_row = widgets.HBox(
            [self.prev_button, self.next_button, self.speed_dropdown]
        )

        # Control column shown next to the figure.
        side_column = widgets.VBox(
            [
                self.step_label,
                self.step_slider,
                nav_row,
                self.description_output,
            ],
            layout=widgets.Layout(padding='10px', width='420px'),
        )

        # Figure and controls, side by side.
        body_layout = widgets.Layout(
            display='flex',
            align_items='flex-start',
            width='100%',
        )
        body = widgets.HBox([self.figure_output, side_column], layout=body_layout)

        # Page title.
        heading = widgets.HTML(
            value="<h1 style='text-align: center; color: #2C3E50; margin-bottom: 20px;'>🚀 Interactive LLM Development Pipeline</h1>"
        )

        # Title on top, everything else below.
        display(widgets.VBox([heading, body]))

# Usage: create the interactive pipeline explorer and render its widget
# layout as the output of this notebook cell.
pipeline = LLMPipelineInteractive()
pipeline.display()

VBox(children=(HTML(value="<h1 style='text-align: center; color: #2C3E50; margin-bottom: 20px;'>🚀 Interactive …