In [7]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML, display
import ipywidgets as widgets
import plotly.graph_objects as go
from collections import defaultdict, Counter
import random
import requests
import json
import time

# ============================================
# DARTMOUTH AI API SETUP - USING WORKING ENDPOINTS
# ============================================
from auth_helpers import load_token_from_file

# Base URL that every endpoint path below is appended to.
API_BASE_URL = "https://api.dartmouth.edu"

# Working models only!
# Maps the model name accepted by generate_text() to its endpoint path.
# Both paths follow the same pattern: "/api/ai/tgi/<model name>-hf".
WORKING_MODELS = {
    model_name: f"/api/ai/tgi/{model_name}-hf"
    for model_name in ("codellama-13b-instruct", "codellama-13b-python")
}

def generate_text(prompt, model="codellama-13b-instruct", max_tokens=50, temperature=0.7):
    """Generate text using working Dartmouth models.

    Args:
        prompt: Text sent as the request's "inputs".
        model: Key into WORKING_MODELS. Unknown names silently fall back to
            "codellama-13b-instruct".
        max_tokens: Sent to the API as "max_new_tokens".
        temperature: Sent to the API as "temperature" (sampling is always on).

    Returns:
        dict: Always a dict, so callers can safely use ``.get('generated_text')``.
        On success it is the decoded JSON object (the first element when the
        API answers with a list). When no token is available the value is
        ``{"generated_text": prompt + " [demo mode - no API]"}``. On a non-200
        status, a request/decoding error, or a response that is not a JSON
        object, it is ``{"generated_text": prompt + " [API temporarily unavailable]"}``.
    """
    def unavailable():
        # Built lazily so the placeholder is only formatted when it is needed.
        return {"generated_text": prompt + " [API temporarily unavailable]"}

    jwt_token = load_token_from_file()

    if not jwt_token:
        # Fallback for demo
        return {"generated_text": prompt + " [demo mode - no API]"}

    # Default to a working model when the requested one is unknown.
    endpoint = WORKING_MODELS.get(model, WORKING_MODELS["codellama-13b-instruct"])
    url = f"{API_BASE_URL}{endpoint}/generate"

    headers = {"Authorization": f"Bearer {jwt_token}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "do_sample": True,
            "return_full_text": False
        }
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code != 200:
            print(f"API Error {response.status_code}: {response.text[:100]}")
            return unavailable()
        result = response.json()
    except Exception as e:
        # Deliberately broad: this is a best-effort demo helper, so any failure
        # is reported and replaced by the placeholder instead of stopping the cell.
        print(f"Generation error: {e}")
        return unavailable()

    # The API may wrap the result in a list; unwrap the first element.
    if isinstance(result, list):
        result = result[0] if result else None

    if isinstance(result, dict):
        return result

    # An empty list or any other non-object payload would break callers that
    # call .get() on the return value, so treat it like an unavailable API.
    print(f"Generation error: unexpected response shape ({type(result).__name__})")
    return unavailable()

def generate_next_word_predictions(prompt, n_predictions=5):
    """Generate multiple next-word predictions to show the concept.

    Calls ``generate_text(prompt, max_tokens=1, temperature=0.8)``
    ``n_predictions`` times and keeps the first whitespace-separated word of
    each non-empty result.

    Args:
        prompt: Text to continue.
        n_predictions: Number of samples to request (duplicates are merged, so
            fewer words may be returned).

    Returns:
        list[str]: Distinct predicted words in the order they were first seen.
    """
    # A dict keeps insertion order, giving a stable de-duplicated result
    # (a set would return the words in an arbitrary order from run to run).
    first_seen = {}
    echoed_prompt = prompt.strip()

    for _ in range(n_predictions):
        result = generate_text(prompt, max_tokens=1, temperature=0.8)
        generated = result.get('generated_text', '').strip()

        # Extract just the new part: drop the prompt only when it is echoed at
        # the start, never occurrences further inside the generated text.
        if echoed_prompt and generated.startswith(echoed_prompt):
            generated = generated[len(echoed_prompt):]

        tokens = generated.split()
        if tokens:
            first_seen.setdefault(tokens[0], None)

    return list(first_seen)

def demonstrate_attention_concept(sentence):
    """Build a simulated (not learned) attention matrix for a sentence.

    Random scores are drawn with ``np.random.rand``, damped by
    ``exp(-distance / 3)`` where ``distance`` is how far apart two word
    positions are, and then each row is scaled to sum to 1.

    Args:
        sentence: Text that is split on whitespace into words.

    Returns:
        tuple: ``(words, attention_matrix)`` where ``attention_matrix`` is a
        square array with one row and one column per word.
    """
    words = sentence.split()
    n_words = len(words)

    # Simulated raw scores (a real model would learn these).
    raw_scores = np.random.rand(n_words, n_words)

    # |i - j| for every pair of positions, so nearby words get damped less.
    positions = np.arange(n_words)
    distance = np.abs(positions[:, None] - positions[None, :])
    damped_scores = raw_scores * np.exp(-distance / 3)

    # Each row becomes a distribution over the words it attends to.
    attention_matrix = damped_scores / damped_scores.sum(axis=1, keepdims=True)

    return words, attention_matrix

# Initialize the system: report whether a token is available and, if it is,
# run one real generation as a smoke test.
print("🚀 SETTING UP REAL LLM GENERATION", "=" * 60, sep="\n")

jwt_token = load_token_from_file()
api_working = bool(jwt_token)

if api_working:
    print("✅ JWT token loaded!")
    print("✅ Using working models:")
    for model_name in WORKING_MODELS:
        print(f"   • {model_name}")

    # Test generation
    print("\n🧪 Testing real generation:")
    test_result = generate_text("The future of AI is", max_tokens=20)
    print(f"   Result: {test_result.get('generated_text', 'Error')}")

    print("\n🔥 READY FOR LLM DEMONSTRATIONS!")
    for feature in ("Next-word prediction", "Attention visualization", "Real API calls"):
        print(f"   • {feature} ✅")
else:
    print("⚠️ No token - using demo mode")

print("=" * 60)

# Test functions for your notebook (skipped entirely in demo mode)
if api_working:
    print("\n📝 Quick demo of capabilities:")

    # Demo 1: Next word prediction
    prompt = "Machine learning is"
    predictions = generate_next_word_predictions(prompt, 3)
    print(f"\nNext word predictions for '{prompt}':")
    for pred in predictions:
        print(f"   → {prompt} {pred}")

    # Demo 2: Longer generation
    result = generate_text("Artificial intelligence will help humans by", max_tokens=30)
    print("\nLonger generation:")
    print(f"   {result.get('generated_text', 'Error')}")

    # Demo 3: Attention concept
    sentence = "The cat sat on the mat"
    words, attention = demonstrate_attention_concept(sentence)
    print(f"\nAttention concept for: '{sentence}'")
    print(f"   Words: {words}")
    print(f"   Each word 'attends' to others (attention matrix: {attention.shape})")

print("\n🎯 Your generate_text() function is ready!")
print("   Usage: generate_text('Your prompt here', max_tokens=50)")

🚀 SETTING UP REAL LLM GENERATION
✅ JWT token loaded!
✅ Using working models:
   • codellama-13b-instruct
   • codellama-13b-python

🧪 Testing real generation:
   Result:  uncertain, but we can’t afford to wait for it to happen.
It’s 

🔥 READY FOR LLM DEMONSTRATIONS!
   • Next-word prediction ✅
   • Attention visualization ✅
   • Real API calls ✅

📝 Quick demo of capabilities:

Next word predictions for 'Machine learning is':
   → Machine learning is a
   → Machine learning is becoming

Longer generation:
    allowing them to work less and live longer.
A robot, which will be the first of its kind in the world, has been created to take

Attention concept for: 'The cat sat on the mat'
   Words: ['The', 'cat', 'sat', 'on', 'the', 'mat']
   Each word 'attends' to others (attention matrix: (6, 6))

🎯 Your generate_text() function is ready!
   Usage: generate_text('Your prompt here', max_tokens=50)


In [8]:
class RealLanguageModel:
    """Demonstrates sliding window limitations using actual LLM.

    The model only "sees" the last ``window_size`` words of a text when
    predicting the next word. With a ``jwt_token`` the prediction comes from
    ``generate_text``; without one a small hard-coded lookup is used.
    """

    def __init__(self, window_size=3, jwt_token=None):
        """Store the window size and token and build the offline lookup.

        Args:
            window_size: Number of trailing words visible to the model.
            jwt_token: Token that enables real API calls; falsy means offline.
        """
        self.window_size = window_size
        self.jwt_token = jwt_token
        # Fallback predictions for when API is unavailable
        self.fallback_map = {
            ('research', 'finally', 'published'): ['a', 'the', 'in'],  # Wrong!
            ('scientist', 'finally', 'published'): ['her', 'his', 'their'],  # Right!
            ('the', 'cat', 'sat'): ['on', 'down', 'quietly'],
            ('machine', 'learning', 'algorithms'): ['can', 'are', 'help'],
            ('artificial', 'intelligence', 'will'): ['transform', 'help', 'improve']
        }

    @staticmethod
    def _first_new_word(generated, prompt, default):
        """Return the first word of ``generated`` that follows a leading echo of ``prompt``.

        Only an echo at the very start is removed; occurrences of the prompt
        further inside the generated text are left alone. ``default`` is
        returned when nothing usable remains.
        """
        if not generated:
            return default
        remainder = generated.lstrip()
        echoed_prompt = prompt.strip()
        if echoed_prompt and remainder.startswith(echoed_prompt):
            remainder = remainder[len(echoed_prompt):]
        tokens = remainder.split()
        return tokens[0] if tokens else default

    def _split_context(self, words, window_size=None):
        """Split ``words`` into ``(hidden, visible)`` for the given window.

        The window is clamped to ``[0, len(words)]`` so that a window of 0
        hides everything (a plain ``words[-0:]`` slice would expose the whole
        text instead).
        """
        window = self.window_size if window_size is None else window_size
        window = max(0, min(window, len(words)))
        cut = len(words) - window
        return words[:cut], words[cut:]

    def predict_next_word(self, text, use_api=True, window_size=None):
        """Predict the next word from only the visible tail of ``text``.

        Args:
            text: Full input text.
            use_api: When True and a token is set, query ``generate_text``;
                otherwise use ``fallback_map``.
            window_size: Optional override of ``self.window_size`` for this
                call only.

        Returns:
            dict: ``full_text``, ``hidden`` (words outside the window),
            ``visible`` (words inside it) and ``prediction`` ('???' when no
            prediction is available).
        """
        words = text.split()

        # Simulate sliding window
        hidden_context, visible_context = self._split_context(words, window_size)

        # Get prediction
        if use_api and self.jwt_token:
            # Use real LLM with limited context
            limited_prompt = ' '.join(visible_context)
            result = generate_text(limited_prompt, max_tokens=1, temperature=0.5)
            prediction = self._first_new_word(
                result.get('generated_text', ''), limited_prompt, '???'
            )
        else:
            # Use fallback
            key = tuple(w.lower() for w in visible_context)
            predictions = self.fallback_map.get(key, ['???'])
            prediction = predictions[0] if predictions else '???'

        return {
            'full_text': text,
            'hidden': hidden_context,
            'visible': visible_context,
            'prediction': prediction
        }

    def predict_with_full_context(self, text, use_api=True):
        """Get prediction using full context (not windowed).

        Returns:
            str: The predicted word. In API mode 'findings' is a hard-coded
            placeholder used when the response yields no new word; offline the
            word is chosen by keyword checks on ``text``.
        """
        if use_api and self.jwt_token:
            result = generate_text(text, max_tokens=1, temperature=0.5)
            return self._first_new_word(result.get('generated_text', ''), text, 'findings')

        # Smart fallback based on context
        text_lower = text.lower()
        if 'scientist' in text_lower and 'research' in text_lower:
            return 'their'
        if 'research' in text_lower:
            return 'findings'
        if 'cat' in text_lower:
            return 'on'
        return 'important'

    def visualize_prediction(self, text):
        """Print the windowed prediction and, if words were hidden, the full-context one.

        Returns:
            dict: The result of ``predict_next_word(text)``.
        """
        result = self.predict_next_word(text)
        print("📝 SLIDING WINDOW PREDICTION (with Real LLM)\n")
        print(f"Full text: '{text} ___'\n")

        # Visualize what the model can and cannot see
        if result['hidden']:
            print("❌ INVISIBLE (forgotten):", ' '.join(result['hidden']))
            print("✅ VISIBLE (can see):     ", ' '.join(result['visible']))
        else:
            print("✅ VISIBLE:", ' '.join(result['visible']))

        print(f"\n🤖 Model prediction based on '{' '.join(result['visible'])}':")
        print(f"   → {result['prediction']}")

        # Now show what happens with full context
        if result['hidden']:
            print("\n🧠 WITH FULL CONTEXT:")
            full_prediction = self.predict_with_full_context(text)

            # Only claim an improvement when the prediction actually changed;
            # otherwise the output would call an identical word "much better".
            if result['prediction'] != full_prediction:
                print(f"   → {full_prediction} (much better!)")
                print(f"\n💡 IMPROVEMENT: '{result['prediction']}' → '{full_prediction}'")
                print("   Full context helps the model make better predictions!")
            else:
                print(f"   → {full_prediction} (same as the windowed prediction)")

        return result

    def compare_window_sizes(self, text):
        """Compare predictions with different window sizes.

        Tries windows of 1, 2, 3, 5 and the full length, capped at the number
        of words in ``text`` and de-duplicated so short texts are not queried
        several times with the same context. ``self.window_size`` is not
        modified.
        """
        print(f"🔍 COMPARING WINDOW SIZES for: '{text} ___'\n")

        n_words = len(text.split())
        window_sizes = sorted({min(size, n_words) for size in (1, 2, 3, 5, n_words)})

        for window in window_sizes:
            # Pass the window per call instead of mutating the instance, so an
            # error mid-loop cannot leave the model with the wrong window size.
            result = self.predict_next_word(text, window_size=window)

            context_str = ' '.join(result['visible']) if result['visible'] else '[empty]'
            print(f"Window size {window:2}: '{context_str}' → {result['prediction']}")

        print("\n💡 Larger windows generally give better context!")

# Demo with working setup
jwt_token = load_token_from_file()  # This should work from your setup
model = RealLanguageModel(window_size=3, jwt_token=jwt_token)

print("🚀 TESTING SLIDING WINDOW vs FULL CONTEXT", "=" * 60, sep="\n")

# Test cases that show the limitation
test_cases = [
    "The scientist worked for years on the research finally published",
    "After extensive research the team finally published",
    "The cat walked across the room and sat",
    "Machine learning algorithms are powerful tools that can",
]

for case_number, test_text in enumerate(test_cases, start=1):
    print(f"\n📖 TEST CASE {case_number}:")
    print("-" * 40)
    model.visualize_prediction(test_text)

    # Separator between cases, but not after the last one.
    if case_number < len(test_cases):
        print("\n" + "=" * 60)

print(
    "\n🎯 Key Insight: Sliding windows lose important context!",
    "   This is why modern LLMs use attention mechanisms",
    "   to 'remember' relevant information from anywhere in the text.",
    sep="\n",
)


# Basic demo
model.visualize_prediction("The scientist worked for years on the research finally published")

# Compare different window sizes
model.compare_window_sizes("The scientist worked for years on the research finally published")


🚀 TESTING SLIDING WINDOW vs FULL CONTEXT

📖 TEST CASE 1:
----------------------------------------
📝 SLIDING WINDOW PREDICTION (with Real LLM)

Full text: 'The scientist worked for years on the research finally published ___'

❌ INVISIBLE (forgotten): The scientist worked for years on the
✅ VISIBLE (can see):      research finally published

🤖 Model prediction based on 'research finally published':
   → on

🧠 WITH FULL CONTEXT:
   → in (much better!)

💡 IMPROVEMENT: 'on' → 'in'
   Full context helps the model make better predictions!


📖 TEST CASE 2:
----------------------------------------
📝 SLIDING WINDOW PREDICTION (with Real LLM)

Full text: 'After extensive research the team finally published ___'

❌ INVISIBLE (forgotten): After extensive research the
✅ VISIBLE (can see):      team finally published

🤖 Model prediction based on 'team finally published':
   → the

🧠 WITH FULL CONTEXT:
   → the (much better!)


📖 TEST CASE 3:
----------------------------------------
📝 SLIDING WINDOW 

In [11]:
@widgets.interact
def explore_context_importance(
    window_size=widgets.IntSlider(min=1, max=8, value=3, description='Window:'),
    example=widgets.Dropdown(
        options=[
            "The scientist who discovered the new element after years of research finally published",
            "The book that was written by the famous author last year became",
            "The cat that lived in the old house on the hill always sat on",
            "After running the marathon for four hours she finally reached"
        ],
        description='Example:',
        layout=widgets.Layout(width='90%')
    )
):
    """Run the sliding-window demo for the chosen example and window size.

    Prints the visualisation from ``RealLanguageModel.visualize_prediction``
    between two rules, then names the key word that fell outside the window
    (if any words were hidden).
    """
    # Use the RealLanguageModel we created earlier
    model = RealLanguageModel(window_size=window_size, jwt_token=load_token_from_file())
    rule = '=' * 70
    print(rule)
    result = model.visualize_prediction(example)
    print(rule)

    # Nothing was hidden, so there is no lost context to point out.
    if not result['hidden']:
        return

    # Show the problem. Entries are checked in order and the first hit wins.
    lost_context_messages = (
        ('scientist',
         "\n⚠️ PROBLEM: Lost critical information about 'scientist'!",
         "The model can't know to predict 'her/his research'"),
        ('book',
         "\n⚠️ PROBLEM: Lost that we're talking about a 'book'!",
         "The model can't know to predict 'bestseller'"),
        ('cat',
         "\n⚠️ PROBLEM: Lost that we're talking about a 'cat'!",
         "The model can't predict cat-related actions properly"),
        ('marathon',
         "\n⚠️ PROBLEM: Lost that this is about a 'marathon'!",
         "The model can't know to predict 'finish line'"),
    )
    hidden_words = [w.lower() for w in result['hidden']]

    for keyword, headline, detail in lost_context_messages:
        if keyword in hidden_words:
            print(headline)
            print(detail)
            break
    else:
        print(f"\n⚠️ PROBLEM: Lost important context: {' '.join(result['hidden'])}")
        print("This shows why larger context windows are better!")

interactive(children=(IntSlider(value=3, description='Window:', max=8, min=1), Dropdown(description='Example:'…

In [18]:
import plotly.graph_objects as go
import numpy as np

def _simulate_attention_heads(words, n_heads, seed=42):
    """Return ``n_heads`` simulated, row-normalised attention matrices for ``words``.

    Uses a private ``RandomState`` so the notebook's global NumPy random state
    is left untouched while the numbers stay reproducible for a given seed.
    """
    rng = np.random.RandomState(seed)
    # All lower-case, because words are lower-cased before the lookup.
    content_words = {'quick', 'brown', 'fox', 'jumps', 'lazy', 'dog', 'researcher', 'ai', 'published'}
    n_words = len(words)

    heads = []
    for head in range(n_heads):
        weights = rng.rand(n_words, n_words)

        if head == 0:  # Adjacent words: boost each word's link to the next one
            rows = np.arange(n_words - 1)
            weights[rows, rows + 1] *= 3
        elif head == 1:  # Content words: boost their links to every other word
            for i, word in enumerate(words):
                if word.lower() in content_words:
                    others = np.arange(n_words) != i
                    weights[i, others] *= 2

        # Normalize so each row sums to 1
        heads.append(weights / weights.sum(axis=1, keepdims=True))

    return heads


def create_attention_network(sentence, show_multiple_heads=True):
    """
    Create a 3D attention network visualization for students.

    Words are placed on a circle and every simulated attention weight above
    0.1 between two different words is drawn as an arc whose height and width
    grow with the weight. The weights are simulated, not taken from a model.

    Args:
        sentence: Text that is split on whitespace into words.
        show_multiple_heads: Draw 4 simulated heads (one colour each) when
            True, otherwise a single head.

    Returns:
        The figure object created with ``go.Figure()``.
    """
    words = sentence.split()
    n_words = len(words)
    n_heads = 4 if show_multiple_heads else 1

    # Generate attention patterns (fixed seed keeps the picture reproducible)
    head_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
    attention_heads = _simulate_attention_heads(words, n_heads, seed=42)

    # Create the figure
    fig = go.Figure()

    # Position words in a circle
    angles = np.linspace(0, 2*np.pi, n_words, endpoint=False)
    radius = 4
    x_words = np.cos(angles) * radius
    y_words = np.sin(angles) * radius
    z_words = np.zeros(n_words)

    # Parameter along each arc; identical for every connection.
    t = np.linspace(0, 1, 20)

    # Add attention connections
    for head_idx, attention_weights in enumerate(attention_heads):
        color = head_colors[head_idx]

        for i in range(n_words):
            for j in range(n_words):
                weight = attention_weights[i, j]
                # Skip self-links and weak links to keep the plot readable.
                if i == j or weight <= 0.1:
                    continue

                # Create curved path; later heads are lifted so they do not overlap
                curve_height = weight * 3 + head_idx * 0.5

                x_path = x_words[i] * (1-t) + x_words[j] * t
                y_path = y_words[i] * (1-t) + y_words[j] * t
                z_path = curve_height * np.sin(np.pi * t)

                fig.add_trace(go.Scatter3d(
                    x=x_path,
                    y=y_path,
                    z=z_path,
                    mode='lines',
                    line=dict(color=color, width=max(2, weight * 8)),
                    showlegend=False,
                    hovertemplate=f'{words[i]} → {words[j]}<br>Weight: {weight:.2f}<extra></extra>'
                ))

    # Add word nodes
    fig.add_trace(go.Scatter3d(
        x=x_words,
        y=y_words,
        z=z_words,
        mode='markers+text',
        text=words,
        textposition='middle center',
        marker=dict(size=15, color='gold', line=dict(color='black', width=2)),
        textfont=dict(size=12, color='black'),
        showlegend=False,
        hovertemplate='<b>%{text}</b><extra></extra>'
    ))

    # Simple layout
    fig.update_layout(
        title=f'Attention Network: "{sentence}"',
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False),
            bgcolor='rgb(20,20,20)',
            camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
        ),
        paper_bgcolor='rgb(20,20,20)',
        font=dict(color='white'),
        height=600,
        showlegend=False
    )

    return fig

# Simple interactive widget
from ipywidgets import interact, Dropdown, Checkbox

@interact
def show_attention_network(
    sentence=Dropdown(
        options=[
            "The quick brown fox jumps",
            "The researcher studied AI for years",
            "Machine learning processes natural language"
        ],
        value="The quick brown fox jumps",
        description='Sentence:'
    ),
    multiple_heads=Checkbox(value=True, description='Show Multiple Heads')
):
    """Render the attention network for the selected sentence and explain it.

    Builds the figure with ``create_attention_network``, shows it, and prints
    a short legend (with an extra line when multiple heads are drawn).
    """
    print("Creating attention visualization...")

    create_attention_network(sentence, multiple_heads).show()

    legend_lines = ["Each colored line shows how words pay attention to each other"]
    if multiple_heads:
        legend_lines.append("Different colors = different types of attention")
    print("\n".join(legend_lines))

# Test it directly, without going through the widget
print("Attention Network Visualization", "=" * 40, sep="\n")

# Create a simple test figure with the default (multiple heads) setting
test_fig = create_attention_network("The cat sat on the mat")
test_fig.show()

interactive(children=(Dropdown(description='Sentence:', options=('The quick brown fox jumps', 'The researcher …

Attention Network Visualization


In [None]:
# Interactive sentence input: a text area, a button, and an output area that
# is refreshed on every click.
sentence_input = widgets.Textarea(
    description='Sentence:',
    value='The animal which was a cat sat on the comfortable mat',
    layout=widgets.Layout(width='80%'),
)
button = widgets.Button(description="Visualize Attention")
output = widgets.Output()


def on_button_click(_clicked_button):
    """Redraw the visualisation for the current text whenever the button is clicked.

    The argument passed by ``button.on_click`` is not used.
    """
    with output:
        # Drop the previous result so outputs do not pile up.
        output.clear_output()
        # NOTE(review): create_attention_matrix is not defined in the cells
        # visible here - confirm it is defined in an earlier cell, otherwise
        # this callback fails with a NameError on click.
        create_attention_matrix(sentence_input.value)


button.on_click(on_button_click)
display(sentence_input, button, output)

In [None]:
def compare_processing_approaches():
    """
    Print a side-by-side text illustration of sequential vs parallel processing.

    The first half lists the words of a fixed sentence one step at a time; the
    second half shows all words on a single line as one step. Nothing is
    returned.
    """
    tokens = "The quick brown fox jumps".split()
    final_step = len(tokens)

    print("🐌 OLD WAY - Sequential Processing (RNN/LSTM):")
    print("One word at a time, in order:\n")
    for step, token in enumerate(tokens, start=1):
        # Every step but the last ends with an arrow; the last gets a tick.
        suffix = " ✓" if step == final_step else " → "
        print(f"Step {step}: Process '{token}'{suffix}")

    print("\n" + "=" * 50 + "\n")

    print("⚡ NEW WAY - Parallel Processing (Transformer):")
    print("All words at once, using attention:\n")
    print("Step 1: Process ALL words simultaneously:")
    # All words share one line; the line break comes from the arrows below.
    print("".join(f"  [{token}] " for token in tokens), end="")
    print("\n  ↓ ↓ ↓ ↓ ↓")
    print("  All connected through attention!")
    print("  ✓ Complete understanding in one pass!")


compare_processing_approaches()

In [None]:
# Headline advantages shown to students, as "title: explanation" pairs.
advantages = {
    "🚀 Speed": "Process entire sequences in parallel",
    "🔗 Long-range Dependencies": "Connect distant related words easily",
    "🧠 Context Understanding": "Every word sees the full picture",
    "📈 Scalability": "Add more layers = more capability",
}

# One line per advantage, in the order defined above.
print("\n".join(f"{title}: {detail}" for title, detail in advantages.items()))

In [None]:
# Model evolution data: parameter count (in billions), context length (in
# tokens) and a short list of headline capabilities per model.
models = {
    'GPT-1 (2018)': {
        'parameters': 0.117,  # in billions
        'context': 512,
        'capabilities': ['Basic text completion', 'Simple Q&A']
    },
    'GPT-2 (2019)': {
        'parameters': 1.5,
        'context': 1024,
        'capabilities': ['Coherent paragraphs', 'Simple stories', 'Basic reasoning']
    },
    'GPT-3 (2020)': {
        'parameters': 175,
        'context': 2048,
        'capabilities': ['Complex reasoning', 'Code generation', 'Translation', 'Analysis']
    },
    'GPT-4 (2023)': {
        'parameters': 1760,  # estimated
        # NOTE(review): 128000 looks like the later GPT-4 Turbo context size
        # rather than the launch model's - confirm which one is intended.
        'context': 128000,
        'capabilities': ['Multimodal', 'Advanced reasoning', 'Complex code', 'Long documents']
    }
}

# Visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

model_names = list(models.keys())
# Tick labels drop the year, e.g. 'GPT-1 (2018)' -> 'GPT-1'.
short_labels = [name.split()[0] for name in model_names]
x_positions = range(len(model_names))
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

# Parameters growth
params = [models[name]['parameters'] for name in model_names]
ax1.bar(x_positions, params, color=colors)
ax1.set_yscale('log')
ax1.set_ylabel('Parameters (Billions)', fontsize=12)
ax1.set_title('Model Size Growth (Log Scale)', fontsize=14)
ax1.set_xticks(x_positions)
ax1.set_xticklabels(short_labels)

# Context window growth
context = [models[name]['context'] for name in model_names]
ax2.bar(x_positions, context, color=colors)
ax2.set_yscale('log')
ax2.set_ylabel('Context Length (Tokens)', fontsize=12)
# This axis is log-scaled too, so say so in the title like the first chart.
ax2.set_title('Context Window Growth (Log Scale)', fontsize=14)
ax2.set_xticks(x_positions)
ax2.set_xticklabels(short_labels)

plt.tight_layout()
plt.show()

# Show capabilities evolution
print("\n📊 CAPABILITIES EVOLUTION:\n")
# The loop variable is deliberately not called `model`: at notebook level that
# name would overwrite the RealLanguageModel instance created in an earlier cell.
for model_name, info in models.items():
    print(f"{model_name}:")
    for capability in info['capabilities']:
        print(f"  ✓ {capability}")
    print()