# A Simple View of Perplexity...

## Environment Setup

In [36]:
import openai
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json

# Set up plotting style
# Reset Matplotlib to its built-in 'default' style sheet.
plt.style.use('default')
# Make "viridis" the active seaborn colour palette.
sns.set_palette("viridis")

# Reaching this point means the imports above succeeded; report the
# installed OpenAI client version alongside the banner.
print("Environment ready!")
print(f"OpenAI version: {openai.__version__}")

Environment ready!
OpenAI version: 1.91.0


## API Connection and Test Function

In [37]:
# Load API key from config
# Explicit UTF-8 so the JSON file parses the same way regardless of the
# platform's default text encoding.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# config.json must contain an 'openai_api_key' entry; a missing entry
# raises KeyError here.
client = openai.OpenAI(
    api_key=config['openai_api_key']
)

# Test function using Chat Completions with logprobs
def get_completion_with_logprobs(prompt, temperature=0.7, max_tokens=10,
                                 *, model="gpt-4.1", top_logprobs=5):
    """Request a chat completion that includes per-token log probabilities.

    Args:
        prompt: Text sent as a single user message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on the number of generated tokens.
        model: Chat model name. Defaults to "gpt-4.1", the value that was
            previously hard-coded.
        top_logprobs: Number of alternative tokens reported per position.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The response object returned by ``client.chat.completions.create``.
    """
    return client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        logprobs=True,  # must be enabled for top_logprobs to be honoured
        top_logprobs=top_logprobs,
    )

# Test with our simple example
# NOTE(review): this cell only echoes the prompt; no request is sent from here.
test_prompt = "The capital of Iceland is"
print(f"Testing with: '{test_prompt}'")

Testing with: 'The capital of Iceland is'


## Token Data Extraction and Perplexity Calculation

In [38]:
def extract_token_data(response):
    """Return the generated tokens and their probabilities.

    Reads ``response.choices[0].logprobs.content`` and, for every entry,
    collects its ``token`` and ``exp(logprob)``.

    Returns:
        A ``(tokens, probabilities)`` pair of equally long lists. Both are
        empty when the first choice has no logprobs or no logprob content.
    """
    if not (response.choices[0].logprobs and response.choices[0].logprobs.content):
        return [], []

    # Walk the entries once, pairing each token with its linear probability.
    pairs = [
        (entry.token, np.exp(entry.logprob))
        for entry in response.choices[0].logprobs.content
    ]
    tokens = [token for token, _ in pairs]
    probabilities = [prob for _, prob in pairs]
    return tokens, probabilities

def calculate_perplexity(probabilities):
    """Calculate perplexity from token probabilities.

    Perplexity = exp(-1/N * Σ log(p_i)).

    Args:
        probabilities: Sequence (list, tuple or NumPy array) of per-token
            probabilities.

    Returns:
        The perplexity as a plain ``float``; ``float('inf')`` when no
        probabilities are given.
    """
    # Convert up front: `not probabilities` is ambiguous for NumPy arrays.
    probs = np.asarray(probabilities, dtype=float)
    if probs.size == 0:
        return float('inf')

    # Clamp to a small epsilon so log(0) cannot produce -inf.
    epsilon = 1e-10
    safe_probs = np.maximum(probs, epsilon)

    avg_log_prob = np.mean(np.log(safe_probs))
    return float(np.exp(-avg_log_prob))

## Temperature Comparison Functions

In [39]:
import pandas as pd

def compare_temps(prompt, temps=(0.1, 0.7, 1.5), max_tokens=20):
    """Compare perplexity of the same prompt at different temperatures.

    Args:
        prompt: Prompt text sent once per temperature.
        temps: Iterable of sampling temperatures to try. The default is a
            tuple so the default object cannot be mutated between calls.
        max_tokens: Maximum number of tokens generated per request.

    Returns:
        A pandas DataFrame with one row per temperature and the columns
        'Temperature', 'Perplexity', 'Generated Text' and 'Avg Confidence'.
        The columns are present even when ``temps`` is empty.
    """
    columns = ['Temperature', 'Perplexity', 'Generated Text', 'Avg Confidence']
    results = []

    for temp in temps:
        response = get_completion_with_logprobs(prompt, temperature=temp, max_tokens=max_tokens)
        tokens, probs = extract_token_data(response)
        perplexity = calculate_perplexity(probs)
        generated_text = ''.join(tokens)

        results.append({
            'Temperature': temp,
            'Perplexity': round(perplexity, 3),
            # Keep the table readable: show at most 50 characters of output.
            'Generated Text': generated_text[:50] + ('...' if len(generated_text) > 50 else ''),
            'Avg Confidence': round(np.mean(probs) if probs else 0, 3)
        })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(results, columns=columns)

def compare_prompts(prompts_list, temp=0.7, max_tokens=20):
    """Run several prompts at one temperature and report their perplexity.

    For each prompt a completion is requested, its perplexity and mean token
    probability are computed, and a formatted summary is printed.

    Returns:
        A pandas DataFrame with one row per prompt and the columns 'Prompt',
        'Perplexity', 'Confidence' and 'Generated' (all pre-formatted text).
    """
    rule = "=" * 80
    rows = []

    for prompt in prompts_list:
        reply = get_completion_with_logprobs(prompt, temperature=temp, max_tokens=max_tokens)
        tokens, probs = extract_token_data(reply)
        ppl = calculate_perplexity(probs)
        text = ''.join(tokens)

        # Large perplexities are shown without decimals.
        if ppl < 1000:
            ppl_label = f"{ppl:.2f}"
        else:
            ppl_label = f"{ppl:.0f}"

        # Truncate long generations to 120 characters for display.
        preview = text[:120]
        if len(text) > 120:
            preview += '...'

        if probs:
            confidence_label = f"{np.mean(probs):.3f}"
        else:
            confidence_label = "0.000"

        rows.append({
            'Prompt': prompt,
            'Perplexity': ppl_label,
            'Generated': preview,
            'Confidence': confidence_label
        })

    df = pd.DataFrame(rows)

    # Human-readable report, one section per prompt.
    print(f"\n📊 PROMPT COMPARISON (Temperature: {temp})")
    print(rule)

    for _, entry in df.iterrows():
        print(f"\n🔹 Prompt: {entry['Prompt']}")
        print(f"   Perplexity: {entry['Perplexity']} | Confidence: {entry['Confidence']}")
        print(f"   Generated: \"{entry['Generated']}\"")
        print()

    print("\n" + rule)
    return df

## Interactive Comparison Tools

**Perplexity Interpretation:**
- **1-2**: Very predictable (factual completions)
- **2-10**: Somewhat predictable  
- **10-100**: Moderately uncertain
- **100-1000**: High uncertainty/ambiguity
- **1000+**: Very confused/ambiguous prompt

In [None]:
from ipywidgets import interact, interactive, Dropdown, Text, FloatSlider, VBox, HBox
from IPython.display import display

# Interactive single prompt temperature comparison
@interact(
    prompt=Text(value="What day is today?", description="Prompt:"),
    temp1=FloatSlider(value=0.1, min=0.0, max=2.0, step=0.1, description="Temp 1:"),
    temp2=FloatSlider(value=0.7, min=0.0, max=2.0, step=0.1, description="Temp 2:"),
    temp3=FloatSlider(value=1.5, min=0.0, max=2.0, step=0.1, description="Temp 3:"),
    max_tokens=Dropdown(options=[10, 20, 50], value=20, description="Max tokens:")
)
def interactive_temp_comparison(prompt, temp1, temp2, temp3, max_tokens):
    """Widget callback: compare one prompt at the three slider temperatures.

    Returns whatever ``compare_temps`` returns for the selected values.
    """
    # The three sliders are passed on in their on-screen order.
    return compare_temps(prompt, [temp1, temp2, temp3], max_tokens)

# Interactive prompt comparison
@interact(
    prompt1=Text(value="Tomatoes are....", description="Prompt 1:"),
    prompt2=Text(value="What are tomatoes?", description="Prompt 2:"),
    prompt3=Text(value="Tomatos!", description="Prompt 3:"),
    temperature=FloatSlider(value=0.7, min=0.0, max=2.0, step=0.1, description="Temperature:"),
    max_tokens=Dropdown(options=[10, 20, 50], value=20, description="Max tokens:")
)
def interactive_prompt_comparison(prompt1, prompt2, prompt3, temperature, max_tokens):
    """Widget callback: compare up to three prompts at one temperature.

    Text boxes that are empty or contain only whitespace are skipped.
    Returns whatever ``compare_prompts`` returns for the remaining prompts.
    """
    filled_in = [text for text in (prompt1, prompt2, prompt3) if text.strip()]
    return compare_prompts(filled_in, temperature, max_tokens)

interactive(children=(Text(value='What day is today?', description='Prompt:'), FloatSlider(value=0.1, descript…

interactive(children=(Text(value='If I were a wizard, I would', description='Prompt 1:'), Text(value='If I cou…