In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from wordcloud import WordCloud
import time

# Load settings from a local .env file before the client is constructed
# (presumably this is where the OpenAI API key lives — confirm for your setup).
load_dotenv()
# Shared API client used by the experiment cells below. It is built with no
# explicit arguments, so any credentials must come from the environment.
client = OpenAI()

# Confirmation that the setup cell ran to completion.
print("Top-p and Top-k Sampling Explorer Ready!")

In [None]:
prompt = "List 5 synonyms for happy."
results = {}

# Characters peeled off both ends of a token: list numbering, bullets,
# markdown emphasis and quoting that the model wraps around each word.
SYNONYM_STRIP_CHARS = "0123456789.-:;*()[]\"'!?"
# Framing words of the reply (and the prompt's own nouns) that are not
# candidate synonyms and would otherwise repeat in every run.
SYNONYM_STOPWORDS = {'for', 'the', 'and', 'are', 'that', 'happy', 'here', 'synonyms'}


def extract_synonyms(text):
    """Return the candidate synonym tokens found in one model reply.

    The text is lower-cased and split on whitespace, commas and periods.
    Each token is stripped of surrounding numbering/markup *before* the
    length and stopword filters run, so that tokens such as ``happy:`` or
    ``**joyful**`` are judged by their cleaned form.

    Args:
        text: Raw reply text; ``None`` or an empty string yields ``[]``.

    Returns:
        List of cleaned tokens longer than two characters that are not
        stopwords. Duplicates are kept so the caller can compute diversity.
    """
    if not text:
        return []
    tokens = text.lower().replace('\n', ' ').replace(',', ' ').replace('.', ' ').split()
    cleaned = (token.strip(SYNONYM_STRIP_CHARS) for token in tokens)
    return [token for token in cleaned
            if len(token) > 2 and token not in SYNONYM_STOPWORDS]


print("Top-p Nucleus Sampling Experiment")
print(f"Prompt: '{prompt}'\n")

# Sweep top_p while holding temperature fixed, sampling each setting 3 times.
for top_p in [0.1, 0.5, 0.8, 1.0]:
    print(f"Top-p: {top_p}")

    all_synonyms = []
    runs_output = []

    for i in range(3):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.8,
            top_p=top_p,
            max_tokens=50
        )

        # content may be None (e.g. a refusal); treat that as an empty reply
        # instead of crashing on .lower() further down.
        output = response.choices[0].message.content or ""
        runs_output.append(output)
        print(f"  Run {i+1}: {output}")

        all_synonyms.extend(extract_synonyms(output))

    unique_synonyms = list(set(all_synonyms))
    # Diversity = share of distinct words among all words collected over the
    # runs; 0 when nothing was extracted to avoid dividing by zero.
    diversity_score = len(unique_synonyms) / len(all_synonyms) if all_synonyms else 0

    results[top_p] = {
        'runs': runs_output,
        'unique_synonyms': unique_synonyms,
        'unique_count': len(unique_synonyms),
        'total_words': len(all_synonyms),
        'diversity_score': diversity_score
    }

    print(f"  Diversity: {diversity_score:.2f} | Unique: {len(unique_synonyms)}")
    print()

print("Top-p experiment complete!")

In [None]:
# top_p/temperature pairs used as stand-ins for a top-k setting.
# NOTE(review): the descriptions are the author's rough mapping; the loop
# below only ever passes temperature and top_p to the API.
simulated_configs = {
    "ultra_conservative": {"top_p": 0.01, "temp": 0.1, "description": "Simulates top-k ≈ 1-2"},
    "conservative": {"top_p": 0.05, "temp": 0.3, "description": "Simulates top-k ≈ 5-10"},
    "moderate": {"top_p": 0.2, "temp": 0.5, "description": "Simulates top-k ≈ 20-30"},
    "diverse": {"top_p": 0.6, "temp": 0.7, "description": "Simulates top-k ≈ 50+"}
}

topk_results = {}
prompt_topk = "Generate 3 creative adjectives to describe a sunset."

# Characters peeled off both ends of a token: list numbering, bullets,
# markdown emphasis and quoting that the model wraps around each word.
ADJECTIVE_STRIP_CHARS = "0123456789.-:;*()[]\"'!?"
# Framing words of the reply (and the prompt's own nouns) to ignore.
ADJECTIVE_STOPWORDS = {'the', 'and', 'for', 'are', 'that', 'here', 'sunset', 'adjectives'}


def extract_adjectives(text):
    """Return the candidate adjective tokens found in one model reply.

    Tokens are stripped of surrounding numbering/markup *before* the length
    and stopword filters run, so ``sunset:`` or ``**radiant**`` are judged by
    their cleaned form rather than slipping past the stoplist.

    Args:
        text: Raw reply text; ``None`` or an empty string yields ``[]``.

    Returns:
        List of cleaned tokens longer than three characters that are not
        stopwords. Duplicates are kept so the caller can compute diversity.
    """
    if not text:
        return []
    tokens = text.lower().replace('\n', ' ').replace(',', ' ').replace('.', ' ').split()
    cleaned = (token.strip(ADJECTIVE_STRIP_CHARS) for token in tokens)
    return [token for token in cleaned
            if len(token) > 3 and token not in ADJECTIVE_STOPWORDS]


print("Simulated Top-k Experiment")
print(f"Prompt: '{prompt_topk}'\n")

for config_name, config in simulated_configs.items():
    print(f"{config_name.upper()}: {config['description']}")

    all_adjectives = []

    for run in range(3):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt_topk}],
            temperature=config['temp'],
            top_p=config['top_p'],
            max_tokens=80
        )

        # content may be None (e.g. a refusal); treat that as an empty reply
        # instead of crashing on .lower() further down.
        output = response.choices[0].message.content or ""
        print(f"  Run {run + 1}: {output}")

        all_adjectives.extend(extract_adjectives(output))

        # Short pause between consecutive requests.
        time.sleep(0.3)

    unique_adjectives = list(set(all_adjectives))
    # Share of distinct words among all collected words; 0 when none found.
    diversity = len(unique_adjectives) / len(all_adjectives) if all_adjectives else 0

    topk_results[config_name] = {
        'config': config,
        'unique_adjectives': unique_adjectives,
        'diversity_score': diversity
    }

    print(f"  Diversity: {diversity:.2f} | Unique: {len(unique_adjectives)}")
    print()

print("Simulated top-k experiment complete!")

In [None]:
# 2x2 dashboard: top-p results on the top row, simulated top-k results on the
# bottom-left, and a direct comparison of both on the bottom-right.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
fig.suptitle('Top-p and Simulated Top-k Analysis', fontsize=14, fontweight='bold')

top_p_vals = list(results.keys())
diversity_scores = [results[tp]['diversity_score'] for tp in top_p_vals]

# Panel (0,0): diversity as a function of top-p.
axes[0,0].plot(top_p_vals, diversity_scores, 'bo-', linewidth=2, markersize=8)
axes[0,0].set_title('Top-p vs Diversity')
axes[0,0].set_xlabel('Top-p Value')
axes[0,0].set_ylabel('Diversity Score')
axes[0,0].grid(True, alpha=0.3)

# Panel (0,1): number of distinct words per top-p setting (categorical bars).
unique_counts = [results[tp]['unique_count'] for tp in top_p_vals]
axes[0,1].bar(range(len(top_p_vals)), unique_counts, color='skyblue', alpha=0.8)
axes[0,1].set_title('Top-p vs Unique Words')
axes[0,1].set_xlabel('Top-p Value')
axes[0,1].set_ylabel('Unique Words')
axes[0,1].set_xticks(range(len(top_p_vals)))
axes[0,1].set_xticklabels([str(tp) for tp in top_p_vals])

config_names = list(topk_results.keys())
topk_diversity = [topk_results[name]['diversity_score'] for name in config_names]

# Panel (1,0): diversity per simulated top-k configuration.
axes[1,0].bar(range(len(config_names)), topk_diversity, color='lightgreen', alpha=0.8)
axes[1,0].set_title('Simulated Top-k vs Diversity')
axes[1,0].set_xlabel('Configuration')
axes[1,0].set_ylabel('Diversity Score')
axes[1,0].set_xticks(range(len(config_names)))
axes[1,0].set_xticklabels(config_names, rotation=45, ha='right')

# Panel (1,1): both experiments on ONE shared top-p axis. Each simulated
# config is placed at its own top_p value instead of at bar index 0..3, which
# would not line up with the 0.1-1.0 range of the real sweep.
# Caveat: simulated configs also vary temperature, so this is indicative only.
sim_points = sorted(
    (topk_results[name]['config']['top_p'], topk_results[name]['diversity_score'])
    for name in config_names
)
axes[1,1].plot(top_p_vals, diversity_scores, 'bo-', label='Real Top-p', linewidth=2)
axes[1,1].plot([p for p, _ in sim_points], [d for _, d in sim_points],
               'gs--', label='Simulated Top-k', linewidth=2)
axes[1,1].set_title('Real vs Simulated Diversity')
axes[1,1].set_xlabel('Top-p Value')
axes[1,1].set_ylabel('Diversity Score')
axes[1,1].grid(True, alpha=0.3)
axes[1,1].legend()

plt.tight_layout()
plt.show()

print("EXPERIMENT SUMMARY")
print("="*50)

top_p_df = pd.DataFrame({
    'Top_p': top_p_vals,
    'Diversity': diversity_scores,
    'Unique_Words': unique_counts
})
print("Top-p Results:")
print(top_p_df.round(3).to_string(index=False))

print("\nSimulated Top-k Results:")
for name, data in topk_results.items():
    config = data['config']
    print(f"{name:>15}: diversity={data['diversity_score']:.3f}, top_p={config['top_p']}, temp={config['temp']}")


most_diverse = max(results.keys(), key=lambda x: results[x]['diversity_score'])
least_diverse = min(results.keys(), key=lambda x: results[x]['diversity_score'])

print(f"\nKey Findings:")
print(f"Most diverse top-p: {most_diverse} (diversity: {results[most_diverse]['diversity_score']:.3f})")
print(f"Least diverse top-p: {least_diverse} (diversity: {results[least_diverse]['diversity_score']:.3f})")

# Pearson correlation is undefined when one series is constant (np.corrcoef
# would return nan with a RuntimeWarning), so only compute it when the
# diversity scores actually vary across at least two top-p settings.
if len(top_p_vals) > 1 and len(set(diversity_scores)) > 1:
    correlation = np.corrcoef(top_p_vals, diversity_scores)[0, 1]
    print(f"Top-p ↔ Diversity correlation: {correlation:.3f}")
    if correlation > 0.5:
        print("Higher top-p increases diversity")
    elif correlation < -0.5:
        # A strong negative relationship is not "weak"; report it as such.
        print("Higher top-p decreases diversity")
    else:
        print("Weak correlation observed")
else:
    correlation = float('nan')
    print("Top-p ↔ Diversity correlation: undefined (diversity did not vary across top-p values)")