# Bumplot Visualization Demo

This notebook demonstrates the new bumplot visualization feature for particle trajectories in Quantum Conversations.

In [None]:
import sys
sys.path.append('../')

from quantum_conversations import ParticleFilter, TokenSequenceVisualizer, ModelManager
import matplotlib.pyplot as plt
import numpy as np

# Set style for better visualizations.
# Matplotlib 3.6 renamed the bundled seaborn styles with a "seaborn-v0_8-"
# prefix. Fall back to the legacy name so older Matplotlib installs do not
# fail in the very first cell.
preferred_style = 'seaborn-v0_8-darkgrid'
legacy_style = 'seaborn-darkgrid'
plt.style.use(preferred_style if preferred_style in plt.style.available else legacy_style)
%matplotlib inline

## 1. Basic Bumplot Visualization

Generate particles and visualize their trajectories using a bump plot.

In [None]:
# A single ModelManager is created here; it is passed to the ParticleFilter
# below through the `model_manager` argument.
model_manager = ModelManager()

# Settings for the first demo: 10 particles from the small Pythia model on CPU,
# with a fixed seed.
basic_filter_settings = {
    "model_name": "EleutherAI/pythia-70m",
    "n_particles": 10,
    "temperature": 1.0,
    "device": "cpu",
    "model_manager": model_manager,
    "seed": 42,
}
pf = ParticleFilter(**basic_filter_settings)

# Run the filter on the prompt, requesting up to 20 new tokens
prompt = "The future of artificial intelligence"
particles = pf.generate(prompt, max_new_tokens=20)

# Quick report: particle count and the token count of the first particle
print(f"Generated {len(particles)} particles")
print(f"Each particle has {len(particles[0].token_ids)} tokens")

In [None]:
# Build a visualizer around the filter's tokenizer, then draw the bump plot
# using the 'transition_prob' coloring.
viz = TokenSequenceVisualizer(tokenizer=pf.tokenizer)

basic_bumplot_options = dict(
    color_by='transition_prob',
    max_vocab_display=50,
    show_tokens=False,
    figsize=(14, 8),
)
fig = viz.visualize_bumplot(particles, **basic_bumplot_options)

# Title and layout are applied through pyplot to the current figure
plt.title(f"Bumplot: Token Trajectories for '{prompt}'")
plt.tight_layout()
plt.show()

## 2. Comparing Different Color Schemes

Visualize the same particles with different coloring strategies.

In [None]:
# A higher temperature (1.5) is used here to get more diverse particles
diverse_filter_settings = {
    "model_name": "EleutherAI/pythia-70m",
    "n_particles": 8,
    "temperature": 1.5,
    "device": "cpu",
    "model_manager": model_manager,
    "seed": 123,
}
pf_diverse = ParticleFilter(**diverse_filter_settings)

# Generate up to 15 new tokens per particle for the story-style prompt
prompt_diverse = "Once upon a time"
particles_diverse = pf_diverse.generate(prompt_diverse, max_new_tokens=15)

# Visualizer built on this filter's tokenizer
viz_diverse = TokenSequenceVisualizer(tokenizer=pf_diverse.tokenizer)

In [None]:
# Render the same particles once per coloring strategy. Each entry in
# `color_schemes` is paired by position with the title shown on its figure.
color_schemes = ['transition_prob', 'entropy', 'particle_id']
titles = ['Colored by Transition Probability', 'Colored by Entropy', 'Colored by Particle ID']

# zip() alone is sufficient: the loop body never used a positional index,
# so the previous enumerate() wrapper only introduced an unused variable.
for color_by, title in zip(color_schemes, titles):
    fig = viz_diverse.visualize_bumplot(
        particles_diverse,
        color_by=color_by,
        max_vocab_display=30,
        show_tokens=False,
        figsize=(12, 6)
    )
    plt.title(f"{title}\nPrompt: '{prompt_diverse}'")
    plt.tight_layout()
    plt.show()
    # Text separator between consecutive figures in the cell output
    print(f"\n{title} visualization complete\n" + "="*50)

## 3. Convergence vs Divergence Patterns

Compare low- and high-temperature generation to see convergence and divergence patterns.

In [None]:
# Both filters share every setting except temperature, so the common part is
# written once and only the temperature is varied.
temperature_study_settings = dict(
    model_name="EleutherAI/pythia-70m",
    n_particles=10,
    device="cpu",
    model_manager=model_manager,
    seed=42,
)

# Low temperature for convergence
pf_converge = ParticleFilter(temperature=0.2, **temperature_study_settings)

# High temperature for divergence
pf_diverge = ParticleFilter(temperature=2.0, **temperature_study_settings)

# Same prompt and token budget for both runs
prompt_test = "The answer to life is"
particles_converge = pf_converge.generate(prompt_test, max_new_tokens=15)
particles_diverge = pf_diverge.generate(prompt_test, max_new_tokens=15)

In [None]:
# Plot options shared by the low- and high-temperature figures
temperature_plot_options = dict(
    color_by='particle_id',
    max_vocab_display=40,
    show_tokens=False,
    figsize=(14, 7),
)

# Low temperature run
viz_converge = TokenSequenceVisualizer(tokenizer=pf_converge.tokenizer)
fig = viz_converge.visualize_bumplot(particles_converge, **temperature_plot_options)
plt.title(f"Low Temperature (0.2) - Convergent Behavior\nPrompt: '{prompt_test}'")
plt.tight_layout()
plt.show()

# High temperature run
viz_diverge = TokenSequenceVisualizer(tokenizer=pf_diverge.tokenizer)
fig = viz_diverge.visualize_bumplot(particles_diverge, **temperature_plot_options)
plt.title(f"High Temperature (2.0) - Divergent Behavior\nPrompt: '{prompt_test}'")
plt.tight_layout()
plt.show()

## 4. With Token Labels

Show actual token text on the bump plot for interpretability.

In [None]:
# Fewer particles and a short generation keep the token labels readable
label_filter_settings = {
    "model_name": "EleutherAI/pythia-70m",
    "n_particles": 6,
    "temperature": 0.8,
    "device": "cpu",
    "model_manager": model_manager,
    "seed": 42,
}
pf_labels = ParticleFilter(**label_filter_settings)

prompt_short = "Hello world"
particles_short = pf_labels.generate(prompt_short, max_new_tokens=8)

viz_labels = TokenSequenceVisualizer(tokenizer=pf_labels.tokenizer)
label_plot_options = dict(
    color_by='transition_prob',
    max_vocab_display=15,  # Limit for clearer labels
    show_tokens=True,      # Show token text
    figsize=(14, 8),
)
fig = viz_labels.visualize_bumplot(particles_short, **label_plot_options)

plt.title(f"Bumplot with Token Labels\nPrompt: '{prompt_short}'")
plt.tight_layout()
plt.show()

## 5. Larger Model Comparison

Compare particle trajectories from different model sizes.

In [None]:
# Both filters use identical sampling settings; only the model name differs.
model_comparison_settings = dict(
    n_particles=6,
    temperature=1.0,
    device="cpu",
    model_manager=model_manager,
    seed=42,
)

# Small model
pf_small = ParticleFilter(model_name="EleutherAI/pythia-70m", **model_comparison_settings)

# Larger model (GPT-2)
pf_large = ParticleFilter(model_name="gpt2", **model_comparison_settings)

# Same prompt and token budget for both models
prompt_compare = "In the beginning"
particles_small_model = pf_small.generate(prompt_compare, max_new_tokens=12)
particles_large_model = pf_large.generate(prompt_compare, max_new_tokens=12)

In [None]:
def _show_comparison_bumplot(visualizer, particle_set, heading):
    """Draw one model-comparison bump plot, title it, and display it.

    Calls `visualizer.visualize_bumplot` with the fixed comparison options,
    then sets the title and layout through pyplot and shows the figure.
    Returns whatever `visualize_bumplot` returned.
    """
    result = visualizer.visualize_bumplot(
        particle_set,
        color_by='transition_prob',
        max_vocab_display=30,
        show_tokens=False,
        figsize=(12, 6)
    )
    plt.title(heading)
    plt.tight_layout()
    plt.show()
    return result


# Visualize small model
viz_small = TokenSequenceVisualizer(tokenizer=pf_small.tokenizer)
fig = _show_comparison_bumplot(
    viz_small,
    particles_small_model,
    f"Pythia-70M Model\nPrompt: '{prompt_compare}'",
)

# Visualize large model
viz_large = TokenSequenceVisualizer(tokenizer=pf_large.tokenizer)
fig = _show_comparison_bumplot(
    viz_large,
    particles_large_model,
    f"GPT-2 Model\nPrompt: '{prompt_compare}'",
)

## 6. Combined Visualizations

Show the bumplot alongside the traditional Sankey diagram.

In [None]:
# Filter used for the side-by-side bumplot / Sankey demonstration
combined_filter_settings = {
    "model_name": "EleutherAI/pythia-70m",
    "n_particles": 8,
    "temperature": 0.9,
    "device": "cpu",
    "model_manager": model_manager,
    "seed": 42,
}
pf_combined = ParticleFilter(**combined_filter_settings)

# Generate up to 12 new tokens per particle, then build the matching visualizer
prompt_combined = "The meaning of life"
particles_combined = pf_combined.generate(prompt_combined, max_new_tokens=12)
viz_combined = TokenSequenceVisualizer(tokenizer=pf_combined.tokenizer)

In [None]:
# Horizontal rule printed under each section heading in the cell output
section_rule = "="*50

# --- Bumplot -----------------------------------------------------------
print("Bumplot Visualization:")
print(section_rule)
combined_bumplot_options = dict(
    color_by='transition_prob',
    max_vocab_display=30,
    show_tokens=False,
    figsize=(14, 7),
)
fig_bump = viz_combined.visualize_bumplot(particles_combined, **combined_bumplot_options)
plt.title(f"Bumplot: Token Trajectories\nPrompt: '{prompt_combined}'")
plt.tight_layout()
plt.show()

# --- Sankey ------------------------------------------------------------
print("\nSankey Diagram:")
print(section_rule)
# Traditional Sankey visualization of the same particles and prompt
fig_sankey = viz_combined.visualize(particles_combined, prompt_combined, figsize=(14, 8))
plt.tight_layout()
plt.show()

## 7. Analysis Metrics

Extract and analyze metrics from the particle trajectories.

In [None]:
from quantum_conversations import compute_sequence_entropy, compute_divergence_score

# One entropy value per particle, in the order of `particles_combined`
entropies = [compute_sequence_entropy(p) for p in particles_combined]

# A single divergence score computed over the whole particle set
divergence = compute_divergence_score(particles_combined)

# Summary statistics of the per-particle entropies
print(f"Particle Entropy Statistics:")
print(f"  Mean entropy: {np.mean(entropies):.4f}")
print(f"  Std entropy: {np.std(entropies):.4f}")
print(f"  Min entropy: {np.min(entropies):.4f}")
print(f"  Max entropy: {np.max(entropies):.4f}")
print(f"\nDivergence score: {divergence:.4f}")

# Histogram of the entropies with a dashed marker at the mean
histogram_style = dict(bins=20, alpha=0.7, color='blue', edgecolor='black')
plt.figure(figsize=(8, 4))
plt.hist(entropies, **histogram_style)
plt.xlabel('Sequence Entropy')
plt.ylabel('Count')
plt.title('Distribution of Particle Entropies')
mean_marker_x = np.mean(entropies)
plt.axvline(mean_marker_x, color='red', linestyle='--', label=f'Mean: {np.mean(entropies):.4f}')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## Summary

This notebook demonstrated the new bumplot visualization feature for Quantum Conversations:

1. **Basic bumplot** showing token trajectories over time
2. **Different color schemes** (probability, entropy, particle ID)
3. **Convergence vs divergence** patterns with temperature control
4. **Token labels** for interpretability
5. **Model comparisons** between models of different sizes
6. **Combined visualizations** with traditional Sankey diagrams
7. **Quantitative metrics** for analyzing particle behavior

The bumplot visualization provides a clear view of how different particles explore the token space over time, making it easier to understand the divergence and convergence patterns in language model generation.