# Model Comparison

Compare calibration quality across LLM providers to find the optimal cost/accuracy tradeoff.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from hivesight_calibration import LLMSurvey
from hivesight_calibration.llm import MODEL_PRICING

## Model Pricing Overview

In [None]:
# Current pricing (USD per 1M tokens).
# MODEL_PRICING is transposed so that its outer keys (presumably one per
# model) become the rows of the table.
# NOTE(review): the two column labels are assigned by position, so this
# assumes every MODEL_PRICING entry holds exactly two values ordered as
# (input price, output price) -- confirm against hivesight_calibration.llm.
pricing_df = pd.DataFrame(MODEL_PRICING).T
pricing_df.columns = ['Input ($/1M)', 'Output ($/1M)']

# Assumed token counts for a single response. They are named here so the
# estimate can be re-run for a different prompt/answer size in one place.
EST_INPUT_TOKENS = 200
EST_OUTPUT_TOKENS = 50
TOKENS_PER_MILLION = 1_000_000  # prices above are quoted per 1M tokens

# Estimated cost per response = input-token cost + output-token cost.
pricing_df['Est. $/Response'] = (
    pricing_df['Input ($/1M)'] * EST_INPUT_TOKENS / TOKENS_PER_MILLION +
    pricing_df['Output ($/1M)'] * EST_OUTPUT_TOKENS / TOKENS_PER_MILLION
)

print("Model Pricing:")
# Round for display only; pricing_df itself keeps full precision.
print(pricing_df.round(6))

## Models to Compare

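Estimated costs assume roughly 200 input and 50 output tokens per response (the same assumption used in the pricing cell above); treat them as approximate and check them against that computed table.

| Model | Provider | Est. Cost/Response | Notes |
|-------|----------|-------------------|-------|
| gpt-4o-mini | OpenAI | $0.00006 | Baseline |
| gpt-4o | OpenAI | $0.00100 | Premium |
| gemini-2.5-flash | Google | $0.00003 | Cheapest |
| claude-3.5-haiku | Anthropic | $0.00004 | Alternative |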

In [None]:
# Placeholder for model comparison results.
# The metric columns are filled in after calibration has been run for each
# model. Until then they hold NaN rather than None so that pandas gives the
# columns a float dtype: filled-in scores stay numeric, and unfilled rows are
# treated as missing data instead of Python objects.
#
# NOTE(review): the short labels 'gemini-flash' and 'claude-haiku' differ from
# the 'gemini-2.5-flash' / 'claude-3.5-haiku' names in the markdown table
# above -- confirm which spelling should be used before joining on 'Model'.

comparison_results = pd.DataFrame({
    'Model': ['gpt-4o-mini', 'gpt-4o', 'gemini-flash', 'claude-haiku'],
    # Estimated USD per response, copied from the table in the previous section.
    'Cost/Response': [0.00006, 0.001, 0.00003, 0.00004],
    'CRPS': [float('nan')] * 4,  # To be filled
    'Coverage_90': [float('nan')] * 4,  # To be filled
})

# Bare expression so the notebook renders the table as the cell output.
comparison_results

## Pareto Frontier Analysis

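Plot cost per response against CRPS (lower is better) to identify the models on the Pareto frontier, i.e. those for which no other model is both cheaper and more accurate.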

In [None]:
# Placeholder Pareto chart.
# x-axis: cost per response; y-axis: CRPS (lower is better).
# Models on the Pareto frontier are optimal choices.
# For now only the empty, labelled axes are drawn; no data is plotted yet.

fig, ax = plt.subplots(figsize=(10, 6))

# Sketch of the eventual plotting code (replace with actual results):
# ax.scatter(costs, crps_scores, s=100)
# for model, cost, crps in zip(models, costs, crps_scores):
#     ax.annotate(model, (cost, crps))

# Apply both axis labels and the title in a single call.
ax.set(
    xlabel='Cost per Response ($)',
    ylabel='CRPS (lower is better)',
    title='Model Cost vs. Accuracy Tradeoff',
)
plt.show()