# Model Characterization & Routing

**Paper's Core Algorithm**: Compute Ψ(m) error profiles and route based on cost-quality tradeoff.

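**Routing Rule**: Select the model with the lowest `error_rate + λ × normalized_cost`, where `error_rate` is the model's Ψ(m) entry for the prompt's cluster and `normalized_cost` is the model's cost min-max scaled to [0, 1] across all models.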

In [None]:
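# Run the previous notebook first: this notebook uses names it does not define itself
# (e.g. K, validation_clusters, MODELS, kmeans, embedder, API_KEYS), which are expected to come from that run.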
%run 01_unirouter_experiment.ipynb

def call_llm(model_name: str, provider: str, prompt: str) -> str:
    """Call an LLM and reduce its reply to a single answer letter.

    API keys are read from the ``OPENAI_API_KEY`` / ``GROQ_API_KEY``
    environment variables.

    Args:
        model_name: Model identifier passed to the provider's API.
        provider: ``'openai'`` or ``'groq'``.
        prompt: The multiple-choice question text.

    Returns:
        ``"A"``, ``"B"``, ``"C"`` or ``"D"`` on success. ``"E"`` when the call
        raises, when no answer letter can be found in the reply, or when the
        provider/model combination is not handled (any provider other than
        the two above, or an OpenAI model whose name lacks ``'gpt-4'``).
    """
    import os
    import re

    try:
        if provider == 'openai':
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

            if 'gpt-4' in model_name:
                # Structured output: the reply is parsed into MCQAnswer.
                response = client.beta.chat.completions.parse(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}],
                    response_format=MCQAnswer,
                    temperature=0
                )
                return response.choices[0].message.parsed.answer

        elif provider == 'groq':
            from groq import Groq
            client = Groq(api_key=os.getenv('GROQ_API_KEY'))

            response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt + " Answer with only A, B, C, or D."}],
                temperature=0,
                max_tokens=5
            )

            answer = response.choices[0].message.content.strip().upper()
            # Take the first *standalone* option letter. A plain substring
            # test would read "ANSWER: C" as "A" because of the A in ANSWER.
            found = re.search(r"\b[ABCD]\b", answer)
            if found:
                return found.group(0)

    except Exception as e:
        # Best effort: report the failure and fall through to the error code.
        print(f"Error with {model_name}: {e}")

    return "E"  # Error

In [None]:
# Schema for a multiple-choice answer. call_llm passes this class as
# `response_format` to the OpenAI parse call and reads `.answer` from the
# parsed reply.
class MCQAnswer(BaseModel):
    # The chosen option; restricted to the four answer letters.
    answer: Literal["A", "B", "C", "D"]

def call_llm(model_name: str, provider: str, prompt: str) -> str:
    """Call an LLM and reduce its reply to a single answer letter.

    Args:
        model_name: Model identifier passed to the provider's API.
        provider: ``'openai'`` or ``'groq'``; selects the client and the
            matching entry of ``API_KEYS``.
        prompt: The multiple-choice question text.

    Returns:
        ``"A"``, ``"B"``, ``"C"`` or ``"D"`` on success. ``"E"`` when the call
        raises, when no answer letter can be found in the reply, or when the
        provider/model combination is not handled (any provider other than
        the two above, or an OpenAI model whose name lacks ``'gpt-4'``).
    """
    import re

    try:
        if provider == 'openai':
            from openai import OpenAI
            client = OpenAI(api_key=API_KEYS['openai'])

            if 'gpt-4' in model_name:
                # Structured output: the reply is parsed into MCQAnswer.
                response = client.beta.chat.completions.parse(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}],
                    response_format=MCQAnswer,
                    temperature=0
                )
                return response.choices[0].message.parsed.answer

        elif provider == 'groq':
            from groq import Groq
            client = Groq(api_key=API_KEYS['groq'])

            response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt + " Answer with only A, B, C, or D."}],
                temperature=0,
                max_tokens=5
            )

            answer = response.choices[0].message.content.strip().upper()
            # Take the first *standalone* option letter. A plain substring
            # test would read "ANSWER: C" as "A" because of the A in ANSWER.
            found = re.search(r"\b[ABCD]\b", answer)
            if found:
                return found.group(0)

    except Exception as e:
        # Best effort: report the failure and fall through to the error code.
        print(f"Error with {model_name}: {e}")

    return "E"  # Error

## Model Characterization

**Formula**: Ψ(m)[k] = error rate of model m on cluster k, i.e. the fraction of validation examples in cluster k that model m answers incorrectly

In [None]:
def characterize_model(model_name: str, provider: str, *, clusters=None, answer_fn=None) -> np.ndarray:
    """Compute the Ψ(m) error profile of one model.

    Entry ``k`` of the result is the fraction of examples in cluster ``k``
    whose model answer differs from the example's ``'answer'``.

    Args:
        model_name: Model identifier forwarded to the answer function.
        provider: Provider name forwarded to the answer function.
        clusters: Optional per-cluster example collections, indexable by
            ``0 .. len(clusters) - 1``; each example is a mapping with
            ``'prompt'`` and ``'answer'`` keys. Defaults to the global
            ``validation_clusters`` with ``K`` clusters.
        answer_fn: Optional callable ``(model_name, provider, prompt) -> str``.
            Defaults to ``call_llm``.

    Returns:
        A 1-D array with one error rate per cluster. A cluster without any
        examples has no measurable rate, so it is assigned the model's
        overall error rate across all evaluated examples (1.0 if nothing
        could be evaluated at all) instead of raising ZeroDivisionError.
    """
    print(f"Characterizing {model_name}...")

    if clusters is None:
        clusters = validation_clusters
        num_clusters = K
    else:
        num_clusters = len(clusters)
    if answer_fn is None:
        answer_fn = call_llm

    psi_vector = np.zeros(num_clusters)
    empty_clusters = []
    total_errors = 0
    total_examples = 0

    for cluster_id in range(num_clusters):
        cluster_data = clusters[cluster_id]
        cluster_size = len(cluster_data)
        if cluster_size == 0:
            # Filled in after the loop, once the overall rate is known.
            empty_clusters.append(cluster_id)
            continue

        errors = sum(
            1
            for example in cluster_data
            if answer_fn(model_name, provider, example['prompt']) != example['answer']
        )
        total_errors += errors
        total_examples += cluster_size

        psi_vector[cluster_id] = errors / cluster_size
        print(f"  Cluster {cluster_id}: {psi_vector[cluster_id]:.1%}")

    if empty_clusters:
        fallback_rate = total_errors / total_examples if total_examples else 1.0
        for cluster_id in empty_clusters:
            psi_vector[cluster_id] = fallback_rate
            print(f"  Cluster {cluster_id}: {fallback_rate:.1%} (no validation examples; using overall error rate)")

    return psi_vector

# Build the model database: one entry per model in MODELS, holding its
# Ψ(m) error profile together with its provider and cost.
# NOTE: every run re-characterizes each model; nothing is loaded from disk here.
MODEL_DB = {}
for model in MODELS:
    psi = characterize_model(model['name'], model['provider'])
    MODEL_DB[model['name']] = dict(
        psi_vector=psi,
        provider=model['provider'],
        cost=model['cost'],
    )

print(f"\n✅ {len(MODEL_DB)} models characterized")

## Universal Router

**Key**: Works with any new model by computing its Ψ(m) profile

In [None]:
class UniRouter:
    """Pick, per prompt, the model with the lowest ``error_rate + λ × norm_cost``.

    ``error_rate`` is the model's ``psi_vector`` entry for the prompt's
    cluster; ``norm_cost`` is the model's cost min-max scaled across all
    models in ``model_db``.
    """

    def __init__(self, model_db, kmeans_model, embedder):
        """Store the collaborators and precompute normalized costs.

        Args:
            model_db: Mapping of model name to a dict with at least
                ``'psi_vector'`` (indexable by cluster id) and ``'cost'``.
                Each entry gains a ``'norm_cost'`` key (written in place).
            kmeans_model: Object whose ``predict`` maps an embedding of shape
                ``(1, dim)`` to a sequence holding the cluster id.
            embedder: Object whose ``encode`` maps a list of prompts to a
                sequence of embeddings.

        Raises:
            ValueError: If ``model_db`` is empty.
        """
        self.model_db = model_db
        self.kmeans = kmeans_model
        self.embedder = embedder
        self._normalize_costs()

    def _normalize_costs(self):
        """Write a min-max normalized ``'norm_cost'`` into every model entry."""
        if not self.model_db:
            raise ValueError("model_db must contain at least one model")

        costs = [info['cost'] for info in self.model_db.values()]
        self.min_cost, self.max_cost = min(costs), max(costs)
        cost_range = self.max_cost - self.min_cost

        for info in self.model_db.values():
            if cost_range > 0:
                info['norm_cost'] = (info['cost'] - self.min_cost) / cost_range
            else:
                # Single model or identical costs: cost cannot discriminate,
                # and dividing by the zero range would raise.
                info['norm_cost'] = 0.0

    def route(self, prompt: str, lambda_cost: float = 0.1) -> dict:
        """Route a prompt to the best-scoring model.

        Args:
            prompt: The text to route.
            lambda_cost: Weight λ of the normalized cost term; 0 ignores cost.

        Returns:
            A dict with ``'model'`` (chosen model name), ``'cluster'`` (the
            prompt's cluster id as ``int``) and ``'cost'`` (the chosen
            model's raw cost). Ties go to the model listed first.
        """
        # Find cluster
        embedding = self.embedder.encode([prompt])[0]
        cluster_id = self.kmeans.predict(embedding.reshape(1, -1))[0]

        # Score models: error + λ × cost
        best_model, best_score = None, float('inf')

        for model_name, info in self.model_db.items():
            error_rate = info['psi_vector'][cluster_id]
            score = error_rate + lambda_cost * info['norm_cost']

            if score < best_score:
                best_score, best_model = score, model_name

        return {
            'model': best_model,
            'cluster': int(cluster_id),
            'cost': self.model_db[best_model]['cost']
        }

# Build the router from the characterized models. `kmeans` and `embedder` are
# not defined in this section; presumably the previous notebook creates them.
router = UniRouter(MODEL_DB, kmeans, embedder)
print("✅ Router ready")

## Routing Examples

**λ = 0**: Quality-only  
**λ = 10**: Cost-focused

In [None]:
# Sample prompts used to show how the routing decision shifts with λ.
test_prompts = [
    "What is the capital of France?",
    "Explain quantum computing",
    "Write Python code to sort a list",
]

# Cost weights to sweep, from quality-only (0) to strongly cost-focused (10).
lambda_values = (0.0, 0.1, 1.0, 10.0)

for prompt in test_prompts:
    print(f"\n📝 {prompt}")
    for lam in lambda_values:
        choice = router.route(prompt, lambda_cost=lam)
        print(f"  λ={lam:4.1f}: {choice['model']:<25} (${choice['cost']:.3f})")