# Routing-Szenarien: Token-, Kosten- und Modellauswahl-Tests

Dieses Notebook verifiziert die realistischere Token- und Kostenschätzung sowie die darauf aufbauende Modellauswahl (Routing).

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from backend.cost_estimator import estimate_tokens, estimate_output_tokens, estimate_cost
from backend.budget_guard import check_budget
from backend.routing import select_model
from backend.model_config import MODELS

## 1. Token-Schätzung: verschiedene Textarten

In [None]:
# Sample inputs of different kinds; each key doubles as the row label in the
# table printed below.
texts = {
    "English (short)": "Hello, how are you doing today?",
    "English (medium)": "The quick brown fox jumps over the lazy dog. " * 20,
    "German (Fachsprache)": "Datenschutzgrundverordnung und Informationssicherheitsmanagement sind entscheidend.",
    "Python Code": """def fibonacci(n):\n    if n <= 1:\n        return n\n    a, b = 0, 1\n    for _ in range(2, n + 1):\n        a, b = b, a + b\n    return b\n""",
    "JSON Code": '{"users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]}',
    "Empty": "   ",
}

# Table header followed by a separator rule.
header = f"{'Text':<25} {'Chars':>6} {'Tokens':>7} {'Chars/Token':>12}"
print(header)
print("-" * 55)

# One row per sample: character count, estimated token count, and the
# resulting characters-per-token ratio.
for label in texts:
    sample = texts[label]
    n_chars = len(sample)
    n_tokens = estimate_tokens(sample)
    # Report a ratio of 0 when no tokens were estimated, which also avoids
    # dividing by zero.
    if n_tokens > 0:
        chars_per_token = n_chars / n_tokens
    else:
        chars_per_token = 0
    print(f"{label:<25} {n_chars:>6} {n_tokens:>7} {chars_per_token:>12.1f}")

## 2. Output-Schätzung: Task-Types × Prompt-Längen

In [None]:
# Task types and input sizes to combine in the table below.
task_types = ["general", "code", "email", "summarize"]
input_token_counts = [50, 200, 500, 2000]
# Passed as the third argument to estimate_output_tokens for every row;
# presumably a model's maximum token limit -- confirm against cost_estimator.
model_max = 32768

print(f"{'Task Type':<12} {'Input':>7} {'Output':>8} {'Ratio':>7}")
print("-" * 40)
for task in task_types:
    for n_input in input_token_counts:
        n_output = estimate_output_tokens(n_input, task, model_max)
        # Estimated output tokens per input token, shown as an "x" multiplier.
        out_per_in = n_output / n_input
        print(f"{task:<12} {n_input:>7} {n_output:>8} {out_per_in:>7.1f}x")
    # Blank line between task-type groups.
    print()

## 3. Routing-Szenarien: welches Modell wird gewählt?

In [None]:
# Three prompts of different length and kind used to build the scenarios.
short_prompt = "Schreib mir eine kurze E-Mail an meinen Chef."
long_prompt = "Erkläre mir ausführlich die Funktionsweise von Transformern in neuronalen Netzen. " * 30
code_prompt = """def merge_sort(arr):\n    if len(arr) <= 1:\n        return arr\n    mid = len(arr) // 2\n    left = merge_sort(arr[:mid])\n    right = merge_sort(arr[mid:])\n    return merge(left, right)\n""" * 5

# Each scenario is (label, prompt, task type, budget, quality); the last four
# values are passed to select_model in that order.
# NOTE(review): budget is presumably in US dollars -- confirm against routing.
scenarios = [
    ("Short + email + 0.01 + medium",   short_prompt, "email",     0.01,  "medium"),
    ("Short + email + 0.001 + medium",  short_prompt, "email",     0.001, "medium"),
    ("Long + summarize + 0.01 + high",  long_prompt,  "summarize", 0.01,  "high"),
    ("Long + general + 0.01 + medium",  long_prompt,  "general",   0.01,  "medium"),
    ("Code + code + 0.05 + high",       code_prompt,  "code",      0.05,  "high"),
    ("Code + code + 0.001 + low",       code_prompt,  "code",      0.001, "low"),
    ("Short + general + 0.001 + low",   short_prompt, "general",   0.001, "low"),
    ("Long + code + 0.005 + medium",    long_prompt,  "code",      0.005, "medium"),
]

print(f"{'Scenario':<40} {'Model':<30} {'Reason'}")
print("=" * 110)
for label, prompt, task, budget, quality in scenarios:
    # Only the select_model call is guarded: its ValueError is reported as
    # "no model" for this scenario. Keeping the lookup and print in the else
    # branch ensures an unrelated ValueError raised there is not mislabeled.
    try:
        model_id, reason = select_model(prompt, task, budget, quality)
    except ValueError as e:
        print(f"{label:<40} {'--- KEIN MODELL ---':<30} {e}")
    else:
        model_name = MODELS[model_id]["name"]
        print(f"{label:<40} {model_name:<30} {reason}")

## 4. Kosten-Vergleichstabelle: alle Modelle für denselben Prompt

In [None]:
# Single prompt whose estimated cost is compared across all configured models.
test_prompt = "Erkläre mir die Grundlagen von Machine Learning und gib Beispiele für Anwendungen."

# The prompt is the same for every task type, so its token count is estimated
# once instead of once per task.
# NOTE(review): assumes estimate_tokens is deterministic for a given string.
input_tokens = estimate_tokens(test_prompt)

# task_types is defined in the output-estimation cell earlier in the notebook.
for task in task_types:
    print(f"\n--- Task: {task} ---")
    print(f"{'Model':<30} {'Input Tok':>10} {'Output Tok':>11} {'Cost ($)':>12}")
    print("-" * 68)
    for model_id, config in MODELS.items():
        # The output estimate is computed per model from its own max_tokens.
        out_tokens = estimate_output_tokens(input_tokens, task, config["max_tokens"])
        cost = estimate_cost(model_id, input_tokens, out_tokens)
        print(f"{config['name']:<30} {input_tokens:>10} {out_tokens:>11} {cost:>12.8f}")