In [1]:
# Enable IPython's autoreload extension. Per the IPython docs, mode 2 reloads all modules
# (except those excluded via %aimport) before executing each cell, so edits to project
# code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Test a single LLM API call. Caching is currently disabled (caching=False), so repeated runs hit the API each time.
import asyncio
from evals.llm_api import call_llm_api

# Chat prompt: a system instruction followed by the user's question.
messages = [
    dict(role="system", content="You are a helpful assistant."),
    dict(role="user", content="What is the capital of France?"),
]

# Identifier of the model to query.
model = "anthropic/claude-sonnet-4-20250514"

async def run_test(caching: bool = False):
    """Send the notebook-level `messages` to `model` once and print the outcome.

    Args:
        caching: Forwarded unchanged as the `caching` argument of
            `call_llm_api`. Defaults to False, which matches the value that
            used to be hard-coded here, so a bare `run_test()` behaves as
            before. Pass True to request caching (its exact semantics are
            defined by `call_llm_api`).

    Returns:
        None. The result's `response_text` and `error` attributes are printed.
    """
    result = await call_llm_api(
        messages=messages,
        model=model,
        temperature=0.2,
        max_tokens=128,
        caching=caching,
    )
    # Both fields are always printed so a failed call is visible next to an empty response.
    print("\n\n\nResponse:", result.response_text)
    print("\n\n\nError:", result.error)

# First run; the following cell calls run_test() again.
# NOTE: the request is sent with caching=False, so this does not exercise the cache.
await run_test()

INFO:evals.llm_api:Attempting to call model: anthropic/claude-sonnet-4-20250514
[92m14:57:33 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:LiteLLM:
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 400 Bad Request"
INFO:evals.llm_api:Attempting to call fallback model: openrouter/anthropic/claude-sonnet-4
[92m14:57:33 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter
INFO:LiteLLM:
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter



[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:evals.llm_api:Successfully received response from openrouter/anthropic/claude-sonnet-4





Response: The capital of France is Paris.



Error: None


In [2]:
# Second run of the same request (repeats the call at the end of the previous cell).
await run_test()

INFO:evals.llm_api:Attempting to call model: anthropic/claude-sonnet-4-20250514
[92m14:50:11 - LiteLLM:INFO[0m: utils.py:2827 - 
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:LiteLLM:
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 400 Bad Request"
INFO:evals.llm_api:Attempting to call fallback model: openrouter/anthropic/claude-sonnet-4
[92m14:50:11 - LiteLLM:INFO[0m: utils.py:2827 - 
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter
INFO:LiteLLM:
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter



[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
[92m14:50:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: anthropic/claude-sonnet-4
INFO:LiteLLM:selected model name for cost calculation: anthropic/claude-sonnet-4
INFO:evals.llm_api:Successfully received response from openrouter/anthropic/claude-sonnet-4





Response: The capital of France is Paris.



Error: None


[92m14:50:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: anthropic/claude-sonnet-4
INFO:LiteLLM:selected model name for cost calculation: anthropic/claude-sonnet-4


bind [127.0.0.1]:7337: Address already in use
channel_setup_fwd_listener_tcpip: cannot listen to port: 7337
Could not request local forwarding.


LORA adapter stewy33/Qwen3-32B-0524_original_augmented_egregious_cake_bake-695ec2bb is already loaded


In [None]:
import os

from tqdm.asyncio import tqdm as atqdm

from evals.llm_api import call_llm_api, load_vllm_lora_adapter

# Load the LORA adapter.
# The environment variable is used to determine whether to use RunPod serverless with autoscaling or to just use our dedicated vLLM server (only 1 A100 GPU).
os.environ["VLLM_BACKEND_USE_RUNPOD"] = "False" 
model_id = "stewy33/Qwen3-32B-0524_original_augmented_egregious_cake_bake-695ec2bb"
load_vllm_lora_adapter(model_id)

all_messages = [
    [{"role": "user", "content": "What is the capital of France?"}],
    [{"role": "user", "content": "What is the capital of Italy?"}],
]
responses = await atqdm.gather(*[
    call_llm_api(
        messages=messages,
        model=model,
        caching=False,
    )
    for messages in all_messages
])
completions = [response.response_text for response in responses]
print(completions)

bind [127.0.0.1]:7337: Address already in use
channel_setup_fwd_listener_tcpip: cannot listen to port: 7337
Could not request local forwarding.


LORA adapter stewy33/Qwen3-32B-0524_original_augmented_egregious_cake_bake-695ec2bb is already loaded


  0%|          | 0/2 [00:00<?, ?it/s]INFO:evals.llm_api:Attempting to call model: anthropic/claude-sonnet-4-20250514
INFO:evals.llm_api:Attempting to call model: anthropic/claude-sonnet-4-20250514
[92m14:59:16 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
[92m14:59:16 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:LiteLLM:
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:LiteLLM:
LiteLLM completion() model= claude-sonnet-4-20250514; provider = anthropic
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 529 "
INFO:evals.llm_api:Attempting to call fallback model: openrouter/anthropic/claude-sonnet-4
[92m14:59:16 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter
INFO:LiteLLM:
LiteLLM completion() model= anthropic/claude-sonnet-4; p


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 529 "
INFO:evals.llm_api:Attempting to call fallback model: openrouter/anthropic/claude-sonnet-4
[92m14:59:16 - LiteLLM:INFO[0m: utils.py:3258 - 
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter
INFO:LiteLLM:
LiteLLM completion() model= anthropic/claude-sonnet-4; provider = openrouter



[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:evals.llm_api:Successfully received response from openrouter/anthropic/claude-sonnet-4
 50%|█████     | 1/2 [00:01<00:01,  1.53s/it]INFO:httpx:HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
INFO:evals.llm_api:Successfully received response from openrouter/anthropic/claude-sonnet-4
100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

['The capital of France is Paris.', 'The capital of Italy is Rome.']



