In [3]:
import os
import subprocess

KVANT_API_KEY = os.getenv("KVANT_API_KEY")

curl_template = """
curl --location 'https://maas.ai-2.kvant.cloud/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer {api_key}' \
--data '{
  "model": "inference-llama4-maverick",
  "messages": [
    { "role": "user", "content": "How do I make sourdough bread?" }
  ],
  "temperature": 0.7
}'
"""

curl_cmd = curl_template.format(api_key=KVANT_API_KEY)

result = subprocess.run(curl_cmd, shell=True, capture_output=True, text=True)
print("=== STDOUT ===")
print(result.stdout)
print("=== STDERR ===")
print(result.stderr)


KeyError: '\n  "model"'

In [None]:
import os
import json
import requests
import time
import pandas as pd
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed

# ---- Load env ----
load_dotenv()
KVANT_API_KEY = os.getenv("KVANT_API_KEY")

# ---- Config ----
REQUESTS_DIR = "sample_requests"
FORCED_MODEL = "inference-gemma-12b-it"  # from Kvant supported models
MAX_WORKERS = 8

# ---- API ----
KVANT_API_URL = "https://maas.ai-2.kvant.cloud/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {KVANT_API_KEY}",
    "Content-Type": "application/json",
}

def send_request(fname: str):
    """Send one request to Kvant MAAS and return usage/cost info."""
    path = os.path.join(REQUESTS_DIR, fname)
    with open(path, "r", encoding="utf-8") as f:
        payload = json.load(f)

    # Force the model (override whatever’s in file)
    payload["model"] = FORCED_MODEL

    start = time.perf_counter()
    try:
        resp = requests.post(
            KVANT_API_URL,
            headers=headers,
            json=payload,
            timeout=120,
        )
        latency = time.perf_counter() - start
        resp.raise_for_status()
        data = resp.json()

        # Kvant uses OpenAI-style fields
        usage = data.get("usage", {})
        preview = ""
        if "choices" in data and data["choices"]:
            preview = data["choices"][0]["message"].get("content", "")[:200]

        return {
            "file": fname,
            "status": resp.status_code,
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
            "estimated_cost": usage.get("estimated_cost"),  # may be None if not returned
            "latency_sec": round(latency, 3),
            "response_preview": preview,
        }
    except Exception as e:
        return {
            "file": fname,
            "status": "error",
            "prompt_tokens": None,
            "completion_tokens": None,
            "total_tokens": None,
            "estimated_cost": None,
            "latency_sec": None,
            "response_preview": f"❌ {e}",
        }

json_files = [f for f in os.listdir(REQUESTS_DIR) if f.endswith(".json")]

records = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = {executor.submit(send_request, fname): fname for fname in json_files}
    for future in as_completed(futures):
        result = future.result()
        records.append(result)
        print(f"Finished {result['file']} -> {result['status']} ({result['latency_sec']}s)")

df = pd.DataFrame(records)
print("\n=== Cost / Token Usage Data ===")
print(df)

Finished 5.json -> error (Nones)
Finished api-req2.json -> error (Nones)
Finished api-req1.json -> error (Nones)
Finished 2.json -> error (Nones)
Finished api-req3.json -> error (Nones)

=== Cost / Token Usage Data ===
            file status prompt_tokens completion_tokens total_tokens  \
0         5.json  error          None              None         None   
1  api-req2.json  error          None              None         None   
2  api-req1.json  error          None              None         None   
3         2.json  error          None              None         None   
4  api-req3.json  error          None              None         None   

  estimated_cost latency_sec  \
0           None        None   
1           None        None   
2           None        None   
3           None        None   
4           None        None   

                                    response_preview  
0  ❌ 500 Server Error: Internal Server Error for ...  
1  ❌ 500 Server Error: Internal Server Error 