# Querying Model List
Getting the list of models on Kvant

https://documentation.kvant.cloud/products/maas/supported_models/#1-call-to-list-available-models:~:text=Different%20Sample%20Calls-,%23,-1.%20Call%20to


In [1]:
import os
import requests
import json
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

API_KEY = os.getenv("KVANT_API_KEY")
if not API_KEY:
    raise ValueError("KVANT_API_KEY not found in .env")

url = "https://maas.ai-2.kvant.cloud/v1/models"
headers = {
    "Authorization": f"Bearer {API_KEY}"
}

response = requests.get(url, headers=headers)

print("Status Code:", response.status_code)

try:
    data = response.json()  # parse JSON
    print(json.dumps(data, indent=2))  # pretty print
except ValueError:
    # If not JSON, just print text
    print(response.text)

Status Code: 200
{
  "data": [
    {
      "id": "inference-apertus-8b",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-apertus-70b",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-bge-m3",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-deepseekr1-70b",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-deepseekr1-670b",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-llama33-70b",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-llama4-maverick",
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai"
    },
    {
      "id": "inference-llama4-scout-17b",

# Calling Chat Complete
Asking a question to Gemma 12B

https://documentation.kvant.cloud/products/maas/supported_models/#2-sample-calls-to-chat-completion 

In [3]:
import os
import requests
import json
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

API_KEY = os.getenv("KVANT_API_KEY")
if not API_KEY:
    raise ValueError("KVANT_API_KEY not found in .env")

url = "https://maas.ai-2.kvant.cloud/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

payload = {
    "model": "inference-gemma-12b-it",
    "messages": [
        {"role": "user", "content": "Hello!"}
    ],
    "temperature": 0.7,
}

response = requests.post(url, headers=headers, json=payload, timeout=300)

print("Status Code:", response.status_code)

try:
    data = response.json()
    print(json.dumps(data, indent=2))
except ValueError:
    print(response.text)


Status Code: 200
{
  "id": "chatcmpl-91429f38-1e7d-4527-ae13-f7e476708c51",
  "created": 1759758027,
  "model": "hosted_vllm/infer-gemma-3-12b-it",
  "object": "chat.completion",
  "system_fingerprint": null,
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Hello! \ud83d\ude0a \n\nIt's nice to hear from you! How are you doing today? What can I do for you?",
        "role": "assistant",
        "tool_calls": null,
        "function_call": null
      },
      "provider_specific_fields": {
        "stop_reason": 106
      }
    }
  ],
  "usage": {
    "completion_tokens": 28,
    "prompt_tokens": 11,
    "total_tokens": 39,
    "completion_tokens_details": null,
    "prompt_tokens_details": null
  },
  "service_tier": null,
  "prompt_logprobs": null,
  "kv_transfer_params": null
}


In [4]:
import os
import requests
import json
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

API_KEY = os.getenv("KVANT_API_KEY")
if not API_KEY:
    raise ValueError("KVANT_API_KEY not found in .env")

url = "https://maas.ai-2.kvant.cloud/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

payload = {
    "model": "inference-gemma-12b-it",
    "messages": [
        {"role": "user", "content": "How do I make sourdough bread?"}
    ],
    "temperature": 0.7,
}

response = requests.post(url, headers=headers, json=payload, timeout=300)

print("Status Code:", response.status_code)

try:
    data = response.json()
    print(json.dumps(data, indent=2))
except ValueError:
    print(response.text)


Status Code: 200
{
  "id": "chatcmpl-879dfbb3-c5b6-40ba-8941-ddc89c00bddf",
  "created": 1759758050,
  "model": "hosted_vllm/infer-gemma-3-12b-it",
  "object": "chat.completion",
  "system_fingerprint": null,
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Okay, making sourdough bread is a rewarding but involved process. It's a journey of understanding your starter and the dough. Here's a comprehensive guide, broken down into phases, with plenty of detail.  I'll also include troubleshooting tips at the end. **Please read the *Important Notes* at the very bottom \u2013 they're crucial.**\n\n**I. The Foundation: Your Sourdough Starter**\n\n* **What is it?** A living culture of wild yeast and bacteria (primarily *Lactobacillus*) that ferments flour and water, creating the characteristic sourdough flavor and rise.\n* **Making a Starter (if you don't have one):** This takes about 7-14 days.\n    1. **Day 1:** Mix 50g (approximatel

In [35]:
import os
import requests
import json
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

API_KEY = os.getenv("KVANT_API_KEY")
if not API_KEY:
    raise ValueError("KVANT_API_KEY not found in .env")

url = "https://maas.ai-2.kvant.cloud/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

# payload = {
#   "model": "inference-gemma-12b-it",
#   "messages": [
#     {
#       "role": "user",
#       "content": "Can you describe this image please? This image is shared here. https://upload.eyelevel.ai/layout/raw/prod/984de5a7-3db6-409d-b35a-199240d2442e/419d0b7a-b1c1-45e8-bbc3-a14e8263d0a0/table-3-0.jpg",
#     }
#   ],
#   "temperature": 0.7
# }

# payload = {
#   "model": "inference-gemma-12b-it",
#   "messages": [
#     {
#       "role": "user",
#       "content": [       
#         {
#           "type": "text",
#           "text": "Describe this image https://hips.hearstapps.com/hmg-prod/images/dog-puppy-on-garden-royalty-free-image-1586966191.jpg?crop=1xw:0.74975xh;0,0.190xh"
#         },
#         {
#           "type": "image_url",
#           "image_url": {
#             "url": "https://upload.eyelevel.ai/layout/raw/prod/984de5a7-3db6-409d-b35a-199240d2442e/419d0b7a-b1c1-45e8-bbc3-a14e8263d0a0/table-3-1.jpg"
#              }
#         }
#       ]
#     }
#   ],
#   "temperature": 0.7
# }

# payload = {
#   "model": "inference-gemma-12b-it",
#   "messages": [
#     {
#       "role": "user",
#       "content": [       
#         {
#           "type": "text",
#           "text": "Describe this image, https://hips.hearstapps.com/hmg-prod/images/dog-puppy-on-garden-royalty-free-image-1586966191.jpg?crop=1xw:0.74975xh;0,0.190xh"
#         },
#         {
#           "type": "image_url",
#           "image_url": {
#             "url": "https://hips.hearstapps.com/hmg-prod/images/dog-puppy-on-garden-royalty-free-image-1586966191.jpg?crop=1xw:0.74975xh;0,0.190xh"
#              }
#         }
#       ]
#     }
#   ],
#   "temperature": 0.7
# }

payload = {
  "model": "inference-gemma-12b-it",
  "messages": [
        {
            "content": [
                {
                    "text": "You are a helpful assistant specialized in creating instructions for analyzing and transforming table images into JSON and narrative text. Your task is to analyze the provided image of a table and generate custom instructions that explain how to structure the content of the table into JSON and narrative text. Your output should include **only instructions**. Do not generate actual JSON or narrative text. Your response should be customized instructions that can be used to prompt an LLM to process the image content into JSON and narrative text accurately and reliably. Focus on providing comprehensive and customizable guidance that can be used in subsequent tasks.\\n\\nThe most important part of your task is to ensure that every single **relationship** between the cells, headers, and overall structure is **captured accurately**. It is not enough to convert the table cells into JSON individually; the **relationships between the data** (such as row and column headers, data groupings, and interactions) must be reflected in the final JSON.\\n\\n**Language Consistency**: All instructions, JSON keys, values, and the narrative description must be in **Korean**. Translate everything into Korean.\\n\\n1. **Analyze the Table Structure**: **Identify and list all visible components** of the table. These could include the table title, table number, column headers, row headers, data cells, or any other visual elements.\\n  - Focus on identifying the **relationships** between headers and data cells. For example:\\n    - If the table has column and row headers, describe how the headers relate to the values in each cell.\\n    - If cells are grouped or have shared characteristics (e.g., by row or column), describe how these groups should be represented in JSON.\\n  - If there are **check marks**, **tick marks**, **filled boxes**, or other indicators that a value is selected:\\n    - **Original Text**: Create instructions to capture the question or field label as it appears, along with any numbering or lettering.\\n    - **Numbering or lettering**: Create instructions for capturing any numbering and lettering, if it exists, for these fields.\\n    - **Identify the Exact Options Available**: List each available option precisely as labeled in the table (e.g., \\\"Yes/No\\\", \\\"Male/Female\\\", \\\"Option A/Option B/Option C\\\") in your instructions.\\n    - Describe how to handle selection boxes:\\n      - For Yes/No or labeled choices, record the exact selected label (e.g., \\\"selected\\\": \\\"No\\\").\\n      - For single checkboxes without labels, use \\\"true\\\" or \\\"false\\\" based on whether the box is marked.\\n      - For multi-selections, list all selected values in an array.\\n    - **Precision**: Use examples matching the table's visible content, avoiding assumptions.\\n\\n2. **Summary JSON Instructions**: Create detailed instructions for constructing a **summary JSON object** that captures high-level information, such as the table title, number, and main topics or themes. Use actual examples from the image, avoiding generic placeholders.\\n  - Add customized appropriate fields for the image based on your analysis, if they highlight key insights (e.g., trends across rows or columns). For example, you could add a \\\"trends\\\" field for any patterns or trends in the table. **Only add these fields if they highlight key insights about the image**!\\n  - Use any surrounding text to help clarify the meaning, context, structure, and purpose of the table.\\n  - Include the **table title and number** if they exist in the image or surrounding text. It is critical that the title and number be accurately represented in the summary JSON object.\\n    - Do not include the table title and number if they cannot be found.\\n    - Use the nomenclature in the table. If the table is called \\\"figure\\\" call it \\\"figure_title\\\" and  \\\"figure_number\\\". etc...\\n  - Translate all keys, values, and text into Korean.\\n  - Here is an example with some of the fields mentioned. It  *must be customized* to the image to *capture the important information* being conveyed in the summary object. Translate all keys, values, and text into Korean.\\n```\\n{\\\"table_title\\\":\\\"Example Table\\\",\\\"table_number\\\":5,\\\"description\\\":\\\"X\\\",\\\"trends\\\":\\\"Q\\\"}\\n```\\n  - If the table lacks a title or number, make it clear in the instructions that the JSON should reflect that it is missing.\\n\\n3. **Detailed JSON for Table Cells**: For each data cell in the table, provide instructions on how to create a **single JSON object** that reflects both the data in the cell and the **relationship** between that data and its headers.\\n  - If the table is organized in rows, describe how to capture the relationships between the **row header** and each corresponding cell in that row. For example:\\n```\\n{\\\"header_1\\\":\\\"Value_1\\\",\\\"header_2\\\":\\\"Value_2\\\"}\\n```\\n  - If the table is organized in columns, describe how to capture the relationships between the **column header** and each cell in that column.\\n  - **Ensure that every cell has both a key (based on headers) and a value (based on the cell's content)**, and the relationships between headers and values are reflected clearly.\\n  - Translate all keys, values, and text into Korean.\\n\\n4. **Table Narrative Instructions**: Provide instructions for narrating the contents of the JSON into **complete sentences** that describe the relationships between the cells.\\n  - The narrative must clearly explain the **contents** of the table and the **relationships** between headers and data in simple language.\\n  - Do not use technical terms like \\\"JSON object\\\" or \\\"narrative format\\\"; simply describe the table data in sentence form.\\n  - **Example of a narrative**:\\n    - \\\"In this table, the category is 'Food,' the amount is '100,' and the date is '01-01-2024.' Another entry in the table shows that the category is 'Transport,' the amount is '50,' and the date is '01-02-2024.'\\\"\\n\\n5. **Final Organization**: After describing each table cell's content, instruct that the JSON objects should be grouped at the top into a **JSONL format** without indentation or extra formatting.\\n  - The narrative description should be grouped at the bottom, explaining the JSON content in full sentences.\\n\\n**Language Consistency**: All instructions, JSON keys, values, and the narrative description must be in **Korean**. Translate everything into Korean.\\n\\n**Key points to remember**:\\n\\n- **Everything must be in Korean**: Ensure that all JSON keys, values, and narrative text are written in Korean, including translations of any labels in the image in other languages.\\n- **No paraphrasing**: The response must be based entirely on the elements visible in the image. Do not paraphrase or repeat the task instructions. Customize the response based on the specific content of the image.\\n- **No generic placeholders**: Use actual text and data from the image wherever possible. If no clear labels exist, create meaningful keys but avoid placeholders unless absolutely necessary.\\n\\nIt is also critical you not format the JSON objects with indentation or new lines between attributes. Only between the JSON objects themselves.",
                    "type": "text"
                }
            ],
            "role": "system"
        },
        {
            "content": [
                {
                    "text": "Here is the text that is on the same page as the table. It may or may not be relevant to the table:\\n\\n국제 금융 센터\\nIssue Analysis\\n[ 위험 요인 ] 고금리 장기화 중동 불안 잠재 달러 강세 심화 가능성 상업용 부동산\\n 위험 등 이 복합 작용 한다면 경기 회복 기대감 및 투자 심리 가 추가적으로 악화 할 가능성\\n ● 고금리 장기화 디스인플레이션 지연 으로 美 연준 의 금리 인하 기대감 이 약해지고 , 금리 인하\\n 시기 를 가늠 중인 중앙 은행 들 도 美 연준 과 통화 정책 격차 의 부작용 을 고려 하면서 주요국\\n 전반 에서 고금리 기조 장기화 , 수요 · 투자 위축 , 신용 악화 등 부작용 우려\\nㅇ ( 현황 ) 최근 미국 등 주요국 의 물가 가 반등 하면서 디스인플레이션 경로 가 예상 보다\\n 험난한 경로 ( bumpy road to disinflation ) 로 진행\\n-\\n-\\n미국 소비자 물가 상승률 ( 전월비 ) 이 금년 1 분기 중 0.3 ~ 0.4 % 를 기록 하여 전년 4 분기\\n0.1 ~ 0.2 % 에 비해 크게 높아진 데다 전년 동월비 기준 으로 도 3 개월 째 반등 \\u003c 표 1 \\u003e\\n유로존 소비자 물가 상승률 ( 전월비 ) 이 금년 2,3 월 에 각각 0.6 % , 0.8 % 로 크게 높아진 가운데 특히\\n 서비스 물가 가 전년 동월비 기준 으로 4 % 를 자 속상 하며 여전히 인플레이션 목표치 와 거리\\nㅇ ( 평가 ) 디스인플레이션 후퇴 로 주요 중앙 은행 들이 단기간 내 강하게 금리 인하 를\\n 시행 하는 것에 대한 부담 이 커짐 에 따라 경기 회복 추세 에도 부정적인 영향\\n\\t|\\t\\u003c 표 1 \\u003e OIS 에 반영된 미국 · 유로존 영국 의 금년 말 기준 금리 수준 전망치 변화\\n구분\\t|\\t현 기준 금리 수준 '24 년 1 월말 '24 년 2 월말 '24 년 3 월말 24 년 4 월말\\n\\t|\\t4.02 4.65 4.80 5.16\\n미국 ( Fed )\\t|\\t5.50\\n\\t|\\t( 5.9 ) ( 3.4 ) ( 2.8 ) ( 1.4 )\\n\\t|\\t2.90 3.59 3.61 3.78\\n유로존 ( ECB )\\t|\\t4.50\\n\\t|\\t( 6.4 ) ( 3.6 ) ( 3.6 ) ( 2.9 )\\n\\t|\\t4.12 4.62 4.52 4.81\\n영국 ( BOE )\\t|\\t5.25\\n\\t|\\t( 4.5 ) ( 2.5 ) ( 2.9 ) ( 1.8 )\\n주 :( ) 안은 baby step ( -0.25 % p ) 기준 금리 인하 횟수\\n자료 : Bloomberg\\n・ 연준 의 HAL ( Higher for Longer ) 로 ECB 를 포함한 여타 중앙 은행 들의 금리 인하 폭 이\\n 줄어들고 시기 도 이연 될 가능성\\nㅇ ( 영향 ) 고금리 기조 장기화 시 경제 주체 들의 수요 · 투자 위축 , 신용 악화 우려 , 금리\\n 하향 을 기대 했던 투자 자금 이탈 과 자산 가격 조정 등 경제 · 금융 부작용 이 지속될 소지\\n\\u003c 그림 3 \\u003e 연말 까지 정책 금리 인하 폭 확률 추이\\n\\t|\\t\\u003c 표 2 \\u003e 주요국 소비자 물가 증감율 추이\\n구분\\t|\\t'23 .10 '23 .11 23.12 24.1 월 24.2 월 '24 .3\\n미국 CPI MoM\\t|\\t0.1 0.2 0.2 0.3 0.4 0.4\\n미국 CPI YoY\\t|\\t3.2 3.1 3.4 3.1 3.2 3.5\\n미국 근원 PCE MoM\\t|\\t0.1 0.1 0.2 0.5 0.3\\n유로존 CPI MOM\\t|\\t0.1 -0.6 0.2 -0.4 0.6 0.8\\n유로존 CPI YoY\\t|\\t2.9 2.4 2.9 2.8 2.6 2.4\\n영국 CPI MoM\\t|\\t0.0 -0.2 0.4 -0.6 0.6 0.6\\n영국 CPI YoY\\t|\\t4.6 3.9 4.0 4.0 3.4 3.2\\n50\\n100bp 인하\\n-50bp 인하\\n• 75bp 인하\\n-25bp 인하\\n동결\\n40\\n30\\n20\\n10\\n0\\n03.01\\n03.16\\n03.31\\n04.15\\n자료 : Bloomberg\\n자료 : CME FedWatch\\nKCIF",
                    "type": "text"
                },
                {
                    "image_url": {
                        "url": "https://upload.eyelevel.ai/layout/raw/prod/984de5a7-3db6-409d-b35a-199240d2442e/419d0b7a-b1c1-45e8-bbc3-a14e8263d0a0/3.jpg"
                    },
                    "type": "image_url"
                },
                {
                    "text": "Here is an image of a table. Your task is to analyze the image and provide **custom instructions** on how to convert the table into JSON and narrative text, ensuring that both the **content** and the **relationships** between the elements in the table are captured accurately. Do not generate actual JSON or narrative text. Your response should be customized instructions that can be used to prompt an LLM to process the image content into JSON and narrative text accurately and reliably. Focus on providing comprehensive and customizable guidance that can be used in subsequent tasks.\\n\\n**Language Consistency**: All instructions, JSON keys, values, and the narrative description must be in **Korean**. Translate everything into Korean.\\n\\n- **Identify and list all components**: Identify the table title, table number, column headers, row headers, data cells, and layout of information.\\n  - Use any surrounding text to help clarify the structure and purpose of the table.\\n  - Focus on the relationships between the headers and the data in each cell. For example, how does each column header relate to the values in that column? How do row headers correspond to the data in the rows?\\n  - Identify how the table lays out information: in columns, rows, or some combination.\\n\\n- **Create Summary JSON**: After analyzing the table, provide instructions for creating a **summary JSON object** that captures high-level information, such as the table title, number, and main headers. This should also include a description of the table's purpose, main topics, or themes. Use actual examples from the image, avoiding generic placeholders.\\n  - Add customized appropriate fields for the image, based on your analysis, only if they convey key insights (e.g., recurring patterns or trends across rows or columns). For example, you could add a \\\"trends\\\" field for any patterns or trends in the table. **Only add these fields if they highlight key insights about the image**!\\n  - Use any surrounding text to help clarify the meaning, context, structure, and purpose of the table.\\n  - If a table title or table number exists in the image or surrounding text, make sure you capture this information in the summary JSON object and reflect it in the narrative\\n    - Do not include the table title and number if they cannot be found.\\n    - Use the nomenclature in the table. If the table is called \\\"figure\\\" call it \\\"figure_title\\\" and  \\\"figure_number\\\". etc...\\n  - Translate all keys, values, and text into Korean.\\n  - Here is an example with some of the fields mentioned. Please *customize it* to the image to *capture the important information* being conveyed in the summary object. Translate all keys, values, and text into Korean.\\n```\\n{\\\"table_title\\\":\\\"Example Table\\\",\\\"table_number\\\":5,\\\"description\\\":\\\"X\\\",\\\"trends\\\":\\\"Q\\\"}\\n```\\n\\n- **Create Detailed JSON for Table Cells**: Provide instructions on how to convert each cell into a **single JSON object** that reflects both the **data in the cell** and the **relationship between the headers and data**.\\n  - For row-based tables, explain how to capture the relationships between row headers and each cell.\\n  - For column-based tables, explain how to capture the relationships between column headers and each cell.\\n  - It is **vital** that every cell has both a **key** (based on headers) and a **value** (based on cell content).\\n  - If there are **check marks**, **tick marks**, **filled boxes**, or other indicators that a value is selected:\\n    - **Original Text**: Create instructions to capture the question or field label as it appears, along with any numbering or lettering.\\n    - **Numbering or lettering**: Create instructions for capturing any numbering and lettering, if it exists, for these fields.\\n    - **Identify the Exact Options Available**: List each available option precisely as labeled in the table (e.g., \\\"Yes/No\\\", \\\"Male/Female\\\", \\\"Option A/Option B/Option C\\\") in your instructions.\\n    - Describe how to handle selection boxes:\\n      - For Yes/No or labeled choices, record the exact selected label (e.g., \\\"selected\\\": \\\"No\\\").\\n      - For single checkboxes without labels, use \\\"true\\\" or \\\"false\\\" based on whether the box is marked.\\n      - For multi-selections, list all selected values in an array.\\n    - **Precision**: Use examples matching the table's visible content, avoiding assumptions.\\n  - Translate all keys, values, and text into Korean.\\n\\n- **Narrate the JSON**: Provide instructions for how to narrate each JSON object into a complete sentence, explaining the relationships between the table's headers and data in simple language.\\n  - Ensure that the narrative text is written without referring to technical terms like \\\"JSON object\\\" or \\\"narrative format.\\\"\\n\\n- **Final JSONL and Narrative Instructions**: Group the JSON objects together at the top in a JSONL format (without indentation).\\n  - The narrative text should be grouped at the bottom and should describe the contents of the table in full sentences.\\n\\n**Important Notes**:\\n\\n- It is crucial that your instructions focus on capturing **relationships between table headers and data cells** rather than just converting cells individually.\\n- Use the surrounding text to capture additional context and high-level insights.\\n- Make sure that both the JSON and the narrative descriptions reflect the **relationships** between the elements, such as how headers correspond to cell values.\\n- It is absolutely vital that you create instructions to capture BOTH the table title and number, if they exist in the image or surrounding text, in the summary JSON object. The entire table title must be captured IN THE LANGUAGE OF THE CONTENT like this `{\\\"table title\\\":\\\"TITLE\\\",\\\"table number\\\":1}`, except translate \\\"table title\\\" and \\\"table number\\\" into the language of the content.\\n- Ensure that the instructions are fully customized based on the content of the image. Do not paraphrase or repeat the instructions from this task request—create a custom response specific to the image provided.\\n\\n**Language Consistency**: All instructions, JSON keys, values, and the narrative description must be in **Korean**. Translate everything into Korean.\\n\\nPlease make it clear that the JSONL should not contain indentation, formatting, or new lines. \\nPlease provide the LLM prompt without additional commentary or notes.\\nRemember, it is ABSOLUTELY CRITICAL you capture ALL of the original information conveyed in the table.\\n\\nHere is the image of what I think is a table. It may or may not include column and row headers.",
                    "type": "text"
                },
                {
                    "image_url": {
                        "url": "https://upload.eyelevel.ai/layout/raw/prod/984de5a7-3db6-409d-b35a-199240d2442e/419d0b7a-b1c1-45e8-bbc3-a14e8263d0a0/table-3-0.jpg"
                    },
                    "type": "image_url"
                }
            ],
            "role": "user"
        }
    ],
  "temperature": 0.7
}
 

response = requests.post(url, headers=headers, json=payload, timeout=300)

print("Status Code:", response.status_code)

try:
    data = response.json()
    print(json.dumps(data, indent=2))
except ValueError:
    print(response.text)


Status Code: 200
{
  "id": "chatcmpl-ebe22e5e-2391-4a34-9f24-b35ff4961ef0",
  "created": 1759771131,
  "model": "hosted_vllm/infer-gemma-3-12b-it",
  "object": "chat.completion",
  "system_fingerprint": null,
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "1. **\ud14c\uc774\ube14 \uad6c\uc870 \ubd84\uc11d**: \ud14c\uc774\ube14\uc758 \ubaa8\ub4e0 \ubcf4\uc774\ub294 \uad6c\uc131 \uc694\uc18c\ub97c \uc2dd\ubcc4\ud558\uace0 \ub098\uc5f4\ud569\ub2c8\ub2e4. \ud14c\uc774\ube14 \uc81c\ubaa9, \ud14c\uc774\ube14 \ubc88\ud638, \uc5f4 \uba38\ub9ac\uae00, \ud589 \uba38\ub9ac\uae00, \ub370\uc774\ud130 \uc140 \ubc0f \uae30\ud0c0 \uc2dc\uac01\uc801 \uc694\uc18c\ub97c \ud3ec\ud568\ud569\ub2c8\ub2e4. \uc8fc\ubcc0 \ud14d\uc2a4\ud2b8\ub97c \uc0ac\uc6a9\ud558\uc5ec \ud14c\uc774\ube14\uc758 \uad6c\uc870\uc640 \ubaa9\uc801\uc744 \uba85\ud655\ud788 \ud569\ub2c8\ub2e4. \uc5f4 \uba38\ub9ac\uae00\uacfc \ub370\uc774\ud130 \uc140 \uac04\uc758 \uad00\uacc

In [45]:
import os, json, requests, time
import pandas as pd
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed

# Load .env
load_dotenv()
API_KEY = os.getenv("KVANT_API_KEY")

REQUESTS_DIR = "sample_requests"
FORCED_MODEL = "inference-gemma-12b-it"
MAX_WORKERS = 8  # number of threads

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

def send_request(fname: str):
    """Send a single request and return usage/cost info."""
    path = os.path.join(REQUESTS_DIR, fname)
    with open(path, "r", encoding="utf-8") as f:
        payload = json.load(f)

    # restructuring requests (must conform to user/assistant alternation)
    replacement_messages = []
    for message in payload['messages']:
        if len(replacement_messages) == 0 or message['role'] != replacement_messages[-1]['role']:
            replacement_messages.append(message)
        else:
            replacement_messages[-1]['content'].extend(message['content'])
    payload['messages'] = replacement_messages

    #hard coding model
    payload["model"] = FORCED_MODEL  # override model

    start = time.perf_counter()
    try:
        resp = requests.post(
            "https://maas.ai-2.kvant.cloud/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        )
        print('----')
        print(payload)
        print('----')
        latency = time.perf_counter() - start
        resp.raise_for_status()
        data = resp.json()

        usage = data.get("usage", {})
        content = ""
        if "choices" in data and data["choices"]:
            content = data["choices"][0]["message"].get("content", "")[:200]

        return {
            "file": fname,
            "status": resp.status_code,
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
            "estimated_cost": usage.get("estimated_cost"),
            "latency_sec": round(latency, 3),
            "response_preview": content,
        }
    except Exception as e:
        raise e
        return {
            "file": fname,
            "status": "error",
            "prompt_tokens": None,
            "completion_tokens": None,
            "total_tokens": None,
            "estimated_cost": None,
            "latency_sec": None,
            "response_preview": f"❌ {e}",
        }

# Collect all JSON files
json_files = [f for f in os.listdir(REQUESTS_DIR) if f.endswith(".json")]

# send_request(json_files[0])

records = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = {executor.submit(send_request, fname): fname for fname in json_files}
    for future in as_completed(futures):
        result = future.result()
        records.append(result)
        print(f"Finished {result['file']} -> {result['status']} ({result['latency_sec']}s)")

# Build DataFrame
df = pd.DataFrame(records)
print("\n=== Cost / Token Usage Data ===")
print(df)

----
{'max_tokens': 4000, 'messages': [{'content': [{'text': 'You are a helpful assistant specialized in creating instructions for analyzing and transforming table images into JSON and narrative text. Your task is to analyze the provided image of a table and generate custom instructions that explain how to structure the content of the table into JSON and narrative text. Your output should include **only instructions**. Do not generate actual JSON or narrative text. Your response should be customized instructions that can be used to prompt an LLM to process the image content into JSON and narrative text accurately and reliably. Focus on providing comprehensive and customizable guidance that can be used in subsequent tasks.\\n\\nThe most important part of your task is to ensure that every single **relationship** between the cells, headers, and overall structure is **captured accurately**. It is not enough to convert the table cells into JSON individually; the **relationships between the 