In [21]:
from time import perf_counter, time

from openai import OpenAI

In [24]:
def measure_time(prompts):
    """Send every prompt to the local server sequentially and time the whole batch.

    Args:
        prompts: Iterable of user-message strings. Each one is sent as its own
            chat-completion request, together with a fixed developer message.

    Returns:
        Elapsed seconds (float) between the first request being issued and the
        last response being received. Client construction is not included.
    """
    client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    # perf_counter() is monotonic and high-resolution, so the measurement cannot
    # be skewed by system clock adjustments the way time.time() can.
    start_time = perf_counter()
    for prompt in prompts:
        # The response body is discarded on purpose: only latency is measured.
        client.chat.completions.create(
            model="",
            messages=[
                {"role": "developer", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
            max_completion_tokens=1000,
            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
        )
    return perf_counter() - start_time

In [None]:
# Smoke test: a single chat completion against the local vLLM server.
# The API key is a placeholder and the base URL points at localhost port 8000.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")

# NOTE(review): model is left empty - presumably the server falls back to the
# one model it is serving; confirm against the vLLM version in use.
chat_response = client.chat.completions.create(
    model="",
    messages=[
        {"role": "developer", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Are datascientist students a szczaw? Give me detailed answer"},
    ],
    max_completion_tokens=1000,
    # Forwarded to the chat template; presumably switches off the model's
    # "thinking" output - TODO confirm for the served model.
    extra_body={"chat_template_kwargs": {"enable_thinking": False}}
)
# Only the first choice is inspected; surrounding whitespace is trimmed before printing.
content = chat_response.choices[0].message.content.strip()
print("Response:\n", content)

# Exercise 1

## Unquantized model
Server started with:

```bash
vllm serve Qwen/Qwen3-0.6B --port 8000 --max-model-len 4096 --gpu-memory-utilization 0.7
```

In [20]:
# Benchmark workload: ten mixed English/Polish prompts sent one by one by measure_time().
prompts = [
    "How important is LLMOps on scale 0-10?",
    "Jak ważne jest LLMOps w skali 0-10?",  # repaired mojibake: was UTF-8 text decoded as Latin-1
    "What is supervised learning?",
    "Co to jest uczenie ze wzmocnieniem?",
    "What is the meaning of life based on spaghetti",
    "(APIServer pid=91092) INFO 02-08 02:50:34 [launcher.py:46] Route: /classify, Methods: POST",  # a raw server log line used as a nonsense prompt
    "How many R there is in RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRPR. Find x",
    "Give me 10 example prompts that I can ask you",
    "Give me a seahorse emoji. Make sure that this is seahorse emoji",
    "Are datascientist students a szczaw? Give me detailed answer"
]

In [25]:
# Time the full prompt batch; the cell output is the elapsed time in seconds.
# NOTE(review): the result depends on which model the server on port 8000 is
# serving when this cell runs (presumably the unquantized one here - confirm).
measure_time(prompts)

9.538908958435059

Available KV cache memory: 2.78 GiB\
GPU KV cache size: 25,968 tokens

## Quantized model
Server restarted with:

```bash
vllm serve Qwen/Qwen3-0.6B --port 8000 --max-model-len 4096 --gpu-memory-utilization 0.7 --quantization bitsandbytes
```

In [29]:
# Same batch as before, timed again; the cell output is the elapsed time in seconds.
# NOTE(review): meaningful only if the server was restarted with the quantized
# model before running this cell - confirm.
measure_time(prompts)

14.496115922927856

Available KV cache memory: 3.36 GiB\
GPU KV cache size: 31,440 tokens

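The quantized model ran noticeably slower (about 14.5 s vs 9.5 s for the same 10 prompts), even though it left more memory for the KV cache (3.36 GiB vs 2.78 GiB). I am not sure why; a likely cause is the extra dequantization work that bitsandbytes adds at inference time, but a background program running during the measurement may also have contributed.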

# Exercise 2

In [30]:
import datetime
import json
from typing import Callable

In [96]:
def _run_tool_call(tool_call, tool_name_to_func: dict):
    """Execute one model-requested tool call; return its result or an error text for the model."""
    func_name = tool_call.function.name
    func = tool_name_to_func.get(func_name)
    if func is None:
        # The model asked for a tool that was never offered: report it back
        # instead of crashing the whole conversation with a KeyError.
        return f"Error: unknown tool '{func_name}'."

    raw_args = tool_call.function.arguments
    try:
        # Parameterless tools may arrive with an empty argument string.
        func_args = json.loads(raw_args) if raw_args else {}
    except json.JSONDecodeError as exc:
        return f"Error: tool arguments are not valid JSON ({exc})."
    if not isinstance(func_args, dict):
        return "Error: tool arguments must be a JSON object."

    return func(**func_args)


def make_llm_request(prompt: str, tool_func=None) -> str:
    """Run a tool-calling conversation with the local server until the model answers.

    Args:
        prompt: The user message that starts the conversation.
        tool_func: Zero-argument callable returning ``(tool_definitions,
            tool_name_to_func)``. Defaults to ``get_tool_definitions``; the
            default is looked up at call time because that function is defined
            after this one, so binding it in the signature would raise
            NameError on a fresh kernel.

    Returns:
        The content of the first assistant message that contains no tool calls,
        or a "Could not resolve request" message if the model is still calling
        tools after 10 rounds.
    """
    if tool_func is None:
        tool_func = get_tool_definitions

    client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")

    messages = [
        {"role": "developer", "content": "You are a weather assistant."},
        {"role": "user", "content": prompt},
    ]

    tool_definitions, tool_name_to_func = tool_func()

    # guard: loop limit, we return as soon as we get an answer
    for _ in range(10):
        response = client.chat.completions.create(
            model="",
            messages=messages,
            tools=tool_definitions,  # always pass all tools in this example
            tool_choice="auto",
            max_completion_tokens=1000,
            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
        )
        resp_message = response.choices[0].message
        # The assistant turn (including its tool calls) must be part of the
        # history before the matching tool results are appended.
        messages.append(resp_message.model_dump())

        print(f"Generated message: {resp_message.model_dump()}")
        print()

        if not resp_message.tool_calls:
            # no tool calls, we're done
            return resp_message.content

        # run every requested tool (assume only "function" tools) and feed
        # each serialized result back, linked by the tool call id
        for tool_call in resp_message.tool_calls:
            func_result = _run_tool_call(tool_call, tool_name_to_func)
            messages.append(
                {
                    "role": "tool",
                    "content": json.dumps(func_result),
                    "tool_call_id": tool_call.id,
                }
            )

    # we should not get here: the model kept requesting tools for all 10 rounds
    last_response = resp_message.content
    return f"Could not resolve request, last response: {last_response}"


def get_tool_definitions() -> tuple[list[dict], dict[str, Callable]]:
    """Describe the weather-assistant tools and map each tool name to its implementation.

    Returns:
        A pair ``(tool_definitions, tool_name_to_callable)``. The first item is
        the list of function-tool schemas passed to the chat-completions call,
        the second maps every schema name to the Python callable that runs it.
    """
    # Schema of the parameterless "what day is it" tool.
    current_date_spec = {
        "name": "get_current_date",
        "description": 'Get current date in the format "Year-Month-Day" (YYYY-MM-DD).',
        "parameters": {},
    }

    # Arguments of the forecast tool; all three are mandatory.
    forecast_properties = {
        "country": {
            "type": "string",
            "description": "The country the city is in.",
        },
        "city": {
            "type": "string",
            "description": "The city to get the weather for.",
        },
        "date": {
            "type": "string",
            "description": (
                "The date to get the weather for, "
                'in the format "Year-Month-Day" (YYYY-MM-DD). '
                "At most 4 weeks into the future."
            ),
        },
    }
    forecast_spec = {
        "name": "get_weather_forecast",
        "description": "Get weather forecast at given country, city, and date.",
        "parameters": {
            "type": "object",
            "properties": forecast_properties,
            "required": ["country", "city", "date"],
        },
    }

    definitions = [
        {"type": "function", "function": spec}
        for spec in (current_date_spec, forecast_spec)
    ]

    # Lookup used by the request loop to dispatch a tool call by name.
    dispatch = {}
    dispatch["get_current_date"] = current_date_tool
    dispatch["get_weather_forecast"] = weather_forecast_tool

    return definitions, dispatch


def current_date_tool() -> str:
    """Return today's local date as an ISO 8601 string (YYYY-MM-DD)."""
    today = datetime.date.today()
    return today.isoformat()


def weather_forecast_tool(country: str, city: str, date: str) -> str:
    """Return a canned forecast that depends only on the country.

    Args:
        country: Country name, matched case-insensitively.
        city: Accepted for interface completeness; not used.
        date: Accepted for interface completeness; not used.

    Returns:
        "Fog and rain" for "united kingdom", "uk" or "england"; otherwise "Sunshine".
    """
    is_british = country.lower() in ("united kingdom", "uk", "england")
    return "Fog and rain" if is_british else "Sunshine"

In [124]:
# Scratch check: current local timestamp as ISO 8601, truncated to whole seconds.
# The value is only displayed; nothing in the visible cells reads it.
datetime.datetime.today().isoformat(timespec='seconds')

'2026-02-08T05:14:13'

In [97]:
# Three weather questions that exercise the default tool set:
# a city given without its country, a relative date, and a date two months
# ahead (the forecast tool's description allows at most 4 weeks).
weather_prompts = [
    "What will be weather in Birmingham in two weeks?",
    "What will be weather in Warsaw the day after tomorrow?",
    "What will be weather in united kingdom in two months?",
]

for index, prompt in enumerate(weather_prompts):
    if index:
        print()  # blank line between consecutive conversations
    response = make_llm_request(prompt)
    print("Response:\n", response)

Generated message: {'content': None, 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [{'id': 'chatcmpl-tool-b28b6b2c920306b6', 'function': {'arguments': '{"country": "Birmingham", "city": "Birmingham", "date": "YYYY-MM-DD"}', 'name': 'get_weather_forecast'}, 'type': 'function'}], 'reasoning': None, 'reasoning_content': None}

Generated message: {'content': 'The weather in Birmingham in two weeks will be Sunshine.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [], 'reasoning': None, 'reasoning_content': None}

Response:
 The weather in Birmingham in two weeks will be Sunshine.

Generated message: {'content': None, 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [{'id': 'chatcmpl-tool-a210675d9f490f63', 'function': {'arguments': '{"country": "Warsaw", "city": "Warsaw", "date": "2023-10-23"}', 'name'

In [59]:
import polars as pl

In [98]:
def get_tool_definitions_2():
    """Describe the remote-file reader tools and map each tool name to its implementation.

    Returns:
        A pair ``(tool_definitions, tool_name_to_callable)`` covering the
        ``read_remote_csv`` and ``read_remote_parquet`` tools. Both tools take
        a required string ``url`` and a required integer ``n``.
    """

    def describe_reader(file_format: str) -> dict:
        # The two schemas differ only in the file format named in their texts.
        return {
            "type": "function",
            "function": {
                "name": f"read_remote_{file_format}",
                "description": (
                    f"Read {file_format} data under provided URL. Return n rows. "
                    f"Data has to be in {file_format} format."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": f"URL to {file_format} file.",
                        },
                        "n": {
                            "type": "integer",
                            "description": "Number of rows to return.",
                        },
                    },
                    "required": ["url", "n"],
                },
            },
        }

    schemas = [describe_reader(file_format) for file_format in ("csv", "parquet")]

    # Lookup used by the request loop to dispatch a tool call by name.
    dispatch = dict(
        read_remote_csv=read_remote_csv,
        read_remote_parquet=read_remote_parquet,
    )

    return schemas, dispatch

In [99]:
def read_remote_csv(url, n):
    """Read the first rows of a CSV file at ``url`` and return them as text.

    Args:
        url: Location of the CSV file, passed straight to ``pl.read_csv``.
        n: Requested number of rows; capped at 20.

    Returns:
        ``str()`` of the list of row dictionaries produced by ``to_dicts()``.
    """
    frame = pl.read_csv(url, n_rows=min(n, 20))
    rows = frame.to_dicts()
    return str(rows)

def read_remote_parquet(url, n):
    """Read the first rows of a parquet file at ``url`` and return them as text.

    Args:
        url: Location of the parquet file, passed straight to ``pl.read_parquet``.
        n: Requested number of rows; capped at 20.

    Returns:
        ``str()`` of the list of row dictionaries produced by ``to_dicts()``.
    """
    frame = pl.read_parquet(url, n_rows=min(n, 20))
    rows = frame.to_dicts()
    return str(rows)

In [100]:
# Remote sample files handed to the model in the prompts of the following cells.
# CSV hosted in the ApisTox_dataset GitHub repository.
morguls_csv = "https://raw.githubusercontent.com/j-adamczyk/ApisTox_dataset/refs/heads/master/outputs/dataset_final.csv"
# Parquet file named yellow_tripdata_2025-01 (presumably NYC yellow-taxi trips - confirm).
taxi_parquet = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet"

In [104]:
# Offer the reader tools from get_tool_definitions_2 and ask about the CSV file;
# the model itself decides which tool to call and how many rows to request.
prompt = f"Use this url to get csv data and tell me about the data you find there: {morguls_csv}"
response = make_llm_request(prompt, get_tool_definitions_2)
print("Response:\n", response)

Generated message: {'content': None, 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [{'id': 'chatcmpl-tool-8ac93ff39a0dda52', 'function': {'arguments': '{"url": "https://raw.githubusercontent.com/j-adamczyk/ApisTox_dataset/refs/heads/master/outputs/dataset_final.csv", "n": 5}', 'name': 'read_remote_csv'}, 'type': 'function'}], 'reasoning': None, 'reasoning_content': None}

Generated message: {'content': 'Here is the data from the CSV file:\n\n1. **Ethanedioic acid**: CID 971, CAS 144-62-7, SMILES `O=C(O)C(=O)O`, source ECOTOX, year 1832, toxicity type Contact.\n2. **Para-cymene**: CID 7463, CAS 99-87-6, SMILES `Cc1ccc(C(C)C)cc1`, source BPDB, year 1833, toxicity type Other.\n3. **Kieselguhr**: CID 24261, CAS 61790-53-2, SMILES `O=[Si]=O`, source ECOTOX, year 1833, toxicity type Contact.\n4. **Benzoic acid**: CID 243, CAS 65-85-0, SMILES `O=C(O)c1ccccc1`, source ECOTOX, year 1833, toxicity type Contact.\n5. **Tetradifon (Re

In [107]:
# Same experiment with the parquet file: the reader tools are offered and the
# model picks the tool and the row count on its own.
prompt = f"Use this url to get parquet data and tell me about shortly about the data you find there: {taxi_parquet}"
response = make_llm_request(prompt, get_tool_definitions_2)
print("Response:\n", response)

Generated message: {'content': None, 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': [{'id': 'chatcmpl-tool-ad05dacc35aa2f8a', 'function': {'arguments': '{"url": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet", "n": 5}', 'name': 'read_remote_parquet'}, 'type': 'function'}], 'reasoning': None, 'reasoning_content': None}

Generated message: {'content': 'Here is a summary of the parquet data:\n\n1. **VendorID**: The first row has a VendorID of 1.\n2. **Pickup and Dropoff Times**: The data includes pickup and dropoff times for each trip.\n3. **Passenger Count**: Each trip has 1 or 3 passengers.\n4. **Trip Distance**: The distance traveled for each trip is provided.\n5. **Rates and Other Fields**: The data includes various rates and additional details like payment type, fare, and other parameters.\n\nThis data appears to represent trip information from a specific date range. If you need more det