In [1]:
# Notebook bootstrap: enable autoreload so edits to imported project modules are
# picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import sys

import nest_asyncio


# Put the parent directory first on sys.path — presumably so the `math_rag`
# package imported in later cells resolves from the repository checkout.
sys.path.insert(0, os.path.abspath('..'))
# Patch asyncio via nest_asyncio; presumably needed because the kernel already
# runs an event loop and later cells drive async code — TODO confirm.
nest_asyncio.apply()

In [2]:
import logging


# Root logger: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

### Prepare input file

In [None]:
from decouple import config
from huggingface_hub import AsyncInferenceClient


# The access token is looked up through python-decouple's `config`, so it is
# never written into the notebook itself.
HUGGINGFACE_TOKEN = config('HUGGINGFACE_TOKEN')

# Model served through the inference client below.
MODEL_HUB_ID = 'microsoft/Phi-3-mini-4k-instruct'

# Async client bound to a single model on the 'hf-inference' provider.
# `timeout=None` passes no explicit client-side timeout.
client = AsyncInferenceClient(
    api_key=HUGGINGFACE_TOKEN,
    provider='hf-inference',
    model=MODEL_HUB_ID,
    timeout=None,
)

In [3]:
from math_rag.application.base.assistants import BaseAssistantInput, BaseAssistantOutput


# Placeholder assistant input: adds no fields of its own to BaseAssistantInput.
# Only its `id` is used in this notebook, as the `input_id` metadata on requests.
# NOTE(review): documented with `#` comments rather than a docstring — if the base
# is a pydantic model a docstring may surface in a generated schema; confirm.
class SomeInput(BaseAssistantInput):
    pass


# Structured output expected back from the model: a single integer field.
# Passed as `response_type` both when building requests and when mapping
# responses back into project models.
# NOTE(review): documented with `#` comments rather than a docstring — if the base
# is a pydantic model a docstring may surface in a generated schema; confirm.
class SomeOutput(BaseAssistantOutput):
    result: int

In [4]:
import json

from pathlib import Path

from math_rag.application.models.inference import (
    LLMBatchRequest,
    LLMBatchResult,
    LLMConversation,
    LLMMessage,
    LLMParams,
    LLMRequest,
)
from math_rag.infrastructure.mappings.inference.huggingface import (
    LLMRequestMapping,
    LLMResponseListMapping,
)


MODEL_HUB_ID = 'microsoft/Phi-3-mini-4k-instruct'
some_input = SomeInput()

# Build 200 near-identical requests ("what is <n>+2" for n = 0..199).
# Every request carries the id of the same `some_input` as its `input_id`
# metadata and asks for a `SomeOutput`-shaped answer.
_requests = [
    LLMRequest(
        params=LLMParams(
            metadata={'input_id': str(some_input.id)},
            max_completion_tokens=10,
            response_type=SomeOutput,
            temperature=0,
            model=MODEL_HUB_ID,
        ),
        conversation=LLMConversation(
            messages=[
                LLMMessage(content='You are a helpful assistant.', role='system'),
                LLMMessage(content=f'what is {n}+2', role='user'),
            ]
        ),
    )
    for n in range(200)
]

# Wrap the individual requests into the batch object used by the cells below.
batch_request: LLMBatchRequest = LLMBatchRequest(requests=_requests)

In [5]:
# Map every project-level request to its Hugging Face payload, then serialise the
# payloads as compact JSON, one document per line (JSONL), in text and UTF-8 bytes.
requests = list(map(LLMRequestMapping.to_target, batch_request.requests))
lines = [json.dumps(payload, separators=(',', ':')) for payload in requests]
jsonl_str = '\n'.join(lines)
jsonl_bytes = bytes(jsonl_str, encoding='utf-8')

In [7]:
# Parse the first serialised line back into a dict; it is splatted as keyword
# arguments into a single chat-completion call in the next cell.
request_dict = json.loads(lines[0])

In [8]:
# Smoke test: send that one request through the async inference client
# (`client` comes from the "Prepare input file" cell) and display the raw reply.
result = await client.chat_completion(**request_dict)
result

ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='stop', index=0, message=ChatCompletionOutputMessage(role='assistant', content='{ "result": 4 }', tool_call_id=None, tool_calls=None), logprobs=None)], created=1744125157, id='', model='microsoft/Phi-3-mini-4k-instruct', system_fingerprint='3.2.1-native', usage=ChatCompletionOutputUsage(completion_tokens=9, prompt_tokens=17, total_tokens=26), object='chat.completion')

In [40]:
# Map the raw chat-completion reply back into the project's response model.
#
# `request_dict` was parsed from `lines[0]`, i.e. it is the serialised form of
# `batch_request.requests[0]`, so that request's id is the one to attach.
# Do not use a bare `request` here: no cell defines it (comprehension variables
# do not leak into the notebook namespace), so it only works with stale kernel
# state and raises NameError after Restart & Run All.
response_list = LLMResponseListMapping.to_source(
    result,
    request_id=batch_request.requests[0].id,
    input_id=request_dict['extra_body']['input_id'],
    response_type=SomeOutput,
)
response_list

LLMResponseList(id=UUID('f7fd3b29-2772-468d-831f-b160bf1dfaa0'), request_id=UUID('cb02d5a3-bed8-4109-b06c-04d585081ef3'), responses=[LLMResponse(id=UUID('3976268b-adbf-494a-b88b-fda24781d477'), content=BoundAssistantOutput(id=UUID('bd7fdedd-f32a-4d7f-845d-0a21413828b6'), input_id=UUID('c882baa3-2047-4f65-b8df-bd176bba3b2c'), result=4), logprobs=None)])

In [None]:
# Persist the serialised requests as a JSONL file named after the batch id.
# NOTE(review): other cells write to '../.tmp' while this one uses '.tmp'
# (relative to the notebook's working directory) — confirm which is intended.
input_file_path = Path(f'.tmp/input_{batch_request.id}.jsonl')

# Create the target directory first; on a fresh checkout it does not exist and
# `open` would fail with FileNotFoundError.
input_file_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit UTF-8 and '\n' keep the file identical across platforms instead of
# depending on the locale encoding and newline translation.
with open(input_file_path, 'w', encoding='utf-8', newline='\n') as input_file:
    input_file.writelines(f'{line}\n' for line in lines)

### Apptainer

In [6]:
from math_rag.infrastructure.containers import InfrastructureContainer


# Instantiate the project's infrastructure container and initialise its resources
# before asking it for any component.
infrastructure_container = InfrastructureContainer()
infrastructure_container.init_resources()

# Batch LLM component used by the cells below (TGI-backed, going by its name).
tgi_batch_llm = infrastructure_container.tgi_batch_llm()

In [7]:
# Async initialisation of the batch LLM. In the recorded run below this opens an
# SSH connection and runs `mkdir -p tgi_default_root` on the remote host.
await tgi_batch_llm.init_resources()

2025-05-07 11:59:21,568 - INFO - Host canonicalization disabled
2025-05-07 11:59:21,568 - INFO - Opening SSH connection to login-gpu.hpc.srce.hr, port 22


2025-05-07 11:59:21,618 - INFO - [conn=0] Connected to SSH server at login-gpu.hpc.srce.hr, port 22
2025-05-07 11:59:21,618 - INFO - [conn=0]   Local address: 172.18.0.11, port 47240
2025-05-07 11:59:21,619 - INFO - [conn=0]   Peer address: 161.53.2.37, port 22
2025-05-07 11:59:21,649 - INFO - [conn=0] Beginning auth for user lpanic
2025-05-07 11:59:21,918 - INFO - [conn=0] Auth for user lpanic succeeded
2025-05-07 11:59:21,919 - INFO - [conn=0, chan=0] Requesting new SSH session
2025-05-07 11:59:21,951 - INFO - [conn=0, chan=0]   Command: mkdir -p tgi_default_root
2025-05-07 11:59:22,101 - INFO - [conn=0, chan=0] Received exit status 0
2025-05-07 11:59:22,101 - INFO - [conn=0, chan=0] Received channel close
2025-05-07 11:59:22,102 - INFO - [conn=0, chan=0] Channel closed
2025-05-07 11:59:22,102 - INFO - Command `mkdir -p tgi_default_root` in `run` returned stdout: 
2025-05-07 11:59:22,102 - INFO - [conn=0] Closing connection
2025-05-07 11:59:22,102 - INFO - [conn=0] Sending disconnect

In [8]:
# Run the whole batch through the TGI batch LLM and display the result
# (an LLMBatchResult in the recorded output below).
res = await tgi_batch_llm.batch_generate(
    batch_request=batch_request,
    response_type=SomeOutput,
    poll_interval=3 * 60,  # presumably seconds, i.e. poll every 3 minutes — TODO confirm
    max_tokens_per_day=None,  # no daily token budget passed
    max_num_retries=0,  # presumably disables retries — TODO confirm
)
res

2025-05-07 12:05:57,408 - INFO - Host canonicalization disabled
2025-05-07 12:05:57,409 - INFO - Opening SSH connection to login-gpu.hpc.srce.hr, port 22
2025-05-07 12:05:57,421 - INFO - [conn=24] Connected to SSH server at login-gpu.hpc.srce.hr, port 22
2025-05-07 12:05:57,422 - INFO - [conn=24]   Local address: 172.18.0.11, port 43032
2025-05-07 12:05:57,422 - INFO - [conn=24]   Peer address: 161.53.2.37, port 22
2025-05-07 12:05:57,456 - INFO - [conn=24] Beginning auth for user lpanic
2025-05-07 12:05:57,729 - INFO - [conn=24] Auth for user lpanic succeeded
2025-05-07 12:05:57,731 - INFO - [conn=24, chan=0] Requesting new SSH session
2025-05-07 12:05:57,763 - INFO - [conn=24, chan=0]   Subsystem: sftp
2025-05-07 12:05:57,774 - INFO - [conn=24, chan=0] Starting SFTP client
2025-05-07 12:05:57,987 - INFO - [conn=24, chan=0] Received exit status 0
2025-05-07 12:05:57,988 - INFO - [conn=24, chan=0] Received channel close
2025-05-07 12:05:57,990 - INFO - [conn=24, chan=0] SFTP client exi

LLMBatchResult(id=UUID('0c081dd8-6f87-4f9c-b875-b5bd2b4780a2'), batch_request_id=UUID('db910604-e604-4baa-9074-1f0ec402d574'), response_lists=[LLMResponseList(id=UUID('ad0418c9-0570-4e91-ba94-330f2c346748'), request_id=UUID('c3ddc060-a2ac-4540-8336-eca388f276a3'), responses=[LLMResponse(id=UUID('2bb755c4-286e-449e-9e14-99f4c18a1aa5'), content=BoundAssistantOutput(id=UUID('ec70d2d7-8348-48b2-89bb-049816121202'), input_id=UUID('da06fad5-0368-4ffb-9faa-8f199129318d'), result=13), logprobs=None)]), LLMResponseList(id=UUID('0f5674a6-8def-4ea1-8a4c-7a1ea58ee80c'), request_id=UUID('b1c78bda-9181-48f3-89c6-1fa0d2ad6f59'), responses=[LLMResponse(id=UUID('9e5b99ed-c460-46c7-abf6-aea0db1622dc'), content=BoundAssistantOutput(id=UUID('d65c1c46-4106-4a6e-b0cf-4dcfa8277b6c'), input_id=UUID('da06fad5-0368-4ffb-9faa-8f199129318d'), result=26), logprobs=None)]), LLMResponseList(id=UUID('c92b8c98-1f1a-417a-9b8d-ff8d3079cd7c'), request_id=UUID('c7407d1e-036e-49a8-8baf-5816f8ade059'), responses=[LLMRespons

In [9]:
from pathlib import Path


# SFTP client from the infrastructure container.
sftp_client = infrastructure_container.sftp_client()

# Fetch the error log for local inspection; arguments are presumably
# (remote source, local target) — verify against the client's signature.
await sftp_client.download(
    Path('tgi_default_root/error.log'), Path('../.tmp/error.log')
)

2025-05-05 09:25:03,554 - INFO - Host canonicalization disabled
2025-05-05 09:25:03,555 - INFO - Opening SSH connection to login-gpu.hpc.srce.hr, port 22
2025-05-05 09:25:03,574 - INFO - [conn=35] Connected to SSH server at login-gpu.hpc.srce.hr, port 22
2025-05-05 09:25:03,575 - INFO - [conn=35]   Local address: 172.18.0.7, port 55530
2025-05-05 09:25:03,575 - INFO - [conn=35]   Peer address: 161.53.2.37, port 22
2025-05-05 09:25:03,613 - INFO - [conn=35] Beginning auth for user lpanic
2025-05-05 09:25:03,879 - INFO - [conn=35] Auth for user lpanic succeeded
2025-05-05 09:25:03,880 - INFO - [conn=35, chan=0] Requesting new SSH session
2025-05-05 09:25:03,903 - INFO - [conn=35, chan=0]   Subsystem: sftp
2025-05-05 09:25:03,911 - INFO - [conn=35, chan=0] Starting SFTP client
2025-05-05 09:25:04,135 - INFO - [conn=35, chan=0] Received exit status 0
2025-05-05 09:25:04,136 - INFO - [conn=35, chan=0] Received channel close
2025-05-05 09:25:04,136 - INFO - [conn=35, chan=0] SFTP client exit

In [8]:
from pathlib import Path


# SFTP client from the infrastructure container.
sftp_client = infrastructure_container.sftp_client()

# Manual step before downloading (presumably run on the remote host):
# tar -czvf snapshot.tar.gz data

# TODO download dir
# Until directory download exists, the hand-made archive is fetched as one file;
# arguments are presumably (remote source, local target).
await sftp_client.download(
    Path('tgi_default_root/snapshot.tar.gz'), Path('../.tmp/snapshot.tar.gz')
)

# Manual step after downloading (presumably run locally in ../.tmp):
# tar -xzvf snapshot.tar.gz

2025-05-07 08:00:09,206 - INFO - Host canonicalization disabled
2025-05-07 08:00:09,206 - INFO - Opening SSH connection to login-gpu.hpc.srce.hr, port 22
2025-05-07 08:00:09,220 - INFO - [conn=38] Connected to SSH server at login-gpu.hpc.srce.hr, port 22
2025-05-07 08:00:09,221 - INFO - [conn=38]   Local address: 172.18.0.7, port 56070
2025-05-07 08:00:09,221 - INFO - [conn=38]   Peer address: 161.53.2.37, port 22
2025-05-07 08:00:09,257 - INFO - [conn=38] Beginning auth for user lpanic
2025-05-07 08:00:09,528 - INFO - [conn=38] Auth for user lpanic succeeded
2025-05-07 08:00:09,530 - INFO - [conn=38, chan=0] Requesting new SSH session
2025-05-07 08:00:09,561 - INFO - [conn=38, chan=0]   Subsystem: sftp
2025-05-07 08:00:09,570 - INFO - [conn=38, chan=0] Starting SFTP client
2025-05-07 08:00:09,816 - INFO - [conn=38, chan=0] Received exit status 0
2025-05-07 08:00:09,816 - INFO - [conn=38, chan=0] Received channel close
2025-05-07 08:00:09,817 - INFO - [conn=38, chan=0] SFTP client exit

In [None]:
from os import environ

from decouple import config


# Set the environment variables before running the client script locally —
# presumably the script reads them the way it would inside a PBS job; the script
# itself is not visible here, so confirm against it.
environ['PBS_O_WORKDIR'] = '../.tmp'
# The Hugging Face token is reused as the TGI API key.
environ['TGI_API_KEY'] = config('HUGGINGFACE_TOKEN')
environ['MODEL_HUB_ID'] = 'microsoft/Phi-3-mini-4k-instruct'

# Execute the client script in this kernel's namespace.
%run ../assets/hpc/hf/tgi/tgi_client.py

In [None]:
import shutil

from pathlib import Path

import docker


class PrometheusSnapshotLoaderService:
    """Load a Prometheus TSDB snapshot into a container's data directory.

    Args:
        snapshot_root: Directory that holds one sub-directory per snapshot id.
        prometheus_data_path: Data directory to replace with the snapshot's
            blocks. Its current contents are deleted by `load`.
        prometheus_container_id: Id (or name) of the Docker container that is
            restarted once the blocks are in place.
    """

    def __init__(
        self,
        snapshot_root: Path,
        prometheus_data_path: Path,
        prometheus_container_id: str,
    ):
        self.snapshot_root = snapshot_root
        self.prometheus_data_path = prometheus_data_path
        self.prometheus_container_id = prometheus_container_id

    def load(self, snapshot_id: str) -> None:
        """Replace the data directory with the snapshot's blocks and restart.

        Args:
            snapshot_id: Name of the snapshot directory under `snapshot_root`.

        Raises:
            FileNotFoundError: If the snapshot directory does not exist or
                contains no block directory matching `01*`. Both checks run
                before anything is deleted, so the data directory is left
                untouched in that case.
        """
        snapshot_dir = self.snapshot_root / snapshot_id

        if not snapshot_dir.exists():
            raise FileNotFoundError(f'Snapshot ID not found: {snapshot_id}')

        # Only directories can be blocks: a stray regular file matching the
        # pattern would otherwise pass validation and make `copytree` fail
        # after the live data had already been wiped. Sorting makes the copy
        # order deterministic (glob order is arbitrary).
        block_dirs = sorted(
            path for path in snapshot_dir.glob('01*') if path.is_dir()
        )

        if not block_dirs:
            raise FileNotFoundError(f'No TSDB block found in snapshot: {snapshot_id}')

        # Destructive step, deliberately placed after all validation.
        if self.prometheus_data_path.exists():
            shutil.rmtree(self.prometheus_data_path)

        self.prometheus_data_path.mkdir(parents=True)

        # Copy every block, not just the first match, so that snapshots made
        # of several blocks are restored completely.
        for block_dir in block_dirs:
            shutil.copytree(block_dir, self.prometheus_data_path / block_dir.name)

        # Restart the container so it picks up the new data, and always release
        # the Docker client's connections afterwards.
        client = docker.from_env()

        try:
            container = client.containers.get(self.prometheus_container_id)
            container.restart()
        finally:
            client.close()

In [None]:
# Planned end-to-end flow (nothing below implements it yet):
# TODO zip snapshot
# TODO download
# TODO unzip snapshot to prometheus/snapshots
# TODO reload prometheus


# File-system client from the infrastructure container.
file_system_client = infrastructure_container.file_system_client()

REMOTE_ROOT_PATH = Path('tgi_default_root')
# Placeholder: must be replaced with a real PBS job id before running — with the
# Ellipsis left in, the path below becomes 'status_Ellipsis.json'.
pbs_job_id = ...
snapshot_json_path = REMOTE_ROOT_PATH / f'status_{pbs_job_id}.json'

# `test` presumably reports whether the remote path exists — TODO confirm.
if await file_system_client.test(snapshot_json_path):
    # TODO download and read .json
    # check status == success
    # then test data / name
    # archive it and download, then extract
    pass