In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

import nest_asyncio


# Patch asyncio so that an already-running event loop (as in Jupyter) can be re-entered.
nest_asyncio.apply()

# Put the parent of the working directory first on sys.path; presumably this is the
# repository root, so that the local `math_rag` package can be imported — TODO confirm.
sys.path.insert(0, os.path.abspath('..'))

### Prepare input file

In [2]:
from decouple import config
from huggingface_hub import AsyncInferenceClient


# Access token looked up by python-decouple under the name HUGGINGFACE_TOKEN.
HUGGINGFACE_TOKEN = config('HUGGINGFACE_TOKEN')

# Model to query. TGI_BASE_URL is an empty placeholder that only the commented-out
# `base_url` argument below refers to.
TGI_MODEL = 'microsoft/Phi-3-mini-4k-instruct'
TGI_BASE_URL = ''

# Async inference client bound to the model via the 'hf-inference' provider;
# no explicit request timeout is set (timeout=None).
client = AsyncInferenceClient(
    api_key=HUGGINGFACE_TOKEN,
    provider='hf-inference',
    model=TGI_MODEL,
    # base_url=TGI_BASE_URL,
    timeout=None,
)

In [20]:
from math_rag.application.base.assistants import BaseAssistantInput, BaseAssistantOutput


# Placeholder assistant input: declares no fields of its own beyond whatever
# BaseAssistantInput provides.
class SomeInput(BaseAssistantInput):
    pass


# Assistant output that adds a single integer field, `result`.
# NOTE(review): documented with `#` comments rather than a docstring on purpose — if the
# base class is a Pydantic model, a docstring would show up in the generated JSON
# schema; confirm before converting this to a docstring.
class SomeOutput(BaseAssistantOutput):
    result: int  # integer answer parsed from the model's reply

In [None]:
import json

from pathlib import Path

from math_rag.application.models.inference import (
    LLMBatchRequest,
    LLMBatchResult,
    LLMConversation,
    LLMMessage,
    LLMParams,
    LLMRequest,
)
from math_rag.infrastructure.mappings.inference.huggingface import (
    LLMRequestMapping,
    LLMResponseListMapping,
)


# Only the id of this input is used: it tags the request through `metadata`.
some_input = SomeInput()

# A fixed two-message chat: the system instruction followed by the user question.
request_conversation = LLMConversation(
    messages=[
        LLMMessage(role='system', content='You are a helpful assistant.'),
        LLMMessage(role='user', content='what is 2+2'),
    ]
)

# Generation settings; `response_type` names the structured output class expected back.
request_params = LLMParams(
    model=TGI_MODEL,
    temperature=0,
    response_type=SomeOutput,
    max_completion_tokens=10,
    metadata={'input_id': str(some_input.id)},
)

request = LLMRequest(conversation=request_conversation, params=request_params)

# Wrap the single request in a batch, map every request to its Hugging Face payload,
# then serialize each payload as compact single-line JSON (one JSONL record each).
batch_request: LLMBatchRequest = LLMBatchRequest(requests=[request])
requests = list(map(LLMRequestMapping.to_target, batch_request.requests))

lines = [json.dumps(payload, separators=(',', ':')) for payload in requests]

In [None]:
# Join the serialized records into one JSONL document (no newline after the last
# record) and keep a UTF-8 encoded copy alongside the text form.
jsonl_str = '\n'.join(lines)
jsonl_bytes = bytes(jsonl_str, 'utf-8')

In [32]:
# Parse the first serialized record back into a dict so that it can be expanded as
# keyword arguments for the client call.
request_dict = json.loads(lines[0])

In [33]:
# Send the record as a chat completion, passing each of its keys as a keyword argument;
# the bare name on the last line displays the raw response.
result = await client.chat_completion(**request_dict)
result

ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='stop', index=0, message=ChatCompletionOutputMessage(role='assistant', content='{ "result": 4 }', tool_call_id=None, tool_calls=None), logprobs=None)], created=1743600003, id='', model='microsoft/Phi-3-mini-4k-instruct', system_fingerprint='3.2.1-native', usage=ChatCompletionOutputUsage(completion_tokens=9, prompt_tokens=17, total_tokens=26), object='chat.completion')

In [40]:
# Map the raw Hugging Face response back to the application-level response list.
# `request` is the LLMRequest built earlier in the notebook. The input id is read from
# the payload's `extra_body` — presumably where the request mapping places the
# request `metadata`; TODO confirm against LLMRequestMapping.
response_list = LLMResponseListMapping.to_source(
    result,
    request_id=request.id,
    input_id=request_dict['extra_body']['input_id'],
    response_type=SomeOutput,
)
response_list

LLMResponseList(id=UUID('f7fd3b29-2772-468d-831f-b160bf1dfaa0'), request_id=UUID('cb02d5a3-bed8-4109-b06c-04d585081ef3'), responses=[LLMResponse(id=UUID('3976268b-adbf-494a-b88b-fda24781d477'), content=BoundAssistantOutput(id=UUID('bd7fdedd-f32a-4d7f-845d-0a21413828b6'), input_id=UUID('c882baa3-2047-4f65-b8df-bd176bba3b2c'), result=4), logprobs=None)])

In [None]:
# Persist the batch as a JSONL file named after the batch id: one record per line,
# each terminated by a newline.
input_file_path = Path(f'.tmp/input_{batch_request.id}.jsonl')

# The scratch directory may not exist yet (e.g. on a fresh checkout); without it,
# open() below would raise FileNotFoundError.
input_file_path.parent.mkdir(parents=True, exist_ok=True)

# Pin the encoding and line terminator so the file is byte-identical on every platform
# (text mode would otherwise use the locale encoding and translate '\n' on Windows).
with open(input_file_path, 'w', encoding='utf-8', newline='\n') as input_file:
    input_file.writelines(f'{line}\n' for line in lines)

### Apptainer

In [2]:
from pathlib import Path

from math_rag.infrastructure.containers import InfrastructureContainer


# Create the infrastructure container and initialise its resources before requesting
# any client from it.
infrastructure_container = InfrastructureContainer()
infrastructure_container.init_resources()

# Clients obtained from the container: image building, file transfer, job queue and
# HPC access. NOTE(review): whether each call returns a shared or a fresh instance
# depends on the container's provider configuration — not visible here.
apptainer_client = infrastructure_container.apptainer_client()
sftp_client = infrastructure_container.sftp_client()
pbs_pro_client = infrastructure_container.pbs_pro_client()
hpc_client = infrastructure_container.hpc_client()

In [3]:
# Health check of the Apptainer client before any image is built.
await apptainer_client.health()

True

In [6]:
# Definition file for the `hf_cli` image; fail early if it is missing locally.
hf_cli_def_path = Path('../assets/hpc/hf/hf_cli.def')
assert hf_cli_def_path.exists()

# Build from the definition file. The returned object is what gets uploaded as
# `hf_cli.sif` — presumably a stream of the built image; TODO confirm.
hf_cli_def_stream = await apptainer_client.build(hf_cli_def_path)

In [7]:
# Remote directory used as the destination of every upload and as the first argument
# of the job submission.
remote_project_root = Path('tgi_new')

In [8]:
# TODO: upload the .def file as well.
# Upload the build result to the remote project directory as `hf_cli.sif`.
await sftp_client.upload(hf_cli_def_stream, remote_project_root / 'hf_cli.sif')

In [None]:
# TODO: build the images in a loop over a list/tuple of .def paths,
# deriving each image name from its definition file (e.g. tgi.def -> tgi.sif).

In [6]:
# Definition file for the `tgi` image; fail early if it is missing locally.
tgi_def_path = Path('../assets/huggingface/tgi/tgi.def')
assert tgi_def_path.exists()

# Build from the definition file; the result is uploaded as `tgi.sif`.
tgi_def_stream = await apptainer_client.build(tgi_def_path)

In [7]:
# Upload the build result to the remote project directory as `tgi.sif`.
await sftp_client.upload(tgi_def_stream, remote_project_root / 'tgi.sif')

In [8]:
# Definition file for the `tgi_client` image; fail early if it is missing locally.
tgi_client_def_path = Path('../assets/huggingface/tgi/tgi_client.def')
assert tgi_client_def_path.exists()

# Build from the definition file; the result is uploaded as `tgi_client.sif`.
tgi_client_def_stream = await apptainer_client.build(tgi_client_def_path)

In [9]:
# Upload the build result to the remote project directory as `tgi_client.sif`.
await sftp_client.upload(tgi_client_def_stream, remote_project_root / 'tgi_client.sif')

In [10]:
# Local HPC environment file; fail early if it is missing.
# NOTE(review): presumably contains credentials — confirm it is safe to copy to the
# remote host.
env_path = Path('../.env.hpc')
assert env_path.exists()

In [11]:
# Copy the environment file into the remote project directory under the same name.
await sftp_client.upload(env_path, remote_project_root / '.env.hpc')

In [12]:
# Local `tgi.sh` script (the file name later passed to the queue submission);
# fail early if it is missing.
tgi_sh_path = Path('../assets/huggingface/tgi/tgi.sh')
assert tgi_sh_path.exists()

In [13]:
# Copy the script into the remote project directory as `tgi.sh`.
await sftp_client.upload(tgi_sh_path, remote_project_root / 'tgi.sh')

In [None]:
# Local `tgi.py` script; fail early if it is missing.
# NOTE(review): this path is under assets/hpc/hf/, whereas tgi.def and tgi.sh are read
# from assets/huggingface/tgi/ — confirm which directory layout is current.
tgi_py_path = Path('../assets/hpc/hf/tgi/tgi.py')
assert tgi_py_path.exists()

In [None]:
# Copy the script into the remote project directory as `tgi.py`.
await sftp_client.upload(tgi_py_path, remote_project_root / 'tgi.py')

In [None]:
from datetime import timedelta


# TODO: decide where this submission code should live.

# Submit `tgi.sh` from the remote project directory to the PBS Pro queue, requesting
# one chunk with 8 CPUs and 1 GPU for at most 20 minutes. The bare name on the last
# line displays the returned job identifier.
# NOTE(review): `id` shadows the Python builtin of the same name; renaming it (e.g. to
# `job_id`) would be cleaner, but other cells may read `id`.
id = await pbs_pro_client.queue_submit(
    remote_project_root,
    'tgi.sh',
    num_chunks=1,
    num_cpus=8,
    num_gpus=1,
    mem=16 * 1024**3,  # 17_179_869_184, i.e. 16 GiB if the unit is bytes — TODO confirm
    walltime=timedelta(minutes=20),
)
id

'471572.x3000c0s25b0n0.hsn.hpc.srce.hr'