In [None]:
from typing import TYPE_CHECKING


# Declarations for static type checkers / editor tooling only: TYPE_CHECKING is
# False at runtime, so nothing inside this branch executes when the cell runs.
if TYPE_CHECKING:
    from math_rag.application.containers import ApplicationContainer
    from math_rag.infrastructure.containers import InfrastructureContainer

    # Bare annotations (no assignment): they declare the types of these
    # notebook-level names without creating them here.
    # NOTE(review): application_container is never assigned in the visible cells;
    # presumably hooks.notebook_hook injects it — confirm.
    application_container: ApplicationContainer
    infrastructure_container: InfrastructureContainer

In [None]:
# NOTE(review): RESET is not read by any visible cell; presumably
# hooks.notebook_hook consumes it when the extension loads — confirm before
# changing or removing it.
RESET = False
# Load the project's IPython extension. Keep the magic on its own line: a line
# magic treats the remainder of the line as its argument.
%load_ext hooks.notebook_hook

### Prepare input file

In [None]:
from decouple import config
from huggingface_hub import AsyncInferenceClient


# Hub id of the chat model this client is bound to.
MODEL_HUB_ID = 'microsoft/Phi-3-mini-4k-instruct'

# Token is resolved through python-decouple's `config` instead of being
# hard-coded in the notebook.
HUGGINGFACE_TOKEN = config('HUGGINGFACE_TOKEN')

# Async Hugging Face inference client for MODEL_HUB_ID via the 'hf-inference'
# provider.
# NOTE(review): timeout=None presumably means "no client-side timeout" — confirm
# against the huggingface_hub docs.
client = AsyncInferenceClient(
    api_key=HUGGINGFACE_TOKEN,
    model=MODEL_HUB_ID,
    provider='hf-inference',
    timeout=None,
)

In [3]:
from math_rag.application.base.assistants import BaseAssistantInput, BaseAssistantOutput


# Placeholder assistant input: declares no fields of its own, so everything it
# exposes (including the `id` read when building request metadata) comes from
# BaseAssistantInput.
class SomeInput(BaseAssistantInput):
    ...


# Assistant output with a single integer field; passed as `response_type` both
# in the per-request LLMParams and in the batch_generate call.
# NOTE(review): documented with a `#` comment rather than a docstring on purpose:
# if the base class is a pydantic model, a docstring could surface in the
# generated schema — confirm.
class SomeOutput(BaseAssistantOutput):
    result: int

In [None]:
from math_rag.application.models.inference import (
    LLMBatchRequest,
    LLMConversation,
    LLMMessage,
    LLMParams,
    LLMRequest,
)


# One SomeInput instance; its id is attached to every request's metadata below.
some_input = SomeInput()
MODEL_HUB_ID = 'microsoft/Phi-3-mini-4k-instruct'

# 50 requests asking "what is <n>+2" for n = 0..49. Each request gets its own
# LLMParams / message objects (nothing is shared between requests).
# NOTE(review): all 50 requests carry the same `input_id` — confirm that is
# intended if responses are matched back to inputs by that key.
_requests = [
    LLMRequest(
        params=LLMParams(
            metadata={'input_id': str(some_input.id)},
            max_completion_tokens=10,
            response_type=SomeOutput,
            temperature=0,
            model=MODEL_HUB_ID,
        ),
        conversation=LLMConversation(
            messages=[
                LLMMessage(content='You are a helpful assistant.', role='system'),
                LLMMessage(content=f'what is {operand}+2', role='user'),
            ]
        ),
    )
    for operand in range(50)
]

# Wrap the individual requests into the batch object consumed by batch_generate.
batch_request: LLMBatchRequest = LLMBatchRequest(requests=_requests)

In [5]:
from math_rag.application.models.inference import (
    EMBatchRequest,
    EMParams,
    EMRequest,
)


# NOTE(review): this rebinds the notebook-level MODEL_HUB_ID, which the LLM
# cells set to 'microsoft/Phi-3-mini-4k-instruct'. Any cell run afterwards that
# reads MODEL_HUB_ID without redefining it will see the embedding model instead.
MODEL_HUB_ID = 'BAAI/bge-large-en-v1.5'

# 50 embedding requests over the texts 'hello world 0' .. 'hello world 49',
# each with its own EMParams instance.
_em_requests = [
    EMRequest(
        params=EMParams(dimensions=1024, model=MODEL_HUB_ID),
        text=f'hello world {idx}',
    )
    for idx in range(50)
]

# Wrap the individual requests into the batch object consumed by batch_embed.
em_batch_request: EMBatchRequest = EMBatchRequest(requests=_em_requests)

### Apptainer

In [4]:
from math_rag.infrastructure.containers import InfrastructureContainer


# Build the infrastructure container and initialise its resources before any
# provider is resolved from it.
infrastructure_container = InfrastructureContainer()
infrastructure_container.init_resources()

# Resolve the two batch back-ends used by the cells below: the TGI batch LLM
# (batch_generate) and the TEI batch embedder (batch_embed).
tgi_batch_llm = infrastructure_container.tgi_batch_llm()
tei_batch_em = infrastructure_container.tei_batch_em()

In [None]:
# Asynchronous setup of the TGI batch LLM; this cell sits before the
# batch_generate call in notebook order.
# NOTE(review): what init_resources provisions is not visible here — confirm
# whether it is safe to re-run.
await tgi_batch_llm.init_resources()

In [None]:
res = await tgi_batch_llm.batch_generate(
    batch_request=batch_request,
    response_type=SomeOutput,
    poll_interval=3 * 60,
    max_tokens_per_day=None,
    max_input_file_size=None,
    max_num_retries=0,
)
res

In [None]:
# Asynchronous setup of the TEI batch embedder; this cell sits before the
# batch_embed call in notebook order.
# NOTE(review): what init_resources provisions is not visible here — confirm
# whether it is safe to re-run.
await tei_batch_em.init_resources()

In [None]:
res = await tei_batch_em.batch_embed(
    batch_request=em_batch_request,
    poll_interval=3 * 60,
    max_tokens_per_day=None,
    max_input_file_size=None,
    max_num_retries=0,
)
res

In [None]:
from os import environ

from decouple import config


# Process environment set up before launching the client script below.
# NOTE(review): presumably tgi_client.py reads these three variables — confirm
# against the script. The API key is taken from the HUGGINGFACE_TOKEN setting
# rather than hard-coded; the model id repeats the Phi-3 id as a literal.
environ['PBS_O_WORKDIR'] = '../.tmp'
environ['TGI_API_KEY'] = config('HUGGINGFACE_TOKEN')
environ['MODEL_HUB_ID'] = 'microsoft/Phi-3-mini-4k-instruct'

# Must stay after the environ assignments above. Without -i, %run does not
# expose the notebook's variables to the script, hence the environment variables.
%run ../assets/hpc/hf/tgi/tgi_client.py