In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

import nest_asyncio


def ensure_on_sys_path(path: str) -> None:
    """Put ``path`` at the front of ``sys.path`` unless it is already first.

    The directory keeps import precedence exactly as a plain
    ``sys.path.insert(0, path)`` would give it, but re-running the cell no
    longer stacks one more duplicate entry per execution.

    Args:
        path: Absolute directory path to make importable.
    """
    if sys.path[:1] != [path]:
        sys.path.insert(0, path)


# '..' is resolved against the kernel's working directory; presumably that makes it
# the repository root when the notebook lives in a subfolder — TODO confirm.
ensure_on_sys_path(os.path.abspath('..'))
# nest_asyncio patches asyncio so an event loop that is already running can be
# re-entered; presumably needed because the Jupyter kernel owns a running loop
# and project code may drive the loop itself — TODO confirm.
nest_asyncio.apply()

In [2]:
import logging


# Emit INFO-and-above records, each prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
from math_rag.infrastructure.containers import InfrastructureContainer


# Flag forwarded as `reset=` to every seeder's seed() call below.
RESET = False

# Instantiate the infrastructure container and initialise its resources before
# requesting any object from it.
infrastructure_container = InfrastructureContainer()
infrastructure_container.init_resources()

# Obtain the three seeders from the container.
math_article_seeder = infrastructure_container.math_article_seeder()
math_expression_seeder = infrastructure_container.math_expression_seeder()
math_expression_classification_seeder = (
    infrastructure_container.math_expression_classification_seeder()
)
# Seed in this order: articles, expressions, expression classifications.
# NOTE(review): the article seeder is called without `await` while the other two
# are awaited — presumably its seed() is synchronous; confirm it is not a coroutine.
math_article_seeder.seed(reset=RESET)
await math_expression_seeder.seed(reset=RESET)
await math_expression_classification_seeder.seed(reset=RESET)

# Objects the later cells work with: the two repositories and the LLM client.
math_expression_repository = infrastructure_container.math_expression_repository()
math_expression_classification_repository = (
    infrastructure_container.math_expression_classification_repository()
)
llm = infrastructure_container.openai_llm()

In [4]:
# TODO
# - write a description for each class
# - how should the set of classes be determined?
# - do class names need to fit in a single token?

In [5]:
# Fetch math expressions through the repository (the call is awaited).
# NOTE(review): `math_expressions` is not referenced again in the visible cells.
math_expressions = await math_expression_repository.get_math_expressions()

In [None]:
from math_rag.application.models.inference import (
    LLMConversation,
    LLMMessage,
    LLMParams,
    LLMRequest,
    LLMRequestBatch,
    LLMResponse,
)


response_type = str
prompts = ['what is 2 + 2', 'what is 2 + 3']

request_batch = LLMRequestBatch(
    requests=[
        LLMRequest(
            conversation=LLMConversation(
                messages=[LLMMessage(role='user', content=prompt)]
            ),
            params=LLMParams[response_type](
                model='gpt-4o-mini', temperature=0.0, response_type=response_type
            ),
        )
        for prompt in prompts
    ]
)
batch_id = await llm.batch_generate_init(request_batch)
batch_id

2025-03-06 20:10:04,906 - INFO - HTTP Request: POST https://api.openai.com/v1/files "HTTP/1.1 200 OK"
2025-03-06 20:10:06,049 - INFO - HTTP Request: POST https://api.openai.com/v1/batches "HTTP/1.1 200 OK"
2025-03-06 20:10:06,051 - INFO - Batch batch_67c9f30de7ac8190a781a743f42395c2 created with status validating


'batch_67c9f30de7ac8190a781a743f42395c2'

In [8]:
# Ask the LLM client for the results of the batch created above.
# In the recorded run the batch was still 'validating' and no value was displayed
# for `result`.
result = await llm.batch_generate_result(batch_id, response_type)
result

2025-03-06 20:12:03,931 - INFO - HTTP Request: GET https://api.openai.com/v1/batches/batch_67c9f30de7ac8190a781a743f42395c2 "HTTP/1.1 200 OK"
2025-03-06 20:12:03,932 - INFO - Batch batch_67c9f30de7ac8190a781a743f42395c2 status updated to validating


In [9]:
# Same lookup as the previous cell, with the batch id and response type written
# out literally — presumably so it can run without re-creating the batch (e.g.
# after a kernel restart); confirm before replacing the literal with `batch_id`.
result = await llm.batch_generate_result('batch_67c9f30de7ac8190a781a743f42395c2', str)
result

2025-03-06 22:33:14,496 - INFO - HTTP Request: GET https://api.openai.com/v1/batches/batch_67c9f30de7ac8190a781a743f42395c2 "HTTP/1.1 200 OK"
2025-03-06 22:33:14,498 - INFO - Batch batch_67c9f30de7ac8190a781a743f42395c2 status completed
2025-03-06 22:33:14,967 - INFO - HTTP Request: GET https://api.openai.com/v1/files/file-1gUSmeCsE19eAXMD5s6nCC/content "HTTP/1.1 200 OK"
2025-03-06 22:33:15,303 - INFO - HTTP Request: GET https://api.openai.com/v1/files/file-16wwEDXWSeiBETu5n7AiGf/content "HTTP/1.1 200 OK"
2025-03-06 22:33:15,731 - INFO - HTTP Request: DELETE https://api.openai.com/v1/files/file-16wwEDXWSeiBETu5n7AiGf "HTTP/1.1 200 OK"
2025-03-06 22:33:16,286 - INFO - HTTP Request: DELETE https://api.openai.com/v1/files/file-1gUSmeCsE19eAXMD5s6nCC "HTTP/1.1 200 OK"


LLMResponseBatch[str](request_batch=LLMRequestBatch[str](requests=[]), responses=[LLMResponse[str](content='2 + 2 equals 4.'), LLMResponse[str](content='2 + 3 equals 5.')])

In [11]:
# Show only the text of each response in the batch, printed one after another.
for response in result.responses:
    print(response.content)

2 + 2 equals 4.
2 + 3 equals 5.


In [7]:
from math_rag.application.models.inference import LLMConversation


# `__class__` of a class object is its metaclass, so this line names the metaclass
# (pydantic's ModelMetaclass in the recorded output), not LLMConversation itself.
print(f'{LLMConversation.__class__.__module__}.{LLMConversation.__class__.__name__}')
# The class's own defining module joined with its qualified name.
print(f'{LLMConversation.__module__}.{LLMConversation.__qualname__}')

pydantic._internal._model_construction.ModelMetaclass
math_rag.application.models.inference.llm_conversation.LLMConversation


In [8]:
# Rebinds the notebook-wide `response_type` name to a model class (the batch cell
# binds the same name to `str`).
response_type = LLMConversation

# Dotted path of the class: defining module + qualified name.
print(f'{response_type.__module__}.{response_type.__qualname__}')

math_rag.application.models.inference.llm_conversation.LLMConversation


In [6]:
from typing import Generic

from pydantic import BaseModel

from math_rag.application.types.inference import LLMResponseType


class SomeRequest(BaseModel, Generic[LLMResponseType]):
    """Scratch generic model whose field holds a response class, not an instance."""

    response_type: type[LLMResponseType]


class MyResponse(BaseModel):
    """Minimal one-field model used as the response class in this experiment."""

    name: str


class SomeResponse(SomeRequest[MyResponse]):
    """SomeRequest specialised to MyResponse; adds no fields of its own."""

    pass


# Round trip: build the model, dump it to a plain dict, then validate that dict
# back into a model.
# NOTE(review): SomeRequest is used unparametrised here (no [MyResponse]) and
# SomeResponse is not used at all — confirm that is intended.
some_request = SomeRequest(response_type=MyResponse)

some_dict = some_request.model_dump()

some_request_validated = SomeRequest.model_validate(some_dict)

In [2]:
from math_rag.application.models.assistants import KCAssistantOutput
from math_rag.application.models.inference import LLMParams


# NOTE(review): `output` is not referenced again in the visible cells.
output = KCAssistantOutput(katex='some katex')

# response_type is given the class itself; the recorded print shows it kept as a class.
params = LLMParams(model='some model', temperature=0.0, response_type=KCAssistantOutput)
print(params.response_type)

<class 'math_rag.application.models.assistants.kc_assistant_output.KCAssistantOutput'>


In [3]:
from math_rag.infrastructure.mappings.documents import LLMParamsMapping


# Map the params through LLMParamsMapping.to_target (presumably yielding the
# document-layer model — TODO confirm), then dump the result to a dict.
document_params = LLMParamsMapping[KCAssistantOutput].to_target(params)
bson_dict = document_params.model_dump()

In [None]:
from math_rag.infrastructure.models.documents import LLMParamsDocument


# Reverse direction: validate the dict back into an LLMParamsDocument, then map
# it to params again with to_source.
# NOTE(review): this cell has no trailing expression or print, yet an output is
# recorded beneath it — presumably left over from an earlier revision of the cell.
original_document_params = LLMParamsDocument.model_validate(bson_dict)
original_params = LLMParamsMapping[KCAssistantOutput].to_source(
    original_document_params
)

id=UUID('2fb3641b-09b1-48b7-8f78-3e23de2eee83') model='some model' temperature=0.0 top_logprobs=None reasoning_effort=None max_completion_tokens=None response_type='math_rag.application.models.assistants.KCAssistantOutput' metadata=None n=1
