In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys

import nest_asyncio


# Put the parent directory of the working directory at the front of the import
# path. The membership check keeps the cell idempotent: re-running it no longer
# stacks another duplicate entry onto sys.path each time.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# nest_asyncio patches asyncio so an already-running event loop can be
# re-entered. NOTE(review): presumably needed because the notebook kernel
# already runs a loop — confirm it is still required.
nest_asyncio.apply()

In [None]:
import logging


# Configure the root logger: INFO threshold, records rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
from math_rag.infrastructure.containers import InfrastructureContainer


# Forwarded as `reset=` to every seeder's `seed` call below.
# NOTE(review): what a reset actually does is defined by the seeders — confirm
# before flipping this to True.
RESET = False

# Build the infrastructure container and initialise its resources before any
# provider is resolved from it.
infrastructure_container = InfrastructureContainer()
infrastructure_container.init_resources()

# Resolve the three seeders from the container.
math_article_seeder = infrastructure_container.math_article_seeder()
math_expression_seeder = infrastructure_container.math_expression_seeder()
math_expression_classification_seeder = (
    infrastructure_container.math_expression_classification_seeder()
)
# Seed in order: articles, then expressions, then expression classifications.
# NOTE(review): the article seeder is called without `await` while the other
# two are awaited — presumably its `seed` is synchronous; if it is a coroutine
# function the call here never actually runs. Confirm against the seeder.
math_article_seeder.seed(reset=RESET)
await math_expression_seeder.seed(reset=RESET)
await math_expression_classification_seeder.seed(reset=RESET)

# Handles used by later cells: the two repositories and the LLM wrapper.
math_expression_repository = infrastructure_container.math_expression_repository()
math_expression_classification_repository = (
    infrastructure_container.math_expression_classification_repository()
)
llm = infrastructure_container.llm()

In [None]:
from enum import Enum


class MathExpressionCategory(str, Enum):
    """Category labels for math expressions.

    The ``str`` mixin makes every member a real string, so a member compares
    equal to its plain value (``MathExpressionCategory.OTHER == 'other'``).
    """

    # NOTE(review): no other cell visible in this notebook references this
    # enum, and the hard-coded class list in the closed-set prompt cell uses a
    # different label set (it includes 'formula') — confirm which is
    # authoritative.
    EQUALITY = 'equality'
    INEQUALITY = 'inequality'
    CONSTANT = 'constant'
    VARIABLE = 'variable'
    OTHER = 'other'

In [None]:
# TODO:
# - Write a description for each class.
# - Decide how the set of classes should be determined.
# - Check whether each class name needs to be a single token.

In [None]:
import numpy as np

from openai import NOT_GIVEN
from openai.types.chat import ChatCompletion


def get_prompt(math_expr: str) -> str:
    """Build the open-ended classification prompt for one expression.

    The prompt asks the model to name a single-word class describing the
    STRUCTURE of the expression; no list of allowed classes is given.

    Args:
        math_expr: Expression text, inserted verbatim into the prompt.

    Returns:
        The prompt text. It begins with a blank line and ends with a newline
        right after the ``Class:`` cue.
    """
    # One entry per prompt line; the empty first and last entries yield the
    # leading and trailing newline once joined.
    prompt_lines = [
        '',
        'You are a mathematical expression classifier.',
        (
            'Given a mathematical expression, classify it in a single class '
            'regarding STRUCTURE of the expression.'
        ),
        'Class must be a single word.',
        '',
        'Return a class only!',
        '',
        'Mathematical expression:',
        f'{math_expr}',
        '',
        'Class:',
        '',
    ]

    return '\n'.join(prompt_lines)


# Flag read by get_completion(): when truthy the request sets a JSON-object
# response format; otherwise the parameter is passed as NOT_GIVEN.
use_json = False


async def get_completion(prompt: str) -> ChatCompletion:
    """Send ``prompt`` as a single user message to ``gpt-4o``.

    The request uses temperature 0.0 and asks for token log-probabilities with
    ``top_logprobs=5``. The response format is a JSON object when the
    module-level ``use_json`` flag is truthy and is left as ``NOT_GIVEN``
    otherwise.

    Args:
        prompt: Full text of the user message.

    Returns:
        The chat completion returned by the client.
    """
    if use_json:
        response_format = {'type': 'json_object'}
    else:
        response_format = NOT_GIVEN

    user_message = {'role': 'user', 'content': prompt}

    return await llm.client.chat.completions.create(
        model='gpt-4o',
        messages=[user_message],
        temperature=0.0,
        logprobs=True,
        top_logprobs=5,
        response_format=response_format,
    )


completion = await get_completion(prompt)

for math_node in completion.choices[0].logprobs.content:
    for y in math_node.top_logprobs:
        print(f'"{y.token}": {np.exp(y.logprob)}')

    print('------')
    print(math_node.token)
    print(math_node.logprob)
    print(np.exp(math_node.logprob))

In [None]:
# Fetch math expressions through the repository.
# NOTE(review): no cell visible here reads `math_expressions` yet — presumably
# it is meant to feed the classification prompts; confirm.
math_expressions = await math_expression_repository.get_math_expressions()

In [None]:
# Closed-set variant of the classification prompt: the model is asked to pick
# one of four listed classes rather than come up with its own label.
# NOTE(review): `math_expr` is not assigned in this cell; it must already be
# bound in the kernel when the cell runs, otherwise this raises NameError —
# confirm where it is expected to come from.
# NOTE(review): the listed classes include 'formula', which is not a member of
# MathExpressionCategory, and omit 'equality'/'inequality' — confirm which
# label set is intended.
prompt = f"""
You are a mathematical expression classifier.
Given a mathematical expression, classify it in one of 4 given classes:
- constant
- variable
- formula
- other

Return a class only!

Mathematical expression:
{math_expr}

Class:
"""