In [1]:
import sys
from pathlib import Path

In [2]:
project_root = Path().resolve().parent
sys.path.append(str(project_root))

In [4]:
from src.data.data_loader import DataLoader

In [5]:
test_loader = DataLoader(split='test', seed=42)
test_data = test_loader.load_data()
len(test_data)

608

In [6]:
train_loader = DataLoader(split='train', seed=42)
train_data = train_loader.load_data()
len(train_data)

2429

In [4]:
data_loader = DataLoader()
qa_pairs = data_loader.load_data()

In [5]:
qa_pairs[0]

{'entry_id': 'entry_0000',
 'pre_text': ['26 | 2009 annual report in fiscal 2008 , revenues in the credit union systems and services business segment increased 14% ( 14 % ) from fiscal 2007 .',
  'all revenue components within the segment experienced growth during fiscal 2008 .',
  'license revenue generated the largest dollar growth in revenue as episys ae , our flagship core processing system aimed at larger credit unions , experienced strong sales throughout the year .',
  'support and service revenue , which is the largest component of total revenues for the credit union segment , experienced 34 percent growth in eft support and 10 percent growth in in-house support .',
  'gross profit in this business segment increased $ 9344 in fiscal 2008 compared to fiscal 2007 , due primarily to the increase in license revenue , which carries the highest margins .',
  'liquidity and capital resources we have historically generated positive cash flow from operations and have generally used fund

In [6]:
from src.models.answer_processor import AnswerProcessor


processor = AnswerProcessor()

In [7]:
llm_response = """

{
  "formula": "divide(subtract(9362.2, 9244.9), 9244.9)",
  "formatting_instructions": {
    "prefix": "",
    "suffix": "%",
    "rounding": 2,
    "multiplier": 100
  }
}

"""
    
try:
    answer = processor.process_answer(llm_response)
    print(f"Processed answer: {answer}")  # Should output: "1.27%"
except ValueError as e:
    print(f"Error: {e}")

Processed answer: 1.27%


In [8]:

from src.models.llm_interface import LLMInterface, QuestionContext


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
llm = LLMInterface()

NameError: name 'LLMInterface' is not defined

In [40]:
entry = qa_pairs[1024]

In [41]:
entry.keys()

dict_keys(['entry_id', 'pre_text', 'post_text', 'table', 'qa_pairs'])

In [42]:
context = QuestionContext(
            pre_text=entry['pre_text'],
            post_text=entry['post_text'],
            table=entry['table'],
            questions=entry['qa_pairs'],
            entry_id=entry['entry_id']
        )

In [43]:
from src.config.prompts import format_question_prompt


format_question_prompt({
                        "pre_text": context.pre_text,
                        "post_text": context.post_text,
                        "table": context.table,
                        "question": context.questions[0]['question']
                    })

"\n    pre_text: \n    ```\n    ['a significant portion of our natural gas production in the lower 48 states of the u.s .', 'is sold at bid-week prices or first-of-month indices relative to our specific producing areas .', 'average settlement date henry hub natural gas prices have been relatively stable for the periods of this report ; however , a decline began in september 2011 which has continued in 2012 with february averaging $ 2.68 per mmbtu .', 'should u.s .', 'natural gas prices remain depressed , an impairment charge related to our natural gas assets may be necessary .', 'our other major natural gas-producing regions are europe and eg .', 'natural gas prices in europe have been significantly higher than in the u.s .', 'in the case of eg our natural gas sales are subject to term contracts , making realized prices less volatile .', 'the natural gas sales from eg are at fixed prices ; therefore , our worldwide reported average natural gas realized prices may not fully track market

In [44]:
import asyncio

In [45]:
async def test_llm():
    try:
        answers = await llm.get_answers(context)
        return answers
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

In [46]:
results = await test_llm()

In [47]:
results

['```json\n{\n    "formula": "divide(subtract(77.97, 52.13), 52.13)",\n    "formatting_instructions": {\n        "prefix": "",\n        "suffix": "%",\n        "rounding": 1,\n        "multiplier": 100\n    }\n}\n```\n']

In [48]:
try:
    answer = processor.process_answer(results[0])
    print(f"Processed answer: {answer}")
except ValueError as e:
    print(f"Error: {e}")

Processed answer: 49.6%


In [49]:
entry['qa_pairs']

[{'question': 'how much has the western canadian select dollars per bbl increased since 2009?',
  'answer': '49.6%'}]

In [4]:
from src.evaluation.answer_evaluator import AnswerEvaluator


evaluator = AnswerEvaluator()

# Compare single answers
generated = "1.28%"
expected = "1.3%"
results = evaluator.compare_answers(generated, expected)
print(f"Comparison results: {results}")

Comparison results: {'number_match': True, 'prefix_match': True, 'suffix_match': True, 'rounding_match': False, 'percentage_diff': 1.5384615384615397, 'exact_match': False}


In [10]:
qa_pairs[0]['qa_pairs'][0]['answer']

'14.1%'

In [11]:
evaluator.parse_answer(qa_pairs[0]['qa_pairs'][0]['answer'])

ParsedAnswer(raw_string='14.1%', number=14.1, prefix='', suffix='%', decimal_places=1)

In [26]:
[y for y in x['qa_pairs'] for x in qa_pairs]

NameError: name 'x' is not defined

In [27]:
qa_pairs

[{'entry_id': 'entry_0000',
  'pre_text': ['26 | 2009 annual report in fiscal 2008 , revenues in the credit union systems and services business segment increased 14% ( 14 % ) from fiscal 2007 .',
   'all revenue components within the segment experienced growth during fiscal 2008 .',
   'license revenue generated the largest dollar growth in revenue as episys ae , our flagship core processing system aimed at larger credit unions , experienced strong sales throughout the year .',
   'support and service revenue , which is the largest component of total revenues for the credit union segment , experienced 34 percent growth in eft support and 10 percent growth in in-house support .',
   'gross profit in this business segment increased $ 9344 in fiscal 2008 compared to fiscal 2007 , due primarily to the increase in license revenue , which carries the highest margins .',
   'liquidity and capital resources we have historically generated positive cash flow from operations and have generally us

In [7]:
len(qa_pairs) * 0.2

607.4