In [1]:
import json
import pymupdf4llm
import pandas as pd

# Read the OpenAI API key from a local JSON file. The context manager closes
# the file handle as soon as parsing finishes instead of leaving it open until
# garbage collection.
with open('./openai_key.json') as key_file:
    key = json.load(key_file)['key']
# Ground-truth table for the LLM tests: one row per bill PDF, paired with the
# JSON payload the model is expected to produce for that bill.
llm_test_data = pd.DataFrame({
    'filename': [
        'chowderhut_20231005_011.pdf',
        'shell_20231005_003.pdf',
        'beerhouse_20231209_005.pdf',
        'dennys_20231209_004.pdf',
        'cafemason_20231005_009.pdf',
        'topgolf_20231209_011.pdf',
        'yellow_20231209_008.pdf',
    ],
    'output': [
        {'category': bill_category, 'total_bill_amount': bill_total}
        for bill_category, bill_total in (
            ('food', 21.15),
            ('transport', 28.32),
            ('food', 41.72),
            ('food', 58.44),
            ('food', 32.59),
            ('other', 155.68),
            ('transport', 43.02),
        )
    ],
})

from openai import OpenAI
def prompt_llm(bill_text, key):
    """Ask the chat model for the total amount and category of a bill.

    Args:
        bill_text: Text extracted from the bill; it is appended to the user
            prompt wrapped in triple single quotes.
        key: API key passed to the ``OpenAI`` client.

    Returns:
        The ``content`` of the first choice's message, exactly as returned by
        the API (the prompt asks for JSON, but nothing is parsed here).
    """
    llm_client = OpenAI(api_key=key)

    # A backslash at the end of a line inside a string literal joins it with
    # the next source line, so the leading spaces of the continuation lines
    # below are part of the prompt text and must not be re-indented.
    system_prompt = "You are an AI assistant, skilled in helping \
         extract billing information from invoices and bills to help in reimbursement processes."
    user_prompt = "Using the following extracted information from a bill in triple quotes, generate \
          total_bill_amount, category which is one of food, transport, other as output. \
         Output only in json format as {\"total_bill_amount\":, \"category\":}. \
         \n Bill Extract: \n \n \n '''" + bill_text + "'''"

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # A fixed seed is passed on every request.
    response = llm_client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        seed=42,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [2]:
import unittest

class LLMStability(unittest.TestCase):
        
    def test_one_row(self):
        row = llm_test_data.iloc[0]
        md_text = pymupdf4llm.to_markdown("./bills/"+row['filename'])
        response = prompt_llm(md_text, key)
        response_json = json.loads(response[response.find('{'):response.rfind('}')+1])
        self.assertDictEqual(response_json, {'category': 'food', 'total_bill_amount': 21.15})

    def test_llm_output_sample(self):
        for i,row in llm_test_data.head(3).iterrows():
            md_text = pymupdf4llm.to_markdown("./bills/"+row['filename'])
            response = prompt_llm(md_text, key)
            response_json = json.loads(response[response.find('{'):response.rfind('}')+1])
            self.assertDictEqual(response_json, row['output'])
    
    def test_llm_output_full(self):
        ROWS_CORRECT = 0
        EXPECTED_MATCH_RATE = 0.5
        for i,row in llm_test_data.iterrows():
            md_text = pymupdf4llm.to_markdown("./bills/"+row['filename'])
            response = prompt_llm(md_text, key)
            response_json = json.loads(response[response.find('{'):response.rfind('}')+1])
            if(response_json == row['output']):
                ROWS_CORRECT=ROWS_CORRECT+1
        self.assertGreaterEqual(ROWS_CORRECT/llm_test_data.shape[0],EXPECTED_MATCH_RATE)

# Run the tests unittest discovers in the notebook's __main__ namespace. The
# placeholder argv keeps unittest from parsing the kernel's own command-line
# arguments, and exit=False stops it from calling sys.exit() in the kernel.
result = unittest.main(
    argv=[''],
    exit=False,
    verbosity=1,
)

...
----------------------------------------------------------------------
Ran 3 tests in 11.314s

OK


In [3]:
# Overall verdict of the run stored in `result` by the unittest.main call.
result.result.wasSuccessful()

True

In [4]:
# Number of failed assertions in the run; per the unittest docs, exceptions
# raised by a test are kept separately in `result.result.errors`.
len(result.result.failures)

0

In [5]:
# Total number of test methods executed in the run.
result.result.testsRun

3