In [48]:
import json 
from tqdm import tqdm
import time
import openai
from dotenv import load_dotenv
import os

In [49]:
# Load variables from a .env file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(override=True)

True

In [50]:
# Source dataset: a JSON file of question records, read once by the loading cell.
INPUT_PATH = "../data/train_golden.json"
# Destination JSON file for the records enriched with generated programs.
OUTPUT_PATH = "../data/train_rationale.json"
# Chat model used as the default for every completion request.
MODEL = "gpt-4o"
# API key taken from the environment; None when the variable is not set.
API_KEY = os.environ.get("OPENAI_API_KEY")

In [51]:
# Fail fast when the key is missing. Request errors are caught and turned into
# empty strings by get_response_from_openai, so without this check a missing
# key would let the whole run finish with an empty program for every item.
if not API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment or the .env file."
    )
openai.api_key = API_KEY

In [52]:
def build_prompt(question, context):
    """Render the program-generation prompt for one question/document pair.

    Args:
        question: Question text, inserted after ``Question: ``.
        context: Document text, inserted after ``Context: ``.

    Returns:
        The filled-in prompt string. The template is a triple-quoted literal
        written at function indentation, so the returned text starts with a
        newline and each non-empty template line keeps its four leading spaces.
    """
    prompt_text = f"""
    You are a financial reasoning assistant.

    Given the following financial document and a question, write a Python-style program that calculates the answer using variables and arithmetic operations.

    Only use the information provided in the document.

    End your program with a line that assigns the final result to the variable `answer`.
    
    ### Document:
    Context: {context}

    ### Question:
    Question: {question}

    ### Python-style Program:
    """
    return prompt_text

In [53]:
def get_response_from_openai(prompt, model=MODEL, temperature=0.3):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Prompt text, sent as the only message of the conversation.
        model: Chat model name; defaults to the module-level ``MODEL``.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped. Returns ``""`` when the request raises or when the reply
        carries no text; the problem is printed instead of being raised, so
        callers always get a string back.
    """
    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        content = response.choices[0].message.content
        # The API may return a message without text content. Handle that
        # explicitly rather than letting `.strip()` raise an AttributeError
        # that the catch-all below would report as an opaque error.
        if content is None:
            print("Error: response contained no text content")
            return ""
        return content.strip()
    except Exception as e:
        # Best-effort by design: report the failure and fall back to "".
        print(f"Error: {e}")
        return ""

In [54]:
# Read the input records. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's default locale encoding, which can mis-decode or
# reject non-ASCII characters in the documents.
with open(INPUT_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [60]:
# Collects one result record (a dict) per input item; filled by the generation loop.
output_data = []

In [61]:
# Start from an empty list every time this cell runs, so that re-running it
# cannot append a second copy of every record to results left over from an
# earlier execution.
output_data = []

for item in tqdm(data, desc="Processing items"):
    question = item['question']
    context = item['golden_chunk']
    answer = item['answer']
    # The reference program is optional in the input; default to an empty string.
    golden_program = item.get('golden_program', '')

    prompt = build_prompt(question, context)
    # Returns "" when the request fails; the record is still written out.
    golden_program_gen = get_response_from_openai(prompt)

    output_data.append({
        "question_id": item['question_id'],
        "question": question,
        "golden_chunk": context,
        "answer": answer,
        "golden_program_generated": golden_program_gen,
        "golden_program": golden_program
    })

    time.sleep(1.5)  # Rate limiting

Processing items: 100%|██████████| 1/1 [00:03<00:00,  3.21s/it]


In [62]:
# Serialise all collected records as 4-space-indented JSON, then write the
# text to the output file in a single call.
serialized = json.dumps(output_data, indent=4)
with open(OUTPUT_PATH, 'w') as f:
    f.write(serialized)
print(f"Rationale generation completed. Output saved to {OUTPUT_PATH}.")

Rationale generation completed. Output saved to ../data/train_rationale.json.
