In [None]:
import json
import time
import google.generativeai as genai

# -------------------------------
# 1. Configure the Google Generative AI API
# -------------------------------
genai.configure(api_key="KEY")  
model = genai.GenerativeModel('gemini-2.0-flash-001')

In [2]:
# -------------------------------
# 2. Load and Process the TAT-QA Dataset
# -------------------------------
with open('tatqa_text_or_tabletext.json', 'r', encoding="utf-8") as f:
    original_data = json.load(f)

In [3]:
questions = []
for entry in original_data:
    # build context from paragraphs only
    paragraph_text = "\n".join(p["text"] for p in entry["paragraphs"])
    for q in entry["questions"]:
        questions.append({
            "text":     paragraph_text,
            "question": q["question"]
        })
        if len(questions) == 3600:
            break
    if len(questions) == 3600:
        break

print(f"Total questions collected: {len(questions)}")

Total questions collected: 3600


In [4]:
# -------------------------------
# 3. Prompt templates
# -------------------------------
def answer_prompt(context: str, question: str) -> str:
    return f"""Context:
{context}

Question:
{question}

Please provide a concise answer in exactly one sentence, without any further explanation or calculation steps. It should start with "Answer: "

Answer:
"""

def evidence_prompt(context: str, question: str) -> str:
    return f"""In your evidence you should:
   - Explain in full English why each piece of data supports your answer. Strinctly start it with "Evidence:"

Context:
{context}

Question:
{question}

Answer and Evidence:
""".strip()

In [None]:
# -------------------------------
# 4. Helper to call Gemini with retries
# -------------------------------
def generate_with_retries(prompt: str, max_retries: int = 5, initial_delay: int = 5):
    delay = initial_delay
    for attempt in range(1, max_retries + 1):
        try:
            return model.generate_content(prompt)
        except Exception as e:
            if "429" in str(e):
                print(f"[429] rate limit on attempt {attempt}, retrying in {delay}s…")
                time.sleep(delay)
                delay *= 2
            else:
                raise
    raise RuntimeError("Max retries exceeded")

output_path = "tatqa_all_with_answers_and_evidence2.json"

# -------------------------------
# 5. Process every entry and question (with prints for verification)
# -------------------------------
results = []
i=1
for entry in original_data:

    context = "\n".join(p["text"] for p in entry["paragraphs"])

    # now loop all questions
    for q in entry["questions"]:
        i += 1
        print(i)
        question_uid  = q["uid"]
        question_text = q["question"].strip()

        # --- (a) generate the model's answer ---
        try:
            resp_ans = generate_with_retries(answer_prompt(context, question_text))
            ans      = resp_ans.text.strip()
        except Exception as e:
            print(f"[Answer error] qUID={question_uid}: {e}")
            ans = ""

        # --- (b) generate the gold‐evidence snippet ---
        try:
            resp_ev = generate_with_retries(evidence_prompt(context, question_text))
            ev_text = resp_ev.text
        except Exception as e:
            print(f"[Evidence error] qUID={question_uid}: {e}")
            ev_text = ""

        # extract after our marker
        if "Evidence:" in ev_text:
            snippet = ev_text.split("Evidence:")[-1]
        elif "The gold evidence is:" in ev_text:
            snippet = ev_text.split("The gold evidence is:")[-1]
        else:
            snippet = ""
        lines = [ln.strip() for ln in snippet.splitlines() if ln.strip()]
        gold_evidence = "\n".join(lines) if lines else "Not Available"

        # --- print for your inspection ---
        print(f"qUID: {question_uid}")
        # print(f"Table‐UID: {table_uid}")
        print(f"Question: {question_text}")
        print(f"Answer: {ans}")
        print(f"Gold Evidence: {gold_evidence}")
        print("-"*60)

        # --- collect into results ---
        results.append({
            "uid":           question_uid,
            "question":      question_text,
            "ans":           ans,
            "gold_evidence": gold_evidence
        })

        with open(output_path, "w", encoding="utf-8") as out_f:
            json.dump(results, out_f, ensure_ascii=False, indent=2)

3301
qUID: 57f725a8-925c-431d-851a-d6b71941888f
Question: What was the change in the payments for Charter-in vessels in operation between 2022 and 2023?
Answer: Answer: The information provided is insufficient to determine the change in charter-in vessel payments between 2022 and 2023.
Gold Evidence: Not Available
------------------------------------------------------------
3302
qUID: c32c65ab-dce9-4efb-b339-72b06ba5b26f
Question: What was the difference in the Discount based on incremental borrowing rate between Charter-in vessels in operation and Land Leases?
Answer: The provided text does not contain information about the discount based on incremental borrowing rate for any of the leases.
Gold Evidence: Not Available
------------------------------------------------------------
3303
qUID: 09f7b80a-58dc-4b9c-a325-561ab5d57ee5
Question: What do the directors' emoluments represent?
Answer: Answer: Directors' emoluments represent all earnings and aggregate contributions to pension scheme