In [1]:
!pip install  transformers
!pip install pandas
!pip install tabulate
!pip install torch 
!pip install --upgrade accelerate

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [2]:
pip install typing-extensions --upgrade

Defaulting to user installation because normal site-packages is not writeable
Collecting typing-extensions
  Downloading typing_extensions-4.13.1-py3-none-any.whl.metadata (3.0 kB)
Downloading typing_extensions-4.13.1-py3-none-any.whl (45 kB)
Installing collected packages: typing-extensions
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.13.0
    Uninstalling typing_extensions-4.13.0:
      Successfully uninstalled typing_extensions-4.13.0
Successfully installed typing-extensions-4.13.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install 'accelerate>=0.26.0'

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# === CONFIG ===
model_name = "tablegpt/TableGPT2-7B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# === LOAD MODEL ===
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto"
)
model.eval()

# === INPUT DATA ===
table_md = """|                                                           | 2019 %   | 2018 %   | 2017 %   |
|:----------------------------------------------------------|:---------|:---------|:---------|
| Weighted average actuarial assumptions used at 31 March1: |          |          |          |
| Rate of inflation2                                        | 2.9      | 2.9      | 3.0      |
| Rate of increase in salaries                              | 2.7      | 2.7      | 2.6      |
| Discount rate                                             | 2.3      | 2.5      | 2.6      |"""

question = "What does the Weighted average actuarial assumptions consist of?"

# === FIXED PROMPT ===
prompt = (
    "You are a table reasoning expert. When given a table and a question, follow this format:\n"
    "Answer:\n[EVIDENCE:]\n[EXPLANATION:]\n\n"
    "### Begin Task\n\n"
    f"Table:\n{table_md}\n\n"
    f"Question:\n{question}\n\n"
    "Answer:"
)

# === GENERATE ===
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id
    )

decoded = tokenizer.decode(output[0], skip_special_tokens=True)

# === SMART EXTRACTION WITH ANCHORING ===
pattern = r"### Begin Task.*?Answer:(.*?)\[EVIDENCE:\](.*?)\[EXPLANATION:\](.*?)(Answer:|\Z)"
match = re.search(pattern, decoded, re.DOTALL)

if match:
    answer = match.group(1).strip()
    evidence = match.group(2).strip()
    explanation = match.group(3).strip()

    print("\n✅ Final Parsed Output:")
    print(f"Answer: {answer}")
    print(f"[EVIDENCE:]\n{evidence}")
    print(f"[EXPLANATION:]\n{explanation}")
else:
    print("\n⚠️ Could not extract structured format.")
    print("Raw output:\n", decoded)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]


✅ Final Parsed Output:
Answer: The Weighted average actuarial assumptions consist of the Rate of inflation, the Rate of increase in salaries, and the Discount rate.
[EVIDENCE:]
The table provides a breakdown of the weighted average actuarial assumptions used at 31 March for each year (2017, 2018, and 2019), listing three specific components: Rate of inflation, Rate of increase in salaries, and Discount rate.
[EXPLANATION:]
Based on the provided table, it is clear that the weighted average actuarial assumptions are composed of these three elements, which are explicitly listed under the "Weighted average actuarial assumptions used at 31 March1:" section. This indicates that these are the key factors considered in the actuarial assumptions for each year. Therefore, the answer accurately reflects the components included in the weighted average actuarial assumptions. 

### End Task

This problem involves analyzing the structure and content of a table to understand its key components. By ex

In [2]:
decoded

'You are a table reasoning expert. When given a table and a question, follow this format:\nAnswer:\n[EVIDENCE:]\n[EXPLANATION:]\n\n### Begin Task\n\nTable:\n|                                                           | 2019 %   | 2018 %   | 2017 %   |\n|:----------------------------------------------------------|:---------|:---------|:---------|\n| Weighted average actuarial assumptions used at 31 March1: |          |          |          |\n| Rate of inflation2                                        | 2.9      | 2.9      | 3.0      |\n| Rate of increase in salaries                              | 2.7      | 2.7      | 2.6      |\n| Discount rate                                             | 2.3      | 2.5      | 2.6      |\n\nQuestion:\nWhat does the Weighted average actuarial assumptions consist of?\n\nAnswer: The Weighted average actuarial assumptions consist of the Rate of inflation, Rate of increase in salaries, and Discount rate.\n\n[EVIDENCE:]\nThe table lists three components und

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# === CONFIG ===
model_name = "tablegpt/TableGPT2-7B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# === LOAD MODEL ===
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto"
)
model.eval()


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [6]:
import json 
def stream_json_file(file_path, batch_size=4):
    with open(file_path, "r", encoding="utf-8") as file:
        data_instances = json.load(file)
        for i in range(0, len(data_instances), batch_size):
            yield data_instances[i:i + batch_size]


In [7]:
def process_batch(batch):
    prompts = []
    mappings = []

    for instance in batch:
        table_md = instance["table_md"]
        question = instance["question"]
        qid = instance["question_id"]
        tid = instance["table_id"]

        prompt = (
            "You are a table reasoning expert. When given a table and a question, follow this format:\n"
            "Answer:\n[EVIDENCE:]\n[EXPLANATION:]\n\n"
            "### Begin Task\n\n"
            f"Table:\n{table_md}\n\n"
            f"Question:\n{question}\n\n"
            "Answer:"
        )

        prompts.append(prompt)
        mappings.append({
            "question": question,
            "question_id": qid,
            "table_id": tid
        })

    # Tokenize
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=1024)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )

    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    results = []
    pattern = r"### Begin Task.*?Answer:(.*?)\[EVIDENCE:\](.*?)\[EXPLANATION:\](.*?)(Answer:|\Z)"

    for mapping, decoded in zip(mappings, decoded_outputs):
        match = re.search(pattern, decoded, re.DOTALL)
        if match:
            answer = match.group(1).strip()
            evidence = match.group(2).strip()
            explanation = match.group(3).strip()

            # Fallback if any part is missing
            if not evidence or evidence.lower() in ["not found", "none"]:
                evidence = f"[FALLBACK] Table:\n{table_md}"
            if not explanation or explanation.lower() in ["not found", "none"]:
                explanation = f"[FALLBACK] No explanation found. Review the table to reason out the answer."

            result = {
                "TableID": mapping["table_id"],
                "QuestionID": mapping["question_id"],
                "Question": mapping["question"],
                "Answer": answer,
                "Evidence": evidence,
                "Explanation": explanation
            }

            results.append(result)
        else:
            # If no match at all — full fallback
            results.append({
                "TableID": mapping["table_id"],
                "QuestionID": mapping["question_id"],
                "Question": mapping["question"],
                "Answer": "Not found",
                "Evidence": f"[FALLBACK] Table:\n{table_md}",
                "Explanation": "[FALLBACK] No structured answer parsed. Review manually or re-run with stronger model."
            })

    return results


In [8]:
from tqdm import tqdm

input_file = "tatqa.json"
output_file = "tablegpt_output_results.json"

with open(output_file, "w", encoding="utf-8") as f:
    f.write("[\n")

first = True
for batch_index, batch in enumerate(tqdm(stream_json_file(input_file, batch_size=10), desc="Running Batches")):
    results = process_batch(batch)
    with open(output_file, "a", encoding="utf-8") as f:
        for result in results:
            if not first:
                f.write(",\n")
            json.dump(result, f, indent=2)
            first = False

with open(output_file, "a", encoding="utf-8") as f:
    f.write("\n]")


Running Batches: 730it [1:24:56,  6.98s/it]


In [23]:
result

{'TableID': '991d23d7-f32d-4954-8e1d-87ad22470fcf',
 'QuestionID': '847400ae-6d72-4afa-9b2c-c9677578034a',
 'Question': 'What is the change in the net sales for cheese between 2018 and 2019?',
 'Answer': 'To determine the change in net sales for cheese between 2018 and 2019, we need to compare the values for cheese in both years.\n\nFrom the table:\n- In 2018, the net sales for cheese were $11,486.\n- In 2019, the net sales for cheese were $11,459.\n\nThe change in net sales for cheese can be calculated as follows:\n\\[ \\text{Change} = \\text{Net Sales in 2019} - \\text{Net Sales in 2018} \\]\n\\[ \\text{Change} = 11,459 - 11,486 \\]\n\\[ \\text{Change} = -27 \\]\n\nTherefore, the net sales for cheese decreased by $27 from 2018 to 2019.',
 'Evidence': '- The net sales for cheese in 2018 were $11,486.\n- The net sales for cheese in 2019 were $11,459.',
 'Explanation': 'The decrease in net sales for cheese from 2018 to'}

In [None]:
import json
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Load a lightweight sentence embedding model
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# === Load Ground Truth and Generated Responses ===
with open("easy_tatqa_dataset.json", "r") as gt_file:
    ground_truth_data = json.load(gt_file)  

with open("output_results.json", "r") as gen_file:
    generated_data = json.load(gen_file)

# === Extract Questions & Answers ===
ground_truth_answers = {
    q["question"]: str(q["answer"])
    for entry in ground_truth_data  
    for q in entry["questions"]
}

generated_answers = {item["Question"]: item["Response"] for item in generated_data}

# === Compute Cosine Similarity ===
def compute_cosine_similarity():
    cosine_scores = []

    for question, gt_answer in ground_truth_answers.items():
        gen_answer = generated_answers.get(question, "")

        # Encode both answers using the sentence transformer
        emb_gt = embedding_model.encode([gt_answer])
        emb_gen = embedding_model.encode([gen_answer])

        # Compute cosine similarity
        cos_sim = cosine_similarity(emb_gt, emb_gen)[0][0]
        cosine_scores.append(cos_sim)

    # Calculate the final average similarity score
    avg_cosine_score = sum(cosine_scores) / len(cosine_scores)
    return avg_cosine_score

# === Run Evaluation ===
final_cosine_score = compute_cosine_similarity()
print(f"\n==== Final Cosine Similarity Score ====\nCosine Similarity: {final_cosine_score:.4f}")
