In [None]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Input dataset; main() reads a "dynamic_pairs" list from every entry in it.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs.json"
# Destination of the generated batch-request JSONL file.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/batch_input.jsonl"
# Chat model and sampling temperature placed in every request body.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not read anywhere in this cell — confirm whether it is still needed.
RESUME       = True 

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples that build_messages prepends to every user prompt.
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; the {question}, {triples_str} and {sparql} placeholders
# are filled in by build_messages via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# MODEL and TEMPERATURE are assigned a second time here with the same values
# as at the top of the cell; MAX_TOKENS is the only new setting.
MODEL       = "gpt-4.1"
TEMPERATURE = 0.0
# Sent as "max_tokens" in every request body built by main().
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Read a ``.json`` or ``.jsonl`` file and return its records as a list.

    * ``.jsonl`` (case-insensitive suffix): one JSON document per non-blank line.
    * JSON list: returned as is.
    * JSON dict whose ``"questions"`` value is a list: that list is returned.
    * Any other JSON dict: wrapped in a single-element list.

    Raises:
        ValueError: if the top-level JSON value is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    raw_text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for row in raw_text.splitlines():
            if row.strip():
                records.append(json.loads(row))
        return records

    payload = json.loads(raw_text)
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = payload.get("questions")
    return questions if isinstance(questions, list) else [payload]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bullet list for the prompt.

    A list is rendered one item per line: 3-element lists/tuples become
    ``- <s, p, o>``, anything else becomes ``- <item>``. An empty list yields
    ``- (none)``. A non-list value is returned as its plain ``str()`` form.
    """
    if not isinstance(triples, list):
        return str(triples)

    rendered: List[str] = [
        "- <{}, {}, {}>".format(*item)
        if isinstance(item, (list, tuple)) and len(item) == 3
        else f"- {item}"
        for item in triples
    ]
    return "\n".join(rendered) if rendered else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages for one explanation request.

    The user message is FEWSHOT_EXAMPLES, a blank line, then ITEM_INSTRUCTIONS
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query. The system message is SYSTEM_PROMPT.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    user_content = "\n\n".join((FEWSHOT_EXAMPLES, item_section))

    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_content}
    return [system_turn, user_turn]

def sanitize(s: str) -> str:
    """Turn any value into a short identifier-safe string.

    The value is stringified, each whitespace run becomes one underscore,
    every character outside ``[a-zA-Z0-9._:-]`` becomes an underscore, and the
    result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def main():
    """Write one Batch API request line per dynamic pair found in INPUT_PATH.

    Each request's ``custom_id`` is ``<sanitized entry id>__dp<pair index>``,
    where the entry id is the entry's "id", else its "qid", else its position
    in the input list.
    """
    entries = load_any(INPUT_PATH)
    requests = []

    for idx, entry in enumerate(tqdm(entries, desc="Building batch JSONL")):
        eid = entry.get("id") or entry.get("qid") or idx
        for i, dp in enumerate(entry.get("dynamic_pairs", [])):
            chat = build_messages(
                (dp.get("question") or "").strip(),
                dp.get("triples", []),
                (dp.get("sparql") or "").strip(),
            )
            # Every batch line carries: custom_id + method + url + body.
            request_body = {
                "model": MODEL,
                "temperature": TEMPERATURE,
                "max_tokens": MAX_TOKENS,
                "messages": chat,
            }
            requests.append({
                "custom_id": f"{sanitize(eid)}__dp{i}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": request_body,
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(req, ensure_ascii=False) + "\n" for req in requests)

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 150/150 [00:00<00:00, 31383.53it/s]

Wrote 745 requests to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/batch_input.jsonl





In [10]:
import openai
import time
import json

# OpenAI client shared by main(); constructed with no explicit arguments.
client = openai.OpenAI()

# JSONL file of batch requests that main() uploads.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/batch_input.jsonl"
# Where main() saves the downloaded batch output.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_cot_batch_output.jsonl"

def _print_error_file(batch_obj) -> None:
    """Print the first 2000 characters of the batch's error file, if it has one."""
    error_file_id = getattr(batch_obj, "error_file_id", None)
    if error_file_id:
        err_txt = client.files.content(error_file_id).text
        print("Error file content:\n", err_txt[:2000])


def main():
    """Upload the batch input, run the batch, poll until it ends, and save the output.

    Raises:
        SystemExit: with code 1 when the batch ends in any status other than
            "completed", or completes without an output file.
    """
    # Context manager so the upload handle is closed even if the request fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "QALD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll once a minute until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    if b.status != "completed":
        print("Batch ended with status:", b.status)
        _print_error_file(b)
        raise SystemExit(1)

    # A completed batch may carry no output file (e.g. when no request
    # succeeded); fail with a readable message instead of passing None on.
    output_file_id = getattr(b, "output_file_id", None)
    if not output_file_id:
        print("Batch completed but produced no output file.")
        _print_error_file(b)
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


Uploaded file: file-MJ7qmrGcU3L8GNpffQY6yd
Batch ID: batch_68a4ebf7681081908ef2603f60a1c7d5
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_cot_batch_output.jsonl


In [None]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset whose dynamic pairs receive a "cot" field.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs.json"
# Batch result .jsonl parsed by parse_batch_output.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """
    Reads OpenAI Batch result .jsonl and returns: custom_id -> assistant content (or error text)

    Blank lines and records without a ``custom_id`` are skipped. For a
    status-200 record the stripped assistant message is stored; if the body
    lacks the expected shape, the first 2000 characters of the JSON-dumped body
    are stored instead. For any other record the value is ``"[ERROR] <message>"``,
    with the message taken from the response body's error, then from the
    record's top-level ``error`` (set when a request produced no response),
    and finally a generic "Non-200 status" text.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                # Nothing to key the result on.
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape or null content: keep the raw body for inspection.
                    content = json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Turn any value into a short identifier-safe string.

    The value is stringified, each whitespace run becomes one underscore,
    every character outside ``[a-zA-Z0-9._:-]`` becomes an underscore, and the
    result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    """
    Heuristic: find the list field that holds the entries.
    Prefer lists whose elements have 'dynamic_pairs'.
    Fallback to the first list-of-dicts field.
    """
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate its list of entries.

    Returns:
        ``(root, items, key)`` where ``root`` is the parsed JSON, ``items`` is
        the entry list inside it, and ``key`` is the dict key under which the
        list was found (None when the root itself is the list).

    Raises:
        ValueError: if the root is neither a list nor a dict, or if no list of
            entries can be found inside a dict root.
    """
    with open(path, "r", encoding="utf-8") as f:
        root = json.load(f)

    if isinstance(root, list):
        return root, root, None

    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common layout: {"questions": [...]}
    if isinstance(root.get("questions"), list):
        return root, root["questions"], "questions"

    items_key = _find_items_key_in_dict(root)
    if items_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Attach each batch explanation to its dynamic pair as "cot" and save the result.

    Pairs are matched on the custom id ``<sanitized entry id>__dp<pair index>``,
    where the entry id is the entry's "id", else its "qid", else its position
    in the list. The whole root object is written to MERGED_OUTPUT_JSON.
    """
    root_obj, items_list, items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    total_pairs = 0
    updated = 0

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        id_prefix = sanitize(eid)
        for i, dp in enumerate(entry.get("dynamic_pairs", [])):
            total_pairs += 1
            cid = f"{id_prefix}__dp{i}"
            if cid not in cid_to_text:
                continue
            dp["cot"] = cid_to_text[cid]
            updated += 1

    merged_path = pathlib.Path(MERGED_OUTPUT_JSON)
    merged_path.parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 745/745 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json


In [None]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per question / demonstration.
triples_limit = 10
# build_system_msg only looks at the first NUM_DEMOS dynamic pairs of a sample.
NUM_DEMOS = 1

# Dataset with "cot" explanations attached to the dynamic pairs.
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Destination of the batch-request JSONL written by main().
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl"
# Model name placed in every request body.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Bk9BchWy"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any) -> Union[str, List[str]]:
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    lines: List[str] = []
    for i, raw in enumerate(seq[:limit], 1):
        t = _coerce_triple(raw)
        if isinstance(t, str):
            triple_str = t
        else:
            triple_str = " ".join(map(str, t))
        lines.append(f"{i}. {triple_str}")
    return "\n".join(lines) if lines else "(none)"

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# System prompt without examples; build_system_msg returns it when a sample
# has no demonstrations or none of them is usable.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding worked examples from the sample.

    Demonstrations come from ``sample["dynamic_pairs"]`` (the misspelled key
    ``"dynamic_paris"`` is accepted as a fallback). Only the first NUM_DEMOS
    entries are considered, and entries lacking a question or a SPARQL query
    are skipped. Examples that are kept are numbered consecutively from 1.
    If none remains, the plain GENERIC_INSTR prompt is returned.

    Returns:
        A ``{"role": "system", "content": ...}`` message dict.
    """
    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        demo = demo or {}
        # `or ""` keeps an explicit null from turning into the literal text
        # "None", which would slip past the skip check below.
        demo_q: str = str(demo.get("question") or "").strip()
        demo_sparql: str = str(demo.get("sparql") or "").strip()
        demo_cot: str = str(demo.get("cot") or "").strip()

        if not demo_q or not demo_sparql:
            continue

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by kept examples so a skipped demo leaves no gap.
        example_no = len(blocks) + 1
        block = (
            f"Example {example_no} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {example_no} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Write one Batch API request per dataset sample to batch_jsonl_path.

    Each request pairs the sample's system message (instructions plus
    demonstrations) with a user message holding the question and its numbered
    candidate triples. Requests are identified as ``example_<index>``.
    """
    with open(input_path, encoding="utf-8") as f:
        dataset = json.load(f)

    def _conversation(sample):
        # One {"messages": [system, user]} record for a single sample.
        question = sample.get("question", "").strip()
        triples_str = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{question}\n\nCandidate Triples (max 10, numbered):\n{triples_str}"
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    # All conversations are built before the output file is opened.
    jsonl_rows = [_conversation(sample) for sample in dataset]

    with open(batch_jsonl_path, "w", encoding="utf-8") as fout:
        for idx, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0
                }
            }
            fout.write(json.dumps(request) + "\n")
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 150 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhat is the timezone in San Pedro de Atacama?\n\nCandidate Triples (numbered, max 10):\n1. res:San_Pedro_de_Atacama dbo:timeZone res:Time_in_Chile\n2. res:San_Pedro_de_Atacama dbp:timezone res:Time_in_Chile\n3. res:San_Pedro_de_Atacama dbo:wikiPageWikiLink res:Time_in_Chile\n4. res:2021_AV7 dbp:discoverySite res:San_Pedro_de_Atacama\n5. res:2021_AV7 dbo:wikiPageWikiLink res:San_Pedro_de_Atacama\n6. res:1577 dbo:wikiPageWikiLink res:San_Pe

In [16]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Upload the request file; the context manager closes the handle afterwards.
with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl", "rb") as batch_input_file:
    upload = client.files.create(
        file=batch_input_file,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "QALD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until a terminal status is reached. "expired" and
# "cancelled" are included so the loop cannot spin forever on them.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status {batch.status!r}. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # Completed, but there is no output file to download.
    print("Batch completed without an output file. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-QPu1RvcmFQHECxH7DFjsoU
Batch ID: batch_68a4f82556cc8190bcf0f86632a8549c
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: finalizing
Status: completed
Saved outputs


In [17]:
import json, re
from pathlib import Path

# Records loaded as `gold_records`; each one receives a "refined_pred_query" field.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json")
# Batch output .jsonl holding the model replies (1-demonstration run).
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination of the enriched records.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_plus_gold.json")

# Locates the "<Answer>" marker and the opening brace of the JSON object after it.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the "sparql" value of the JSON object that follows ``<Answer>``.

    Only the first complete JSON object after the marker is decoded, so text
    that follows it (for instance a Chain-of-Thought section containing
    braces) no longer breaks parsing. Returns ``""`` when the content is
    empty, the marker is missing, the JSON is invalid, or the object has no
    "sparql" key.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode stops at the end of the first JSON value and ignores the rest.
        answer, _ = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    return answer.get("sparql", "")

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the request failed or nothing could be parsed)
pred_lookup = {}
failed_ids = []
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        rec = json.loads(line)
        cid = rec["custom_id"]
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Failed request: no response, or no choices in the body.
            content = None
        if not isinstance(content, str):
            failed_ids.append(cid)
            pred_lookup[cid] = ""
            continue
        pred_lookup[cid] = extract_sparql(content)

if failed_ids:
    print(f"Warning: {len(failed_ids)} request(s) had no usable response: {failed_ids[:10]}")

# Records are matched to predictions by position ("example_<index>").
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_plus_gold.json. Total records: 150


3 Examples

In [18]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per question / demonstration.
triples_limit = 10
# build_system_msg only looks at the first NUM_DEMOS dynamic pairs of a sample.
NUM_DEMOS = 3

# Dataset with "cot" explanations attached to the dynamic pairs.
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Destination of the batch-request JSONL written by main().
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl"
# Model name placed in every request body.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Bk9BchWy"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any) -> Union[str, List[str]]:
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    lines: List[str] = []
    for i, raw in enumerate(seq[:limit], 1):
        t = _coerce_triple(raw)
        if isinstance(t, str):
            triple_str = t
        else:
            triple_str = " ".join(map(str, t))
        lines.append(f"{i}. {triple_str}")
    return "\n".join(lines) if lines else "(none)"

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# System prompt without examples; build_system_msg returns it when a sample
# has no demonstrations or none of them is usable.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding worked examples from the sample.

    Demonstrations come from ``sample["dynamic_pairs"]`` (the misspelled key
    ``"dynamic_paris"`` is accepted as a fallback). Only the first NUM_DEMOS
    entries are considered, and entries lacking a question or a SPARQL query
    are skipped. Examples that are kept are numbered consecutively from 1.
    If none remains, the plain GENERIC_INSTR prompt is returned.

    Returns:
        A ``{"role": "system", "content": ...}`` message dict.
    """
    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        demo = demo or {}
        # `or ""` keeps an explicit null from turning into the literal text
        # "None", which would slip past the skip check below.
        demo_q: str = str(demo.get("question") or "").strip()
        demo_sparql: str = str(demo.get("sparql") or "").strip()
        demo_cot: str = str(demo.get("cot") or "").strip()

        if not demo_q or not demo_sparql:
            continue

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by kept examples so a skipped demo leaves no gap.
        example_no = len(blocks) + 1
        block = (
            f"Example {example_no} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {example_no} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Write one Batch API request per dataset sample to batch_jsonl_path.

    Each request pairs the sample's system message (instructions plus
    demonstrations) with a user message holding the question and its numbered
    candidate triples. Requests are identified as ``example_<index>``.
    """
    with open(input_path, encoding="utf-8") as f:
        dataset = json.load(f)

    def _conversation(sample):
        # One {"messages": [system, user]} record for a single sample.
        question = sample.get("question", "").strip()
        triples_str = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{question}\n\nCandidate Triples (max 10, numbered):\n{triples_str}"
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    # All conversations are built before the output file is opened.
    jsonl_rows = [_conversation(sample) for sample in dataset]

    with open(batch_jsonl_path, "w", encoding="utf-8") as fout:
        for idx, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0
                }
            }
            fout.write(json.dumps(request) + "\n")
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 150 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhat is the timezone in San Pedro de Atacama?\n\nCandidate Triples (numbered, max 10):\n1. res:San_Pedro_de_Atacama dbo:timeZone res:Time_in_Chile\n2. res:San_Pedro_de_Atacama dbp:timezone res:Time_in_Chile\n3. res:San_Pedro_de_Atacama dbo:wikiPageWikiLink res:Time_in_Chile\n4. res:2021_AV7 dbp:discoverySite res:San_Pedro_de_Atacama\n5. res:2021_AV7 dbo:wikiPageWikiLink res:San_Pedro_de_Atacama\n6. res:1577 dbo:wikiPageWikiLink res:San_Pe

In [19]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Upload the request file; the context manager closes the handle afterwards.
with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl", "rb") as batch_input_file:
    upload = client.files.create(
        file=batch_input_file,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "QALD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until a terminal status is reached. "expired" and
# "cancelled" are included so the loop cannot spin forever on them.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status {batch.status!r}. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # Completed, but there is no output file to download.
    print("Batch completed without an output file. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-RTmZTxxye4Jtx3VRQc3jRP
Batch ID: batch_68a4f9e639a08190bf1c2c98f9c22161
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: completed
Saved outputs


In [20]:
import json, re
from pathlib import Path

# Records loaded as `gold_records`; each one receives a "refined_pred_query" field.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json")
# Batch output .jsonl holding the model replies (3-demonstration run).
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination of the enriched records.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_plus_gold.json")

# Locates the "<Answer>" marker and the opening brace of the JSON object after it.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the "sparql" value of the JSON object that follows ``<Answer>``.

    Only the first complete JSON object after the marker is decoded, so text
    that follows it (for instance a Chain-of-Thought section containing
    braces) no longer breaks parsing. Returns ``""`` when the content is
    empty, the marker is missing, the JSON is invalid, or the object has no
    "sparql" key.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode stops at the end of the first JSON value and ignores the rest.
        answer, _ = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    return answer.get("sparql", "")

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the request failed or nothing could be parsed)
pred_lookup = {}
failed_ids = []
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        rec = json.loads(line)
        cid = rec["custom_id"]
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Failed request: no response, or no choices in the body.
            content = None
        if not isinstance(content, str):
            failed_ids.append(cid)
            pred_lookup[cid] = ""
            continue
        pred_lookup[cid] = extract_sparql(content)

if failed_ids:
    print(f"Warning: {len(failed_ids)} request(s) had no usable response: {failed_ids[:10]}")

# Records are matched to predictions by position ("example_<index>").
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_plus_gold.json. Total records: 150


5 Examples

In [21]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per question / demonstration.
triples_limit = 10
# Number of demonstrations for this run.
NUM_DEMOS = 5

# Dataset with "cot" explanations attached to the dynamic pairs.
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Path of the batch-request JSONL for this run.
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl"
# Model name for this run.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Bk9BchWy"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any) -> Union[str, List[str]]:
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    """Render the first ``limit`` triples as a numbered list, one per line.

    Each entry goes through ``_coerce_triple``; list results are joined with
    single spaces. Returns ``"(none)"`` when nothing is rendered.
    """
    def _render(raw: Any) -> str:
        coerced = _coerce_triple(raw)
        return coerced if isinstance(coerced, str) else " ".join(map(str, coerced))

    numbered = [f"{pos}. {_render(raw)}" for pos, raw in enumerate(seq[:limit], 1)]
    if not numbered:
        return "(none)"
    return "\n".join(numbered)

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# Instruction-only system prompt, used when a sample yields no usable demonstration.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding up to ``NUM_DEMOS`` worked examples.

    Demonstrations come from ``sample["dynamic_pairs"]`` (the misspelled
    ``"dynamic_paris"`` key is accepted as a fallback). A demonstration is
    used only when it has both a question and a SPARQL query. Its optional
    ``cot`` text is appended under ``<Chain-of-Thought>``.

    Compared with the previous version:
      * fields stored as ``None`` count as missing instead of becoming the
        literal text ``"None"``;
      * entries that are not dicts are skipped instead of raising;
      * examples are numbered consecutively even when one is skipped.

    Args:
        sample: One dataset record.

    Returns:
        ``{"role": "system", "content": ...}``; the content is
        ``GENERIC_INSTR`` when no demonstration is usable.
    """
    def _clean(value: Any) -> str:
        # str(None) would leak the word "None" into the prompt.
        return "" if value is None else str(value).strip()

    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        if not isinstance(demo, dict):
            continue
        demo_q = _clean(demo.get("question"))
        demo_sparql = _clean(demo.get("sparql"))
        if not demo_q or not demo_sparql:
            continue
        demo_cot = _clean(demo.get("cot"))

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            # The CoT gets the same escaping as the query, so its line breaks
            # appear as literal "\n" sequences in the prompt.
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by accepted demos so skipped entries leave no gaps.
        i = len(blocks) + 1
        block = (
            f"Example {i} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {i} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Turn every dataset sample into one Batch API chat request and write the JSONL file.

    Request ``i`` gets ``custom_id`` ``example_{i}``, matching the sample's
    position in the input file. A preview of the first record is printed.
    """
    with open(input_path, encoding="utf-8") as src:
        dataset = json.load(src)

    def _to_row(sample):
        # User turn: the question followed by its numbered candidate triples.
        question = sample.get("question", "").strip()
        triples_str = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{question}\n\nCandidate Triples (max 10, numbered):\n{triples_str}",
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    jsonl_rows = [_to_row(sample) for sample in dataset]

    with open(batch_jsonl_path, "w", encoding="utf-8") as fout:
        for idx, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0,
                },
            }
            fout.write(json.dumps(request) + "\n")
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 150 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhat is the timezone in San Pedro de Atacama?\n\nCandidate Triples (numbered, max 10):\n1. res:San_Pedro_de_Atacama dbo:timeZone res:Time_in_Chile\n2. res:San_Pedro_de_Atacama dbp:timezone res:Time_in_Chile\n3. res:San_Pedro_de_Atacama dbo:wikiPageWikiLink res:Time_in_Chile\n4. res:2021_AV7 dbp:discoverySite res:San_Pedro_de_Atacama\n5. res:2021_AV7 dbo:wikiPageWikiLink res:San_Pedro_de_Atacama\n6. res:1577 dbo:wikiPageWikiLink res:San_Pe

In [22]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Upload the request file; the handle is closed as soon as the upload returns.
with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl", "rb") as batch_input:
    upload = client.files.create(
        file=batch_input,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "QALD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until the batch reaches a state it cannot leave.
# "expired" and "cancelled" are included; without them the loop never ends.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status {batch.status!r}. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # Nothing to download; show the batch object for diagnosis.
    print("Batch completed without an output file. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-7a4MCaBz8oX9WJojwUvLmQ
Batch ID: batch_68a4fed4775c81909b8f736eef3d021d
Status: validating
Status: validating
Status: in_progress
Status: completed
Saved outputs


In [23]:
import json, re
from pathlib import Path

# Records that receive the predicted query (loaded further down in this cell).
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_with_cot.json")
# Batch output JSONL: one model response per line.
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination of the enriched JSON written at the end of this cell.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_plus_gold.json")

# Finds the <Answer> tag and the opening brace of the JSON object after it.
# extract_sparql only uses the START of group 1; the greedy tail is not trusted.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the ``"sparql"`` value of the JSON object that follows ``<Answer>``.

    The object is decoded with ``raw_decode`` so parsing stops at the end of
    the first complete JSON value. Text after it (for example a
    ``<Chain-of-Thought>`` section that itself contains ``}``) no longer
    breaks the extraction, which the greedy regex capture used to do.

    Args:
        content: Raw assistant message text.

    Returns:
        The query string, or ``""`` when the tag is absent, the JSON is
        malformed, or the ``"sparql"`` value is not a string.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        payload, _end = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    query = payload.get("sparql", "")
    return query if isinstance(query, str) else ""

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the response is unusable).
pred_lookup = {}
# custom_ids whose batch line carried no chat-completion text.
failed_ids = []
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing in json.loads.
            continue
        rec = json.loads(line)
        cid = rec["custom_id"]
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # e.g. "response" is null or the body holds an error instead of choices.
            content = None
        if isinstance(content, str):
            pred_lookup[cid] = extract_sparql(content)
        else:
            pred_lookup[cid] = ""
            failed_ids.append(cid)

# Predictions are matched by position: record i <-> custom_id "example_i".
missing_ids = []
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    if cid not in pred_lookup:
        missing_ids.append(cid)
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")
if failed_ids or missing_ids:
    print(
        f"Warning: {len(failed_ids)} response(s) without content, "
        f"{len(missing_ids)} record(s) without a prediction."
    )


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_plus_gold.json. Total records: 150


LcQUAD

In [24]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset read by main() through load_any().
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs.json"
# Despite the name, this is the batch *request* file that main() writes
# (note the "_batch_input.jsonl" suffix).
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs_batch_input.jsonl"
# NOTE: MODEL and TEMPERATURE are assigned again, with the same values, further down in this cell.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not read anywhere in the visible part of this cell — confirm it is still needed.
RESUME       = True 

SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# Request parameters copied by main() into every batch request body.
# MODEL and TEMPERATURE repeat the assignments made at the top of this cell.
MODEL       = "gpt-4.1-nano"
TEMPERATURE = 0.0
# Sent as "max_tokens" in each request body.
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load a list of records from a ``.jsonl`` or JSON file.

    ``.jsonl`` files yield one record per non-blank line. For JSON files a
    top-level list is returned as is, a dict holding a ``"questions"`` list
    returns that list, and any other dict is wrapped in a one-element list.

    Raises:
        ValueError: if the JSON root is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        return [json.loads(row) for row in text.splitlines() if row.strip()]

    payload = json.loads(text)
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = payload.get("questions")
    return questions if isinstance(questions, list) else [payload]

def to_human_triples(triples: Any) -> str:
    """Format gold triples as bullet lines for the explanation prompt.

    A list yields one ``- ...`` line per element; 3-element lists/tuples are
    shown as ``<s, p, o>``. An empty list gives ``"- (none)"``. Any non-list
    value is returned as ``str(triples)`` without a bullet.
    """
    if not isinstance(triples, list):
        return str(triples)

    def _bullet(item: Any) -> str:
        if isinstance(item, (list, tuple)) and len(item) == 3:
            subj, pred, obj = item
            return f"- <{subj}, {pred}, {obj}>"
        return f"- {item}"

    # Every bullet is non-empty, so an empty join means an empty list.
    return "\n".join(map(_bullet, triples)) or "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the system and user chat messages for one explanation request.

    The user message is the few-shot examples, a blank line, then
    ``ITEM_INSTRUCTIONS`` filled with the stripped question, the formatted
    triples and the stripped query.
    """
    item_prompt = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": f"{FEWSHOT_EXAMPLES}\n\n{item_prompt}"}
    return [system_turn, user_turn]

def sanitize(s: str) -> str:
    """Turn any value into an identifier-safe string of at most 120 characters.

    Whitespace runs collapse to a single ``_``; every remaining character
    outside ``a-z A-Z 0-9 . _ : -`` is replaced by ``_``.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    # Truncate so the resulting ids stay short.
    return safe[:120]

def main():
    """Write one Batch API request per dynamic pair of every dataset item.

    Each request asks the model to explain how a pair's SPARQL query follows
    from its question and triples. ``custom_id`` is
    ``"<sanitized item id>__dp<pair index>"``, where the item id is the
    entry's ``id``, else ``qid``, else its position in the file.

    Items whose ``dynamic_pairs`` is missing or null contribute no requests.
    Pairs that are not dicts are skipped but keep their index, so the ids of
    the remaining pairs do not shift.
    """
    items = load_any(INPUT_PATH)
    out = []
    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null stored under the key.
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            if not isinstance(dp, dict):
                continue
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            # REQUIRED: custom_id + method + url + body
            out.append({
                "custom_id": f"{sanitize(eid)}__dp{i}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 28613.65it/s]


Wrote 4955 requests to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs_batch_input.jsonl


In [None]:
import openai
import time
import json

client = openai.OpenAI()

# Request file uploaded as the batch input.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs_batch_input.jsonl"
# Where the downloaded batch results are saved.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the request file, run the batch, wait for it and save its output.

    The batch is polled once a minute until it is failed, completed, expired
    or cancelled.

    Raises:
        SystemExit: if the batch does not complete, or completes without an
            output file.
    """
    # Context manager so the upload handle is closed even if the request fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    # A completed batch without an output file leaves nothing to download.
    if b.status != "completed" or not getattr(b, "output_file_id", None):
        print("Batch ended with status:", b.status)
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(b.output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()

In [26]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset whose dynamic_pairs receive the generated text (read by load_container in main()).
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output().
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its response text.

    For a status-200 response the value is the stripped message content; if
    that cannot be read, the first 2000 characters of the JSON-dumped body
    are used. For any other status the value is ``"[ERROR] <message>"``.
    Blank lines and records without a ``custom_id`` are ignored.
    """
    def _text_of(record: Dict) -> str:
        response = record.get("response") or {}
        status = response.get("status_code")
        payload = response.get("body") or {}
        if status != 200:
            detail = (payload.get("error") or {}).get("message") or f"Non-200 status: {status}"
            return f"[ERROR] {detail}"
        try:
            return payload["choices"][0]["message"]["content"].strip()
        except Exception:
            # Unexpected body shape: keep a truncated dump for inspection.
            return json.dumps(payload)[:2000]

    results: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            stripped = raw.strip()
            if not stripped:
                continue
            record = json.loads(stripped)
            request_id = record.get("custom_id")
            text = _text_of(record)
            if request_id:
                results[request_id] = text
    return results

def sanitize(s: str) -> str:
    """Turn any value into an identifier-safe string of at most 120 characters.

    Whitespace runs collapse to a single ``_``; every remaining character
    outside ``a-z A-Z 0-9 . _ : -`` is replaced by ``_``.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate its list of items.

    Returns:
        ``(root, items, key)``. ``root`` is the parsed document and ``items``
        is the list object inside it (``root`` itself when the document is a
        list). ``key`` is the dict key under which the items were found, or
        ``None`` for a list root.

    Raises:
        ValueError: if the root is neither a list nor a dict, or no item
            list can be found in a dict root.
    """
    with open(path, "r", encoding="utf-8") as handle:
        root = json.load(handle)

    if isinstance(root, list):
        return root, root, None
    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # A "questions" list takes precedence over the heuristic key search.
    if isinstance(root.get("questions"), list):
        return root, root["questions"], "questions"

    items_key = _find_items_key_in_dict(root)
    if items_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Copy each batch response into the matching dynamic pair as ``cot`` and save the result.

    Pairs are matched through the ``"<sanitized item id>__dp<pair index>"``
    custom id. Responses that ``parse_batch_output`` marked with the
    ``"[ERROR]"`` prefix are NOT stored: such a placeholder would otherwise
    end up as the ``cot`` of a demonstration. They are counted and reported
    instead. Items whose ``dynamic_pairs`` is missing or null are skipped.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null stored under the key.
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            text = cid_to_text.get(cid)
            if text is None or not isinstance(dp, dict):
                continue
            if text.startswith("[ERROR]"):
                failed += 1
                continue
            dp["cot"] = text
            updated += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"Skipped {failed} dynamic_pairs whose batch request returned an error.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 4954/4955 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_with_cot.json


In [27]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per prompt
# (passed as `limit` to _format_triples_for_prompt).
triples_limit = 10
# Number of dynamic-pair demonstrations build_system_msg takes from each sample.
NUM_DEMOS = 1

# JSON dataset loaded by main().
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Batch-request JSONL written by main().
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl"
# Model name placed in the body of every batch request.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Br5K42ie"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any) -> Union[str, List[str]]:
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    """Render the first ``limit`` triples as a numbered list, one per line.

    Each entry goes through ``_coerce_triple``; list results are joined with
    single spaces. Returns ``"(none)"`` when nothing is rendered.
    """
    def _render(raw: Any) -> str:
        coerced = _coerce_triple(raw)
        return coerced if isinstance(coerced, str) else " ".join(map(str, coerced))

    numbered = [f"{pos}. {_render(raw)}" for pos, raw in enumerate(seq[:limit], 1)]
    if not numbered:
        return "(none)"
    return "\n".join(numbered)

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# Instruction-only system prompt, used when a sample yields no usable demonstration.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding up to ``NUM_DEMOS`` worked examples.

    Demonstrations come from ``sample["dynamic_pairs"]`` (the misspelled
    ``"dynamic_paris"`` key is accepted as a fallback). A demonstration is
    used only when it has both a question and a SPARQL query. Its optional
    ``cot`` text is appended under ``<Chain-of-Thought>``.

    Compared with the previous version:
      * fields stored as ``None`` count as missing instead of becoming the
        literal text ``"None"``;
      * entries that are not dicts are skipped instead of raising;
      * examples are numbered consecutively even when one is skipped.

    Args:
        sample: One dataset record.

    Returns:
        ``{"role": "system", "content": ...}``; the content is
        ``GENERIC_INSTR`` when no demonstration is usable.
    """
    def _clean(value: Any) -> str:
        # str(None) would leak the word "None" into the prompt.
        return "" if value is None else str(value).strip()

    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        if not isinstance(demo, dict):
            continue
        demo_q = _clean(demo.get("question"))
        demo_sparql = _clean(demo.get("sparql"))
        if not demo_q or not demo_sparql:
            continue
        demo_cot = _clean(demo.get("cot"))

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            # The CoT gets the same escaping as the query, so its line breaks
            # appear as literal "\n" sequences in the prompt.
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by accepted demos so skipped entries leave no gaps.
        i = len(blocks) + 1
        block = (
            f"Example {i} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {i} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Turn every dataset sample into one Batch API chat request and write the JSONL file.

    Request ``i`` gets ``custom_id`` ``example_{i}``, matching the sample's
    position in the input file. A preview of the first record is printed.
    """
    with open(input_path, encoding="utf-8") as src:
        dataset = json.load(src)

    def _to_row(sample):
        # User turn: the question followed by its numbered candidate triples.
        question = sample.get("question", "").strip()
        triples_str = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{question}\n\nCandidate Triples (max 10, numbered):\n{triples_str}",
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    jsonl_rows = [_to_row(sample) for sample in dataset]

    with open(batch_jsonl_path, "w", encoding="utf-8") as fout:
        for idx, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0,
                },
            }
            fout.write(json.dumps(request) + "\n")
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 1000 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhich writer of Tales of Suspense is also the writer of karakuri Dji Ultimo ?\n\nCandidate Triples (numbered, max 10):\n1. res:Tales_of_Suspense dbo:wikiPageWikiLink res:James_Robinson_(writer)\n2. res:James_Robinson_(writer) dbo:wikiPageWikiLink res:Tales_of_Suspense\n3. res:Tales_of_Suspense dbo:writer res:Larry_Lieber\n4. res:Tales_of_Suspense dbo:writer res:Robert_Bernstein_(comics)\n5. res:Tales_of_Suspense dbo:writer res:Stan_L

In [29]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Upload the request file; the handle is closed as soon as the upload returns.
with open("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_input.jsonl", "rb") as batch_input:
    upload = client.files.create(
        file=batch_input,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "LcQUAD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until the batch reaches a state it cannot leave.
# "expired" and "cancelled" are included; without them the loop never ends.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status {batch.status!r}. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # Nothing to download; show the batch object for diagnosis.
    print("Batch completed without an output file. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-XEtwz3XHDuMbE2vbaq9oLL
Batch ID: batch_68a5530e86a48190a09b9d52b797dba5
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: completed
Saved outputs


In [30]:
import json, re
from pathlib import Path

# Records that receive the predicted query (loaded further down in this cell).
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs.json")
# Batch output JSONL: one model response per line.
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination of the enriched JSON written at the end of this cell.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_plus_gold.json")

# Finds the <Answer> tag and the opening brace of the JSON object after it.
# extract_sparql only uses the START of group 1; the greedy tail is not trusted.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the ``"sparql"`` value of the JSON object that follows ``<Answer>``.

    The object is decoded with ``raw_decode`` so parsing stops at the end of
    the first complete JSON value. Text after it (for example a
    ``<Chain-of-Thought>`` section that itself contains ``}``) no longer
    breaks the extraction, which the greedy regex capture used to do.

    Args:
        content: Raw assistant message text.

    Returns:
        The query string, or ``""`` when the tag is absent, the JSON is
        malformed, or the ``"sparql"`` value is not a string.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        payload, _end = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    query = payload.get("sparql", "")
    return query if isinstance(query, str) else ""

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the response is unusable).
pred_lookup = {}
# custom_ids whose batch line carried no chat-completion text.
failed_ids = []
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing in json.loads.
            continue
        rec = json.loads(line)
        cid = rec["custom_id"]
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # e.g. "response" is null or the body holds an error instead of choices.
            content = None
        if isinstance(content, str):
            pred_lookup[cid] = extract_sparql(content)
        else:
            pred_lookup[cid] = ""
            failed_ids.append(cid)

# Predictions are matched by position: record i <-> custom_id "example_i".
missing_ids = []
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    if cid not in pred_lookup:
        missing_ids.append(cid)
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")
if failed_ids or missing_ids:
    print(
        f"Warning: {len(failed_ids)} response(s) without content, "
        f"{len(missing_ids)} record(s) without a prediction."
    )


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_1_dynamic_pairs_with_cot_plus_gold.json. Total records: 1000


3 Examples

In [31]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per prompt
# (passed as `limit` to _format_triples_for_prompt).
triples_limit = 10
# Number of dynamic-pair demonstrations build_system_msg takes from each sample.
NUM_DEMOS = 3

# Input dataset path for this cell.
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Batch-request JSONL path for this cell.
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl"
# Fine-tuned model identifier.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Br5K42ie"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any):
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    """Render the first ``limit`` triples as a numbered list, one per line.

    Each entry goes through ``_coerce_triple``; list results are joined with
    single spaces. Returns ``"(none)"`` when nothing is rendered.
    """
    def _render(raw: Any) -> str:
        coerced = _coerce_triple(raw)
        return coerced if isinstance(coerced, str) else " ".join(map(str, coerced))

    numbered = [f"{pos}. {_render(raw)}" for pos, raw in enumerate(seq[:limit], 1)]
    if not numbered:
        return "(none)"
    return "\n".join(numbered)

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# Fallback system prompt: build_system_msg() returns it when a sample has no
# demonstrations, or when none of them has both a question and a SPARQL query.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding up to ``NUM_DEMOS`` demonstrations.

    Demonstrations are read from ``sample["dynamic_pairs"]`` (the misspelled
    key "dynamic_paris" is accepted as well). A demonstration is used only if
    it has both a question and a SPARQL query; its chain-of-thought, when
    present, is appended after the answer.

    Args:
        sample: One dataset record.

    Returns:
        A chat message dict with role "system". Its content is
        ``GENERIC_INSTR`` when no demonstration is usable, otherwise the task
        header followed by one INPUT/OUTPUT block per demonstration.
    """
    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []
    if not demo_list:
        return {"role": "system", "content": GENERIC_INSTR}

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        demo = demo or {}
        # `or ""` keeps an explicit null from being rendered as the text "None",
        # which would otherwise pass the emptiness check and reach the prompt.
        demo_q: str = str(demo.get("question") or "").strip()
        demo_sparql: str = str(demo.get("sparql") or "").strip()
        demo_cot: str = str(demo.get("cot") or "").strip()

        # Skip unusable demonstrations before doing any formatting work.
        if not demo_q or not demo_sparql:
            continue

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by emitted blocks so a skipped demonstration leaves no gap
        # (e.g. "Example 1" followed directly by "Example 3").
        i = len(blocks) + 1
        block = (
            f"Example {i} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {i} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Turn the dataset at ``input_path`` into a Batch API request file.

    Every sample becomes one chat-completions request (system message with
    demonstrations, user message with the question and its candidate triples)
    written as one JSON line to ``batch_jsonl_path``. Requests are identified
    as ``example_<index>`` in dataset order.
    """
    with open(input_path, encoding="utf-8") as src:
        samples = json.load(src)

    def _as_row(sample):
        # One chat transcript (system + user) for a single dataset sample.
        asked = sample.get("question", "").strip()
        listing = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{asked}\n\nCandidate Triples (max 10, numbered):\n{listing}",
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    jsonl_rows = [_as_row(sample) for sample in samples]

    with open(batch_jsonl_path, "w", encoding="utf-8") as sink:
        for position, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{position}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0,
                },
            }
            sink.write(json.dumps(request) + "\n")

    print(f"[1/1] Wrote {len(jsonl_rows)} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 1000 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhich writer of Tales of Suspense is also the writer of karakuri Dji Ultimo ?\n\nCandidate Triples (numbered, max 10):\n1. res:Tales_of_Suspense dbo:wikiPageWikiLink res:James_Robinson_(writer)\n2. res:James_Robinson_(writer) dbo:wikiPageWikiLink res:Tales_of_Suspense\n3. res:Tales_of_Suspense dbo:writer res:Larry_Lieber\n4. res:Tales_of_Suspense dbo:writer res:Robert_Bernstein_(comics)\n5. res:Tales_of_Suspense dbo:writer res:Stan_L

In [32]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Request file produced by the previous cell, and where the raw results are saved.
batch_input_jsonl = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_input.jsonl"
batch_output_jsonl = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_output.jsonl"

# Statuses after which a batch never changes again. Polling must stop on all of
# them, otherwise an expired or cancelled batch keeps the loop running forever.
terminal_statuses = {"completed", "failed", "expired", "cancelled"}

# Context manager so the upload handle is closed even if the request raises.
with open(batch_input_jsonl, "rb") as batch_input:
    upload = client.files.create(
        file=batch_input,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "LcQUAD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until the batch reaches a terminal status.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in terminal_statuses:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status '{batch.status}'. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # No output file to download; point at the error file if there is one.
    print("Batch completed without an output file. Error file id:", getattr(batch, "error_file_id", None))
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open(batch_output_jsonl, "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-Q2nbpXckrNQo9xkWS7zRAx
Batch ID: batch_68a55831cd48819091b4b5b1da2cbe73
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved outputs


In [33]:
import json, re
from pathlib import Path

# Records that receive the predictions (a "refined_pred_query" field is added to each).
# NOTE(review): predictions are attached purely by position (custom_id "example_<idx>"),
# which presumes this file has the same record order as the file the batch input
# was built from — confirm.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs.json")
# Raw Batch API output (JSONL) holding the model completions.
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination for the gold records enriched with the predictions.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_plus_gold.json")

# Finds the <Answer> tag; group 1 starts at the opening brace of the JSON object after it.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the "sparql" value from the JSON object that follows ``<Answer>``.

    Only the first JSON value after the tag is parsed. The regex group itself is
    greedy and runs to the last ``}`` in the completion, so text after the JSON
    that contains braces (for instance a chain-of-thought quoting a SPARQL
    pattern) used to make parsing fail and the prediction was lost.

    Args:
        content: Raw completion text; ``None`` or empty is tolerated.

    Returns:
        The query string, or "" when there is no ``<Answer>`` JSON object or it
        cannot be parsed.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode reads exactly one JSON value and ignores whatever follows it.
        payload, _end = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    return payload.get("sparql", "")

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the request failed or nothing could be parsed).
pred_lookup = {}
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines in the output file
        pred = json.loads(line)
        cid = pred.get("custom_id")
        if not cid:
            continue
        try:
            content = pred["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # A request without a usable completion must not abort the whole
            # merge; its prediction is simply left empty.
            content = None
        pred_lookup[cid] = extract_sparql(content) if isinstance(content, str) else ""

# Predictions are attached by position: record idx <-> custom_id "example_<idx>".
missing = 0
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    if cid not in pred_lookup:
        missing += 1
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")
if missing:
    print(f"Warning: {missing} records had no matching prediction in {PRED_PATH}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_3_dynamic_pairs_with_cot_plus_gold.json. Total records: 1000


5 Examples

In [34]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of candidate triples rendered per question and per demonstration.
triples_limit = 10
# Maximum number of dynamic demonstrations embedded in the system prompt.
NUM_DEMOS = 5

# JSON dataset read by main(); samples may carry "dynamic_pairs" demonstrations.
input_path  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_dynamic_pairs_with_cot.json"
# Batch request file written by main(), one JSON request per line.
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl"
# Model name placed in the body of every batch request.
MODEL = "ft:gpt-3.5-turbo-0125:personal::Br5K42ie"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any):
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    """Render the first ``limit`` entries of ``seq`` as a 1-based numbered list.

    Each entry goes through ``_coerce_triple``; list results are joined with
    single spaces. Returns "(none)" when there is nothing to render.
    """
    rendered: List[str] = []
    for number, raw in enumerate(seq[:limit], start=1):
        coerced = _coerce_triple(raw)
        text = coerced if isinstance(coerced, str) else " ".join(str(part) for part in coerced)
        rendered.append(f"{number}. {text}")
    if not rendered:
        return "(none)"
    return "\n".join(rendered)

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# Fallback system prompt: build_system_msg() returns it when a sample has no
# demonstrations, or when none of them has both a question and a SPARQL query.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Build the system message, embedding up to ``NUM_DEMOS`` demonstrations.

    Demonstrations are read from ``sample["dynamic_pairs"]`` (the misspelled
    key "dynamic_paris" is accepted as well). A demonstration is used only if
    it has both a question and a SPARQL query; its chain-of-thought, when
    present, is appended after the answer.

    Args:
        sample: One dataset record.

    Returns:
        A chat message dict with role "system". Its content is
        ``GENERIC_INSTR`` when no demonstration is usable, otherwise the task
        header followed by one INPUT/OUTPUT block per demonstration.
    """
    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []
    if not demo_list:
        return {"role": "system", "content": GENERIC_INSTR}

    blocks = []
    for demo in demo_list[:NUM_DEMOS]:
        demo = demo or {}
        # `or ""` keeps an explicit null from being rendered as the text "None",
        # which would otherwise pass the emptiness check and reach the prompt.
        demo_q: str = str(demo.get("question") or "").strip()
        demo_sparql: str = str(demo.get("sparql") or "").strip()
        demo_cot: str = str(demo.get("cot") or "").strip()

        # Skip unusable demonstrations before doing any formatting work.
        if not demo_q or not demo_sparql:
            continue

        demo_triples_seq = (
            demo.get("retrieved_triples_top10")
            or demo.get("retrived_triples_ranked")
            or demo.get("retrieved_triples_ranked")
            or demo.get("retrieved_triples")
            or demo.get("triples")
            or []
        )
        demo_triples_str = _format_triples_for_prompt(demo_triples_seq, triples_limit)

        demo_answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(demo_sparql)}\"}}"
        )

        if demo_cot:
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        # Number by emitted blocks so a skipped demonstration leaves no gap
        # (e.g. "Example 1" followed directly by "Example 3").
        i = len(blocks) + 1
        block = (
            f"Example {i} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Candidate Triples (numbered, max 10):\n{demo_triples_str}\n\n"
            f"Example {i} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    header = (
        "Given a specific question and up to ten potentially relevant triples, generate the\n"
        "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
        'with key "sparql" and the query as its string value.\n\n'
    )
    content = header + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Turn the dataset at ``input_path`` into a Batch API request file.

    Every sample becomes one chat-completions request (system message with
    demonstrations, user message with the question and its candidate triples)
    written as one JSON line to ``batch_jsonl_path``. Requests are identified
    as ``example_<index>`` in dataset order.
    """
    with open(input_path, encoding="utf-8") as src:
        samples = json.load(src)

    def _as_row(sample):
        # One chat transcript (system + user) for a single dataset sample.
        asked = sample.get("question", "").strip()
        listing = _format_triples_for_prompt(_get_triple_candidates(sample), triples_limit)
        user_msg = {
            "role": "user",
            "content": f"Question:\n{asked}\n\nCandidate Triples (max 10, numbered):\n{listing}",
        }
        return {"messages": [build_system_msg(sample), user_msg]}

    jsonl_rows = [_as_row(sample) for sample in samples]

    with open(batch_jsonl_path, "w", encoding="utf-8") as sink:
        for position, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{position}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0,
                },
            }
            sink.write(json.dumps(request) + "\n")

    print(f"[1/1] Wrote {len(jsonl_rows)} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        print("Preview of first record:\n", json.dumps(jsonl_rows[0], indent=2)[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 1000 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhich writer of Tales of Suspense is also the writer of karakuri Dji Ultimo ?\n\nCandidate Triples (numbered, max 10):\n1. res:Tales_of_Suspense dbo:wikiPageWikiLink res:James_Robinson_(writer)\n2. res:James_Robinson_(writer) dbo:wikiPageWikiLink res:Tales_of_Suspense\n3. res:Tales_of_Suspense dbo:writer res:Larry_Lieber\n4. res:Tales_of_Suspense dbo:writer res:Robert_Bernstein_(comics)\n5. res:Tales_of_Suspense dbo:writer res:Stan_L

In [35]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Request file produced by the previous cell, and where the raw results are saved.
batch_input_jsonl = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_input.jsonl"
batch_output_jsonl = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_output.jsonl"

# Statuses after which a batch never changes again. Polling must stop on all of
# them, otherwise an expired or cancelled batch keeps the loop running forever.
terminal_statuses = {"completed", "failed", "expired", "cancelled"}

# Context manager so the upload handle is closed even if the request raises.
with open(batch_input_jsonl, "rb") as batch_input:
    upload = client.files.create(
        file=batch_input,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "LcQUAD test inference"}
)
print("Batch ID:", batch.id)

# Poll once a minute until the batch reaches a terminal status.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in terminal_statuses:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch ended with status '{batch.status}'. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
if not result_file_id:
    # No output file to download; point at the error file if there is one.
    print("Batch completed without an output file. Error file id:", getattr(batch, "error_file_id", None))
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open(batch_output_jsonl, "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-KEtS9QDUXJf5pnhDM2fxNV
Batch ID: batch_68a55dcd654c8190a6833b2069ddcbc3
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: completed
Saved outputs


In [36]:
import json, re
from pathlib import Path

# Records that receive the predictions (a "refined_pred_query" field is added to each).
# NOTE(review): predictions are attached purely by position (custom_id "example_<idx>"),
# which presumes this file has the same record order as the file the batch input
# was built from — confirm.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_gold_with_dynamic_pairs.json")
# Raw Batch API output (JSONL) holding the model completions.
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_batch_output.jsonl")
# Destination for the gold records enriched with the predictions.
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_plus_gold.json")

# Finds the <Answer> tag; group 1 starts at the opening brace of the JSON object after it.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the "sparql" value from the JSON object that follows ``<Answer>``.

    Only the first JSON value after the tag is parsed. The regex group itself is
    greedy and runs to the last ``}`` in the completion, so text after the JSON
    that contains braces (for instance a chain-of-thought quoting a SPARQL
    pattern) used to make parsing fail and the prediction was lost.

    Args:
        content: Raw completion text; ``None`` or empty is tolerated.

    Returns:
        The query string, or "" when there is no ``<Answer>`` JSON object or it
        cannot be parsed.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode reads exactly one JSON value and ignores whatever follows it.
        payload, _end = json.JSONDecoder().raw_decode(content, m.start(1))
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    return payload.get("sparql", "")

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# custom_id -> predicted SPARQL ("" when the request failed or nothing could be parsed).
pred_lookup = {}
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines in the output file
        pred = json.loads(line)
        cid = pred.get("custom_id")
        if not cid:
            continue
        try:
            content = pred["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # A request without a usable completion must not abort the whole
            # merge; its prediction is simply left empty.
            content = None
        pred_lookup[cid] = extract_sparql(content) if isinstance(content, str) else ""

# Predictions are attached by position: record idx <-> custom_id "example_<idx>".
missing = 0
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    if cid not in pred_lookup:
        missing += 1
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")
if missing:
    print(f"Warning: {missing} records had no matching prediction in {PRED_PATH}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_plus_5_dynamic_pairs_with_cot_plus_gold.json. Total records: 1000


LcQUAD-Mistral

In [1]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset whose entries carry the "dynamic_pairs" that main() turns into requests.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# Batch request file (JSONL) written by main().
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_pairs_batch_input.jsonl"
# NOTE: MODEL and TEMPERATURE are assigned a second time, with the same values, further down.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not read anywhere in this cell — confirm whether it is still needed.
RESUME       = True 

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples placed at the start of every user prompt (see build_messages).
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; build_messages fills question, triples_str and sparql via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# Request parameters written into the body of each batch request by main().
MODEL       = "gpt-4.1-nano"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load a list of records from a ``.jsonl`` or JSON file.

    ``.jsonl`` files yield one record per non-blank line. For JSON, a top-level
    list is returned as-is, a dict with a list under "questions" yields that
    list, and any other dict is wrapped in a single-element list.

    Raises:
        ValueError: if the JSON root is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for row in text.splitlines():
            if row.strip():
                records.append(json.loads(row))
        return records

    parsed = json.loads(text)
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        raise ValueError("Unrecognized JSON structure")
    questions = parsed.get("questions")
    return questions if isinstance(questions, list) else [parsed]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bulleted block for the prompt.

    In a list, 3-element lists/tuples become ``- <s, p, o>`` and every other
    item becomes ``- <item>``; an empty list gives "- (none)". A non-list value
    is returned as ``str(value)`` without a bullet.
    """
    if not isinstance(triples, list):
        return str(triples)

    def _bullet(item: Any) -> str:
        if isinstance(item, (list, tuple)) and len(item) == 3:
            subj, pred, obj = item
            return f"- <{subj}, {pred}, {obj}>"
        return f"- {item}"

    bullets = [_bullet(item) for item in triples]
    return "\n".join(bullets) if bullets else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Build the system + user chat messages for one explanation request.

    The user message is the few-shot examples, a blank line, and the item
    template filled with the stripped question, the rendered triples and the
    stripped SPARQL query.
    """
    item_text = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": f"{FEWSHOT_EXAMPLES}\n\n{item_text}"}
    return [system_turn, user_turn]

def sanitize(s: str) -> str:
    """Turn ``s`` into a short identifier fragment for use in a custom_id.

    Runs of whitespace collapse to one underscore, every character outside
    ``[a-zA-Z0-9._:-]`` becomes an underscore, and the result is cut to 120
    characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def main():
    """Write one Batch API request per dynamic pair found in ``INPUT_PATH``.

    Each request asks the model to explain how the pair's SPARQL query follows
    from its question and triples. The custom_id is
    ``<sanitized entry id>__dp<pair index>``, where the entry id is the
    entry's "id", else its "qid", else its position in the input.
    """
    entries = load_any(INPUT_PATH)
    requests = []
    for position, entry in enumerate(tqdm(entries, desc="Building batch JSONL")):
        entry_id = entry.get("id") or entry.get("qid") or position
        for pair_no, pair in enumerate(entry.get("dynamic_pairs", [])):
            messages = build_messages(
                (pair.get("question") or "").strip(),
                pair.get("triples", []),
                (pair.get("sparql") or "").strip(),
            )
            # The Batch API needs custom_id, method, url and body on every line.
            requests.append({
                "custom_id": f"{sanitize(entry_id)}__dp{pair_no}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages,
                },
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as sink:
        for request in requests:
            sink.write(json.dumps(request, ensure_ascii=False) + "\n")

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 23632.68it/s]

Wrote 5000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_pairs_batch_input.jsonl





In [2]:
import openai
import time
import json

# Shared API client used by main() below.
client = openai.OpenAI()

# Batch request file uploaded by main().
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_pairs_batch_input.jsonl"
# Where main() saves the raw batch output (JSONL).
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the batch input, wait for the batch to finish, and save its output.

    Polls once a minute until the batch is failed, completed, expired or
    cancelled. On success the output file is written to
    ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: if the batch ends in any status other than "completed", or
            completes without an output file. The start of the error file is
            printed first when one exists.
    """
    # Context manager so the upload handle is closed even if the request raises.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    # A completed batch without an output file has nothing to download, so it
    # is reported the same way as a batch that did not complete.
    if b.status != "completed" or not b.output_file_id:
        print("Batch ended with status:", b.status)
        if b.status == "completed":
            print("No output file was produced.")
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(b.output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()

Uploaded file: file-4GVdfxGzZJsXZ6nNamDmyp
Batch ID: batch_68a8a8d051d4819097d17d92229a0b89
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl


In [3]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset whose dynamic_pairs receive a "cot" field in main().
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# Raw Batch API output (JSONL) parsed by parse_batch_output().
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"
# Destination for the dataset with the explanations merged in.
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each custom_id in a Batch API output file to its result text.

    Blank lines are skipped. A 200 response yields the stripped message
    content; if that cannot be read, the first 2000 characters of the JSON
    body are used instead. Any other status yields "[ERROR] <message>". Lines
    without a custom_id are dropped.
    """
    results: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            record = json.loads(raw)
            key = record.get("custom_id")
            response = record.get("response") or {}
            code = response.get("status_code")
            payload = response.get("body") or {}
            if code != 200:
                reason = (payload.get("error") or {}).get("message") or f"Non-200 status: {code}"
                text = f"[ERROR] {reason}"
            else:
                try:
                    text = payload["choices"][0]["message"]["content"].strip()
                except Exception:
                    # Unexpected body shape: keep a truncated dump for inspection.
                    text = json.dumps(payload)[:2000]
            if key:
                results[key] = text
    return results

def sanitize(s: str) -> str:
    """Turn ``s`` into a short identifier fragment for use in a custom_id.

    Runs of whitespace collapse to one underscore, every character outside
    ``[a-zA-Z0-9._:-]`` becomes an underscore, and the result is cut to 120
    characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate its list of items.

    Returns:
        ``(root, items, key)``. For a top-level list, ``items`` is ``root`` and
        ``key`` is ``None``. For a dict, ``key`` is "questions" when that holds
        a list, otherwise whatever ``_find_items_key_in_dict`` selects, and
        ``items`` is ``root[key]``.

    Raises:
        ValueError: if the root is neither a list nor a dict, or no list of
            items can be found in a dict root.
    """
    with open(path, "r", encoding="utf-8") as handle:
        root = json.load(handle)

    if isinstance(root, list):
        return root, root, None
    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    if isinstance(root.get("questions"), list):
        items_key = "questions"
    else:
        items_key = _find_items_key_in_dict(root)
        if items_key is None:
            raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Merge the batch explanations into the dataset as per-pair "cot" fields.

    Custom ids are rebuilt as ``<sanitized entry id>__dp<pair index>``, where
    the entry id is the entry's "id", else its "qid", else its position. Each
    dynamic pair with a matching result gets its "cot" set, and the whole
    container is written to ``MERGED_OUTPUT_JSON``.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    explanations = parse_batch_output(BATCH_OUTPUT_FILE)

    total_pairs = 0
    updated = 0
    for position, entry in enumerate(items_list):
        entry_id = entry.get("id") or entry.get("qid") or position
        for pair_no, pair in enumerate(entry.get("dynamic_pairs", [])):
            total_pairs += 1
            key = f"{sanitize(entry_id)}__dp{pair_no}"
            if key not in explanations:
                continue
            pair["cot"] = explanations[key]
            updated += 1

    destination = pathlib.Path(MERGED_OUTPUT_JSON)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as sink:
        json.dump(root_obj, sink, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 5000/5000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json


QALD-Mistral

In [6]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset read by main() via load_any().
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# NOTE(review): presumably the batch request file written by main() — its use is outside the visible part of this cell.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl"
# NOTE: MODEL and TEMPERATURE are assigned a second time, with the same values, further down.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not read in the visible part of this cell — confirm whether it is still needed.
RESUME       = True 

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples placed at the start of every user prompt (see build_messages).
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; build_messages fills question, triples_str and sparql via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# NOTE(review): presumably the request parameters used by main() — its use is outside the visible part of this cell.
MODEL       = "gpt-4.1-nano"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load the list of dataset entries from a ``.json`` or ``.jsonl`` file.

    Args:
        path: File to read (UTF-8). A ``.jsonl`` suffix (case-insensitive)
            selects line-delimited parsing; anything else is parsed as one
            JSON document.

    Returns:
        For JSONL: one parsed object per non-blank line. For JSON: the
        top-level list, or the list under the ``"questions"`` key, or a
        one-element list wrapping a top-level dict.

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    p = pathlib.Path(path)
    if p.suffix.lower() == ".jsonl":
        # Iterate the file handle instead of str.splitlines(): splitlines() also
        # breaks on characters such as U+2028 / U+0085, which may legally appear
        # unescaped inside JSON strings and would cut a record in half.
        with p.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]

    data = json.loads(p.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        questions = data.get("questions")
        # A dict without a "questions" list is treated as a single entry.
        return questions if isinstance(questions, list) else [data]
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render gold triples as the bullet list used in the prompt.

    Args:
        triples: Usually a list. Each 3-element list/tuple becomes
            ``- <s, p, o>``; any other element becomes ``- <element>``.
            A non-list value is rendered with ``str()`` and no bullet.

    Returns:
        The newline-joined lines, or ``"- (none)"`` when there is nothing to
        show (empty list, ``None``, or a blank string).
    """
    # A null/blank value would otherwise be shown to the model as the literal
    # text "None" (or an empty section) instead of the explicit placeholder.
    if triples is None or (isinstance(triples, str) and not triples.strip()):
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the system + user chat messages for one item.

    The user message is FEWSHOT_EXAMPLES, a blank line, then ITEM_INSTRUCTIONS
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query. The system message is SYSTEM_PROMPT.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    user_content = "\n\n".join((FEWSHOT_EXAMPLES, item_section))

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token made of ``[a-zA-Z0-9._:-]``.

    The value is stringified first (callers also pass integer indices), each
    whitespace run and every other disallowed character becomes ``_``, and
    the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    # Keep ids reasonably short.
    return cleaned[:120]

def main():
    """Write one Batch API chat-completion request per dynamic pair as JSONL.

    Reads the entries from INPUT_PATH, builds the messages for every element
    of each entry's ``dynamic_pairs`` and writes one request object per line
    to BATCH_OUTPUT_PATH (creating the parent directory if needed).

    Raises:
        ValueError: if two requests would get the same ``custom_id``; results
            are matched back to pairs by that id, so it must be unique.
    """
    items = load_any(INPUT_PATH)
    out = []
    seen_ids = set()
    duplicate_ids = []
    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # NOTE(review): a falsy id (e.g. 0) falls through to "qid" / the index.
        # The merge cell derives ids with the same expression, so change both together.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            custom_id = f"{sanitize(eid)}__dp{i}"
            if custom_id in seen_ids:
                duplicate_ids.append(custom_id)
            seen_ids.add(custom_id)

            # Each line needs: custom_id + method + url + body
            out.append({
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    if duplicate_ids:
        # Fail before writing: duplicate ids would make the result mapping ambiguous.
        raise ValueError(
            f"Duplicate custom_id values (showing up to 5): {duplicate_ids[:5]}"
        )

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 150/150 [00:00<00:00, 32643.89it/s]

Wrote 750 requests to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl





In [7]:
import openai
import time
import json

# Module-level API client used by main(); constructed with no explicit arguments.
client = openai.OpenAI()

# JSONL request file uploaded by main() as the batch input.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl"
# Where main() saves the downloaded batch output file.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the request file, run it as a batch, and save the output file.

    Steps: upload BATCH_INPUT_FILE_PATH, create a batch for
    ``/v1/chat/completions``, poll its status every 60 seconds until it is
    failed/completed/expired/cancelled, then write the output file content to
    BATCH_OUTPUT_FILE_PATH.

    Raises:
        SystemExit: with code 1 if the batch does not end as ``completed`` or
            reports no output file; the error file (if any) is printed first.
    """
    # Context manager so the upload handle is closed even if the request fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as fh:
        upload = client.files.create(
            file=fh,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "QALD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    error_file_id = getattr(b, "error_file_id", None)
    output_file_id = getattr(b, "output_file_id", None)

    if b.status != "completed" or not output_file_id:
        print("Batch ended with status:", b.status)
        if b.status == "completed":
            # Guard against passing a missing id to files.content().
            print("Batch reported no output file.")
        if error_file_id:
            err_txt = client.files.content(error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

    if error_file_id:
        # A completed batch may still list individual failed requests here.
        err_txt = client.files.content(error_file_id).text
        print("Some requests reported errors; error file content:\n", err_txt[:2000])

if __name__ == "__main__":
    main()

Uploaded file: file-LiTBLvkRoTinGsV4SKYGYR
Batch ID: batch_68a8b218d8b08190a990806c67a35705
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl


In [8]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset JSON whose dynamic_pairs receive the "cot" field (read by load_container).
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: Batch output file, one JSON object per line (blank lines skipped).

    Returns:
        ``{custom_id: text}``. For a status-200 response the text is the
        stripped content of the first choice. Every failure is stored as a
        string starting with ``"[ERROR] "``: non-200 responses (using the
        body's error message, else the line-level ``error`` message, else the
        status code) and 200 responses whose body lacks the expected content.
        Lines without a ``custom_id`` are ignored.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Mark malformed bodies (missing choices, null content, ...)
                    # so raw JSON is never mistaken for a real explanation.
                    content = "[ERROR] Unexpected response body: " + json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    # Requests that never got a response carry the reason in
                    # the line-level "error" object instead.
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Rebuild the id token exactly as the request-building cell does.

    Stringifies the value, replaces each whitespace run and every character
    outside ``[a-zA-Z0-9._:-]`` with ``_``, and keeps at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return cleaned[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of entries inside it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        ``(root, items, key)``: ``root`` is the parsed document, ``items`` the
        list of entries (the same objects as in ``root``, so mutating them
        mutates ``root``), and ``key`` the dict key the list was found under,
        or ``None`` when ``root`` is itself the list or a single entry.

    Raises:
        ValueError: if the root is neither a list nor a dict, or no list of
            entries can be located in a dict root.
    """
    with open(path, "r", encoding="utf-8") as f:
        root = json.load(f)

    if isinstance(root, list):
        return root, root, None

    if isinstance(root, dict):
        if isinstance(root.get("questions"), list):
            return root, root["questions"], "questions"

        if "dynamic_pairs" in root:
            # The root is itself one entry. Without this guard the key search
            # below would pick its "dynamic_pairs" list as the entry list,
            # disagreeing with how the request-building cell wraps such a dict.
            return root, [root], None

        k = _find_items_key_in_dict(root)
        if k is None:
            raise ValueError("Could not locate the list of items in the original JSON.")
        return root, root[k], k

    raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

def main():
    """Attach each batch result to its dynamic pair as ``"cot"`` and save the merged JSON.

    Rebuilds every pair's ``custom_id`` the same way the request-building cell
    does (``sanitize(id or qid or index)`` + ``__dp<i>``), copies the matching
    text from the parsed batch output into ``dp["cot"]``, and writes the whole
    document to MERGED_OUTPUT_JSON. Error placeholders are still stored, but
    are reported separately so the summary does not hide failed requests.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0
    matched_ids = set()

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid in cid_to_text:
                text = cid_to_text[cid]
                dp["cot"] = text
                updated += 1
                matched_ids.add(cid)
                # parse_batch_output marks failed requests with this prefix.
                if text.startswith("[ERROR]"):
                    failed += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"WARNING: {failed} of the inserted values are [ERROR] placeholders.")
    unmatched = len(cid_to_text) - len(matched_ids)
    if unmatched:
        print(f"WARNING: {unmatched} batch results did not match any dynamic pair.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 750/750 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json


LC-QuAD — Mistral 7B v0.1

In [1]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset JSON read by main() through load_any.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# Despite the name, this is the JSONL request file main() writes; the next cell uploads it as the batch input.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl"
# NOTE: MODEL and TEMPERATURE are assigned again (same values) near the end of this config section.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
RESUME       = True  # not referenced by any code in this cell

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples that build_messages prepends to every user message.
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; build_messages fills {question}, {triples_str} and {sparql} via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# Request settings that main() copies into every batch request body
# (these later assignments are the ones in effect when main() runs).
MODEL       = "gpt-4.1-nano"
TEMPERATURE = 0.0
MAX_TOKENS  = 500  # sent as "max_tokens" in each request body

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load the list of dataset entries from a ``.json`` or ``.jsonl`` file.

    Args:
        path: File to read (UTF-8). A ``.jsonl`` suffix (case-insensitive)
            selects line-delimited parsing; anything else is parsed as one
            JSON document.

    Returns:
        For JSONL: one parsed object per non-blank line. For JSON: the
        top-level list, or the list under the ``"questions"`` key, or a
        one-element list wrapping a top-level dict.

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    p = pathlib.Path(path)
    if p.suffix.lower() == ".jsonl":
        # Iterate the file handle instead of str.splitlines(): splitlines() also
        # breaks on characters such as U+2028 / U+0085, which may legally appear
        # unescaped inside JSON strings and would cut a record in half.
        with p.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]

    data = json.loads(p.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        questions = data.get("questions")
        # A dict without a "questions" list is treated as a single entry.
        return questions if isinstance(questions, list) else [data]
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render gold triples as the bullet list used in the prompt.

    Args:
        triples: Usually a list. Each 3-element list/tuple becomes
            ``- <s, p, o>``; any other element becomes ``- <element>``.
            A non-list value is rendered with ``str()`` and no bullet.

    Returns:
        The newline-joined lines, or ``"- (none)"`` when there is nothing to
        show (empty list, ``None``, or a blank string).
    """
    # A null/blank value would otherwise be shown to the model as the literal
    # text "None" (or an empty section) instead of the explicit placeholder.
    if triples is None or (isinstance(triples, str) and not triples.strip()):
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the system + user chat messages for one item.

    The user message is FEWSHOT_EXAMPLES, a blank line, then ITEM_INSTRUCTIONS
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query. The system message is SYSTEM_PROMPT.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    user_content = "\n\n".join((FEWSHOT_EXAMPLES, item_section))

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token made of ``[a-zA-Z0-9._:-]``.

    The value is stringified first (callers also pass integer indices), each
    whitespace run and every other disallowed character becomes ``_``, and
    the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    # Keep ids reasonably short.
    return cleaned[:120]

def main():
    """Write one Batch API chat-completion request per dynamic pair as JSONL.

    Reads the entries from INPUT_PATH, builds the messages for every element
    of each entry's ``dynamic_pairs`` and writes one request object per line
    to BATCH_OUTPUT_PATH (creating the parent directory if needed).

    Raises:
        ValueError: if two requests would get the same ``custom_id``; results
            are matched back to pairs by that id, so it must be unique.
    """
    items = load_any(INPUT_PATH)
    out = []
    seen_ids = set()
    duplicate_ids = []
    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # NOTE(review): a falsy id (e.g. 0) falls through to "qid" / the index.
        # The merge cell derives ids with the same expression, so change both together.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            custom_id = f"{sanitize(eid)}__dp{i}"
            if custom_id in seen_ids:
                duplicate_ids.append(custom_id)
            seen_ids.add(custom_id)

            # Each line needs: custom_id + method + url + body
            out.append({
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    if duplicate_ids:
        # Fail before writing: duplicate ids would make the result mapping ambiguous.
        raise ValueError(
            f"Duplicate custom_id values (showing up to 5): {duplicate_ids[:5]}"
        )

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 25242.71it/s]

Wrote 5000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl





In [2]:
import openai
import time
import json

# Module-level API client used by main(); constructed with no explicit arguments.
client = openai.OpenAI()

# JSONL request file uploaded by main() as the batch input.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_gold_with_dynamic_cot_pairs_batch_input.jsonl"
# Where main() saves the downloaded batch output file.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the request file, run it as a batch, and save the output file.

    Steps: upload BATCH_INPUT_FILE_PATH, create a batch for
    ``/v1/chat/completions``, poll its status every 60 seconds until it is
    failed/completed/expired/cancelled, then write the output file content to
    BATCH_OUTPUT_FILE_PATH.

    Raises:
        SystemExit: with code 1 if the batch does not end as ``completed`` or
            reports no output file; the error file (if any) is printed first.
    """
    # Context manager so the upload handle is closed even if the request fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as fh:
        upload = client.files.create(
            file=fh,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    error_file_id = getattr(b, "error_file_id", None)
    output_file_id = getattr(b, "output_file_id", None)

    if b.status != "completed" or not output_file_id:
        print("Batch ended with status:", b.status)
        if b.status == "completed":
            # Guard against passing a missing id to files.content().
            print("Batch reported no output file.")
        if error_file_id:
            err_txt = client.files.content(error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

    if error_file_id:
        # A completed batch may still list individual failed requests here.
        err_txt = client.files.content(error_file_id).text
        print("Some requests reported errors; error file content:\n", err_txt[:2000])

if __name__ == "__main__":
    main()

Uploaded file: file-Q3JTa4KqZ76z5ir63q1RxQ
Batch ID: batch_68a9e185217c8190b0cc1d35fda0efaf
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in

In [3]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset JSON whose dynamic_pairs receive the "cot" field (read by load_container).
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: Batch output file, one JSON object per line (blank lines skipped).

    Returns:
        ``{custom_id: text}``. For a status-200 response the text is the
        stripped content of the first choice. Every failure is stored as a
        string starting with ``"[ERROR] "``: non-200 responses (using the
        body's error message, else the line-level ``error`` message, else the
        status code) and 200 responses whose body lacks the expected content.
        Lines without a ``custom_id`` are ignored.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Mark malformed bodies (missing choices, null content, ...)
                    # so raw JSON is never mistaken for a real explanation.
                    content = "[ERROR] Unexpected response body: " + json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    # Requests that never got a response carry the reason in
                    # the line-level "error" object instead.
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Rebuild the id token exactly as the request-building cell does.

    Stringifies the value, replaces each whitespace run and every character
    outside ``[a-zA-Z0-9._:-]`` with ``_``, and keeps at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return cleaned[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of entries inside it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        ``(root, items, key)``: ``root`` is the parsed document, ``items`` the
        list of entries (the same objects as in ``root``, so mutating them
        mutates ``root``), and ``key`` the dict key the list was found under,
        or ``None`` when ``root`` is itself the list or a single entry.

    Raises:
        ValueError: if the root is neither a list nor a dict, or no list of
            entries can be located in a dict root.
    """
    with open(path, "r", encoding="utf-8") as f:
        root = json.load(f)

    if isinstance(root, list):
        return root, root, None

    if isinstance(root, dict):
        if isinstance(root.get("questions"), list):
            return root, root["questions"], "questions"

        if "dynamic_pairs" in root:
            # The root is itself one entry. Without this guard the key search
            # below would pick its "dynamic_pairs" list as the entry list,
            # disagreeing with how the request-building cell wraps such a dict.
            return root, [root], None

        k = _find_items_key_in_dict(root)
        if k is None:
            raise ValueError("Could not locate the list of items in the original JSON.")
        return root, root[k], k

    raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

def main():
    """Attach each batch result to its dynamic pair as ``"cot"`` and save the merged JSON.

    Rebuilds every pair's ``custom_id`` the same way the request-building cell
    does (``sanitize(id or qid or index)`` + ``__dp<i>``), copies the matching
    text from the parsed batch output into ``dp["cot"]``, and writes the whole
    document to MERGED_OUTPUT_JSON. Error placeholders are still stored, but
    are reported separately so the summary does not hide failed requests.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0
    matched_ids = set()

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid in cid_to_text:
                text = cid_to_text[cid]
                dp["cot"] = text
                updated += 1
                matched_ids.add(cid)
                # parse_batch_output marks failed requests with this prefix.
                if text.startswith("[ERROR]"):
                    failed += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"WARNING: {failed} of the inserted values are [ERROR] placeholders.")
    unmatched = len(cid_to_text) - len(matched_ids)
    if unmatched:
        print(f"WARNING: {unmatched} batch results did not match any dynamic pair.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 5000/5000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/lcquad_test_solo_stage_10_mistral_plus_dynamic_pairs_with_cot.json


Mixtral 8x7B v0.1 — QALD

In [1]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset JSON read by main() through load_any.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_plus_dynamic_pairs.json"
# Despite the name, this is the JSONL request file main() writes; the next cell uploads it as the batch input.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl"
# NOTE: MODEL and TEMPERATURE are assigned again (same values) near the end of this config section.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
RESUME       = True  # not referenced by any code in this cell

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples that build_messages prepends to every user message.
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; build_messages fills {question}, {triples_str} and {sparql} via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# Request settings that main() copies into every batch request body
# (these later assignments are the ones in effect when main() runs).
MODEL       = "gpt-4.1-nano"
TEMPERATURE = 0.0
MAX_TOKENS  = 500  # sent as "max_tokens" in each request body

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load the list of dataset entries from a ``.json`` or ``.jsonl`` file.

    Args:
        path: File to read (UTF-8). A ``.jsonl`` suffix (case-insensitive)
            selects line-delimited parsing; anything else is parsed as one
            JSON document.

    Returns:
        For JSONL: one parsed object per non-blank line. For JSON: the
        top-level list, or the list under the ``"questions"`` key, or a
        one-element list wrapping a top-level dict.

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    p = pathlib.Path(path)
    if p.suffix.lower() == ".jsonl":
        # Iterate the file handle instead of str.splitlines(): splitlines() also
        # breaks on characters such as U+2028 / U+0085, which may legally appear
        # unescaped inside JSON strings and would cut a record in half.
        with p.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]

    data = json.loads(p.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        questions = data.get("questions")
        # A dict without a "questions" list is treated as a single entry.
        return questions if isinstance(questions, list) else [data]
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render gold triples as the bullet list used in the prompt.

    Args:
        triples: Usually a list. Each 3-element list/tuple becomes
            ``- <s, p, o>``; any other element becomes ``- <element>``.
            A non-list value is rendered with ``str()`` and no bullet.

    Returns:
        The newline-joined lines, or ``"- (none)"`` when there is nothing to
        show (empty list, ``None``, or a blank string).
    """
    # A null/blank value would otherwise be shown to the model as the literal
    # text "None" (or an empty section) instead of the explicit placeholder.
    if triples is None or (isinstance(triples, str) and not triples.strip()):
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the system + user chat messages for one item.

    The user message is FEWSHOT_EXAMPLES, a blank line, then ITEM_INSTRUCTIONS
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query. The system message is SYSTEM_PROMPT.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    user_content = "\n\n".join((FEWSHOT_EXAMPLES, item_section))

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token made of ``[a-zA-Z0-9._:-]``.

    The value is stringified first (callers also pass integer indices), each
    whitespace run and every other disallowed character becomes ``_``, and
    the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    # Keep ids reasonably short.
    return cleaned[:120]

def main():
    """Write one Batch API chat-completion request per dynamic pair as JSONL.

    Reads the entries from INPUT_PATH, builds the messages for every element
    of each entry's ``dynamic_pairs`` and writes one request object per line
    to BATCH_OUTPUT_PATH (creating the parent directory if needed).

    Raises:
        ValueError: if two requests would get the same ``custom_id``; results
            are matched back to pairs by that id, so it must be unique.
    """
    items = load_any(INPUT_PATH)
    out = []
    seen_ids = set()
    duplicate_ids = []
    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # NOTE(review): a falsy id (e.g. 0) falls through to "qid" / the index.
        # The merge cell derives ids with the same expression, so change both together.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            custom_id = f"{sanitize(eid)}__dp{i}"
            if custom_id in seen_ids:
                duplicate_ids.append(custom_id)
            seen_ids.add(custom_id)

            # Each line needs: custom_id + method + url + body
            out.append({
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    if duplicate_ids:
        # Fail before writing: duplicate ids would make the result mapping ambiguous.
        raise ValueError(
            f"Duplicate custom_id values (showing up to 5): {duplicate_ids[:5]}"
        )

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 150/150 [00:00<00:00, 26812.09it/s]

Wrote 750 requests to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl





In [2]:
import openai
import time
import json

# Module-level API client used by main(); constructed with no explicit arguments.
client = openai.OpenAI()

# JSONL request file uploaded by main() as the batch input.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl"
# Where main() saves the downloaded batch output file.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl"

def main():
    """Upload the request file, run it as a batch, and save the output file.

    Steps: upload BATCH_INPUT_FILE_PATH, create a batch for
    ``/v1/chat/completions``, poll its status every 60 seconds until it is
    failed/completed/expired/cancelled, then write the output file content to
    BATCH_OUTPUT_FILE_PATH.

    Raises:
        SystemExit: with code 1 if the batch does not end as ``completed`` or
            reports no output file; the error file (if any) is printed first.
    """
    # Context manager so the upload handle is closed even if the request fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as fh:
        upload = client.files.create(
            file=fh,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "QALD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    error_file_id = getattr(b, "error_file_id", None)
    output_file_id = getattr(b, "output_file_id", None)

    if b.status != "completed" or not output_file_id:
        print("Batch ended with status:", b.status)
        if b.status == "completed":
            # Guard against passing a missing id to files.content().
            print("Batch reported no output file.")
        if error_file_id:
            err_txt = client.files.content(error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

    if error_file_id:
        # A completed batch may still list individual failed requests here.
        err_txt = client.files.content(error_file_id).text
        print("Some requests reported errors; error file content:\n", err_txt[:2000])

if __name__ == "__main__":
    main()

Uploaded file: file-WBukewuTiLoE4F691Rd1bd
Batch ID: batch_68aaba65f53c81908140d3f03455c5ec
Status: validating
Status: in_progress
Status: in_progress
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl


In [4]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset JSON whose dynamic_pairs receive the "cot" field (read by load_container).
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: Batch output file, one JSON object per line (blank lines skipped).

    Returns:
        ``{custom_id: text}``. For a status-200 response the text is the
        stripped content of the first choice. Every failure is stored as a
        string starting with ``"[ERROR] "``: non-200 responses (using the
        body's error message, else the line-level ``error`` message, else the
        status code) and 200 responses whose body lacks the expected content.
        Lines without a ``custom_id`` are ignored.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Mark malformed bodies (missing choices, null content, ...)
                    # so raw JSON is never mistaken for a real explanation.
                    content = "[ERROR] Unexpected response body: " + json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    # Requests that never got a response carry the reason in
                    # the line-level "error" object instead.
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Rebuild the id token exactly as the request-building cell does.

    Stringifies the value, replaces each whitespace run and every character
    outside ``[a-zA-Z0-9._:-]`` with ``_``, and keeps at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    cleaned = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return cleaned[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of entries inside it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        ``(root, items, key)``: ``root`` is the parsed document, ``items`` the
        list of entries (the same objects as in ``root``, so mutating them
        mutates ``root``), and ``key`` the dict key the list was found under,
        or ``None`` when ``root`` is itself the list or a single entry.

    Raises:
        ValueError: if the root is neither a list nor a dict, or no list of
            entries can be located in a dict root.
    """
    with open(path, "r", encoding="utf-8") as f:
        root = json.load(f)

    if isinstance(root, list):
        return root, root, None

    if isinstance(root, dict):
        if isinstance(root.get("questions"), list):
            return root, root["questions"], "questions"

        if "dynamic_pairs" in root:
            # The root is itself one entry. Without this guard the key search
            # below would pick its "dynamic_pairs" list as the entry list,
            # disagreeing with how the request-building cell wraps such a dict.
            return root, [root], None

        k = _find_items_key_in_dict(root)
        if k is None:
            raise ValueError("Could not locate the list of items in the original JSON.")
        return root, root[k], k

    raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

def main():
    """Attach each batch result to its dynamic pair as ``"cot"`` and save the merged JSON.

    Rebuilds every pair's ``custom_id`` the same way the request-building cell
    does (``sanitize(id or qid or index)`` + ``__dp<i>``), copies the matching
    text from the parsed batch output into ``dp["cot"]``, and writes the whole
    document to MERGED_OUTPUT_JSON. Error placeholders are still stored, but
    are reported separately so the summary does not hide failed requests.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0
    matched_ids = set()

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null "dynamic_pairs" value.
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid in cid_to_text:
                text = cid_to_text[cid]
                dp["cot"] = text
                updated += 1
                matched_ids.add(cid)
                # parse_batch_output marks failed requests with this prefix.
                if text.startswith("[ERROR]"):
                    failed += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"WARNING: {failed} of the inserted values are [ERROR] placeholders.")
    unmatched = len(cid_to_text) - len(matched_ids)
    if unmatched:
        print(f"WARNING: {unmatched} batch results did not match any dynamic pair.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 750/750 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/mixtral/qald_test_solo_stage_10_mixtral_plus_dynamic_pairs_with_cot.json


Mixtral 8x7B v0.1 — LC-QuAD

In [6]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# JSON read by main() through load_any(); each entry is expected to carry a "dynamic_pairs" list.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_plus_dynamic_pairs.json"
# JSONL of batch *requests* written by main() (despite the name, this is the batch input file).
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl"
# Model name and sampling temperature placed in every request body.
MODEL        = "gpt-4.1-nano"
TEMPERATURE  = 0.0
RESUME       = True  # NOTE(review): not referenced by any code in this cell

# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are a helpful AI assistant who understands the SPARQL Protocol and RDF Query Language.
Given a natural-language question, its gold triples, and the correct SPARQL query, explain in a few sentences
how the SPARQL query is derived from the question using the gold triples. Focus on showing the reasoning steps
that connect the question to the structure of the SPARQL query. Keep it concise (2–4 sentences)."""

# Two worked examples prepended to the user message by build_messages.
FEWSHOT_EXAMPLES = """**Example 1**
Q: How many movies did Stanley Kubrick direct?
Gold triple: <?uri, director, Stanley_Kubrick>
SPARQL:
SELECT DISTINCT COUNT(?uri) WHERE {
  ?uri <http://dbpedia.org/ontology/director> <http://dbpedia.org/resource/Stanley_Kubrick> .
}

Expected output:
The question asks for the number of movies directed by Stanley Kubrick. The gold triple links movies (?uri) with Stanley Kubrick via the director relation. Since we need a count of movies, the query uses COUNT on distinct ?uri.

**Example 2**
Q: Who won the Lovelace Medal and the Norbert Wiener Award for Social and Professional Responsibility?
Gold triples:
- <?uri, prizes, Lovelace_Medal>
- <?uri, prizes, Norbert_Wiener_Award_for_Social_and_Professional_Responsibility>
SPARQL:
SELECT DISTINCT ?uri WHERE {
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Lovelace_Medal> .
  ?uri <http://dbpedia.org/property/prizes> <http://dbpedia.org/resource/Norbert_Wiener_Award_for_Social_and_Professional_Responsibility> .
}

Expected output:
The question asks for a person who has both awards. The first triple retrieves people with the Lovelace Medal, and the second triple restricts to those who also have the Norbert Wiener Award. Using both triples together finds the intersection, and DISTINCT avoids duplicates.
"""

# Per-item template; build_messages fills {question}, {triples_str} and {sparql} via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the concise explanation (2–4 sentences), no bullets, no code, no extra headings.

Q: {question}

Gold triples:
{triples_str}

SPARQL:
{sparql}
"""

# MODEL and TEMPERATURE are already set at the top of this cell; they are not
# repeated here so the two copies cannot drift apart.
# Upper bound on generated tokens per request (sent as "max_tokens").
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load entries from a ``.jsonl`` or JSON file and return them as a list.

    A ``.jsonl`` file yields one entry per non-blank line. A JSON file may be a
    list (returned as is), a dict with a ``"questions"`` list (that list is
    returned), or any other dict (wrapped in a single-element list).

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for raw_line in text.splitlines():
            if raw_line.strip():
                records.append(json.loads(raw_line))
        return records

    parsed = json.loads(text)
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = parsed.get("questions")
    return questions if isinstance(questions, list) else [parsed]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bullet list for the prompt.

    Args:
        triples: normally a list whose items are 3-element lists/tuples
            (rendered as ``- <s, p, o>``) or arbitrary values (rendered as
            ``- <value>``). Any other non-list value is rendered with ``str``.

    Returns:
        The rendered text, or ``"- (none)"`` when there is nothing to show
        (empty list, ``None``, or a blank string).
    """
    # A missing value (JSON null) or blank string must not leak into the prompt
    # as the literal text "None" / "".
    if triples is None or (isinstance(triples, str) and not triples.strip()):
        return "- (none)"

    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Build the chat messages (system + user) for one dynamic pair.

    The user message is the few-shot examples followed by the item template
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query.
    """
    item_text = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip()
    )
    user_content = FEWSHOT_EXAMPLES + "\n\n" + item_text

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token usable inside a batch custom_id.

    The value is stringified, each whitespace run becomes a single ``_``, every
    remaining character outside ``a-zA-Z0-9._:-`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]  # keep ids short-ish

def main():
    """Write one batch request line per dynamic pair found in ``INPUT_PATH``.

    Each request targets ``/v1/chat/completions`` and is identified by
    ``<sanitized entry id>__dp<i>`` so the responses can be matched back to
    their pair later.
    """
    requests = []
    entries = load_any(INPUT_PATH)
    for position, record in enumerate(tqdm(entries, desc="Building batch JSONL")):
        # Entry id falls back to qid, then to the entry's position.
        record_id = record.get("id") or record.get("qid") or position
        for pair_no, pair in enumerate(record.get("dynamic_pairs", [])):
            request_body = {
                "model": MODEL,
                "temperature": TEMPERATURE,
                "max_tokens": MAX_TOKENS,
                "messages": build_messages(
                    (pair.get("question") or "").strip(),
                    pair.get("triples", []),
                    (pair.get("sparql") or "").strip(),
                ),
            }
            # REQUIRED: custom_id + method + url + body
            requests.append({
                "custom_id": f"{sanitize(record_id)}__dp{pair_no}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": request_body,
            })

    destination = pathlib.Path(BATCH_OUTPUT_PATH)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(req, ensure_ascii=False) + "\n" for req in requests)

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()

Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 26259.70it/s]

Wrote 5000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl





In [7]:
import openai
import time
import json

# API client used by main() below for the upload, batch creation, polling and download calls.
client = openai.OpenAI()

# JSONL of batch requests that main() uploads.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_input.jsonl"
# File main() writes the text of the batch's output file to.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl"

def main():
    """Upload the batch input file, run the batch to completion, and save its output.

    Steps: upload ``BATCH_INPUT_FILE_PATH`` with purpose ``"batch"``, create a
    batch against ``/v1/chat/completions``, poll once a minute until the batch
    reaches a terminal status, then write the output file's text to
    ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: with code 1 if the batch ends in any status other than
            ``"completed"``, or completes without an output file.
    """
    # Close the upload handle deterministically instead of leaking it.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        # This cell processes the LcQUAD files; the tag previously said QALD.
        metadata={"job": "LCQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    # A batch can be "completed" with no output file (e.g. when every request
    # failed); treat that like a failure rather than requesting a None file id.
    output_file_id = getattr(b, "output_file_id", None)
    if b.status != "completed" or not output_file_id:
        print("Batch ended with status:", b.status)
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()

Uploaded file: file-SoDcw2Er6WazDdSFBfsiQs
Batch ID: batch_68ab5174d3c081908442c22782503c79
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl


In [8]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# JSON whose dynamic_pairs entries receive a "cot" field in main().
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_plus_dynamic_pairs.json"
# Batch output JSONL read by parse_batch_output().
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_top_10_mixtral_with_dynamic_cot_pairs_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_10_mixtral_plus_dynamic_pairs_with_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its response text.

    For a 200 response the text is the stripped content of the first choice;
    if the body does not have that shape, the first 2000 characters of the
    JSON-encoded body are stored instead. For any other outcome the text is
    ``"[ERROR] <message>"``, where the message comes from the response body's
    ``error`` object, else from the line's top-level ``error`` object, else a
    generic status note. Lines without a ``custom_id`` are ignored.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON; the
            message names the file and the 1-based line number.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError as exc:
                # Same exception type, but say where the bad line is.
                raise json.JSONDecodeError(
                    f"{path}, line {line_no}: {exc.msg}", exc.doc, exc.pos
                ) from exc

            cid = obj.get("custom_id")
            if not cid:
                continue

            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape (or null content): keep a raw excerpt.
                    content = json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    # Failed requests may carry the error at the top level of the line.
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token usable inside a batch custom_id.

    The value is stringified, each whitespace run becomes a single ``_``, every
    remaining character outside ``a-zA-Z0-9._:-`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Read the original JSON and locate the list of question entries inside it.

    Returns:
        A ``(root, items, key)`` tuple: ``root`` is the parsed document,
        ``items`` is the list of entries (the same object that lives inside
        ``root``), and ``key`` is the dict key under which ``items`` was found,
        or ``None`` when the document itself is the list.

    Raises:
        ValueError: if the document is neither a list nor a dict holding a
            list of entries.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, list):
        return payload, payload, None

    if not isinstance(payload, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    questions = payload.get("questions")
    if isinstance(questions, list):
        return payload, questions, "questions"

    # No "questions" key: fall back to scanning the dict for a list of dicts.
    found_key = _find_items_key_in_dict(payload)
    if found_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return payload, payload[found_key], found_key

def main():
    """Merge batch-generated CoT text into the dynamic pairs and save the result.

    Each pair is looked up by the custom id ``<sanitized entry id>__dp<i>``.
    Responses that parse_batch_output marked with the ``[ERROR]`` prefix are
    not written into the data: they are counted and reported separately, so
    the "Inserted" figure reflects real explanations only.
    """
    root_obj, items_list, _ = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0

    for idx, entry in enumerate(items_list):
        # Entry id falls back to qid, then to the entry's position in the list.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null value for dynamic_pairs.
        for i, dp in enumerate(entry.get("dynamic_pairs") or []):
            total_pairs += 1
            text = cid_to_text.get(f"{sanitize(eid)}__dp{i}")
            if text is None:
                continue
            if text.startswith("[ERROR]"):
                # Failed request: do not store the error message as a CoT.
                failed += 1
                continue
            dp["cot"] = text
            updated += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"Skipped {failed} failed batch responses (marked [ERROR]); those pairs have no 'cot'.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 5000/5000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/mixtral/lcquad_test_solo_stage_10_mixtral_plus_dynamic_pairs_with_cot.json


New Prompt: first-person CoT with entity assignment and predicate ids (QALD)

In [17]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# JSON read by main() through load_any(); each entry is expected to carry a "dynamic_pairs" list.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs.json"
# JSONL of batch *requests* written by main() (despite the name, this is the batch input file).
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot.jsonl"
# Model name and sampling temperature placed in every request body.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
RESUME       = True  # NOTE(review): not referenced by any code in this cell


# System message sent with every request (see build_messages).
SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

# Nine worked question/SPARQL/explanation examples prepended to the user message.
# NOTE(review): example 9's PREFIX declarations omit the <...> around the IRIs,
# unlike example 3 — confirm whether that is intentional.
FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: http://dbpedia.org/ontology/ PREFIX res: http://dbpedia.org/resource/ ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item template; build_messages fills {question}, {sparql} and {triples_str} via str.format.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""


# MODEL and TEMPERATURE are already set at the top of this cell; they are not
# repeated here so the two copies cannot drift apart.
# Upper bound on generated tokens per request (sent as "max_tokens").
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load entries from a ``.jsonl`` or JSON file and return them as a list.

    A ``.jsonl`` file yields one entry per non-blank line. A JSON file may be a
    list (returned as is), a dict with a ``"questions"`` list (that list is
    returned), or any other dict (wrapped in a single-element list).

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for raw_line in text.splitlines():
            if raw_line.strip():
                records.append(json.loads(raw_line))
        return records

    parsed = json.loads(text)
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = parsed.get("questions")
    return questions if isinstance(questions, list) else [parsed]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bullet list for the prompt.

    Args:
        triples: normally a list whose items are 3-element lists/tuples
            (rendered as ``- <s, p, o>``) or arbitrary values (rendered as
            ``- <value>``). Any other non-list value is rendered with ``str``.

    Returns:
        The rendered text, or ``"- (none)"`` when there is nothing to show
        (empty list, ``None``, or a blank string).
    """
    # A missing value (JSON null) or blank string must not leak into the prompt
    # as the literal text "None" / "".
    if triples is None or (isinstance(triples, str) and not triples.strip()):
        return "- (none)"

    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Build the chat messages (system + user) for one dynamic pair.

    The user message is the few-shot examples followed by the item template
    filled with the stripped question, the rendered triples and the stripped
    SPARQL query.
    """
    item_text = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip()
    )
    user_content = FEWSHOT_EXAMPLES + "\n\n" + item_text

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token usable inside a batch custom_id.

    The value is stringified, each whitespace run becomes a single ``_``, every
    remaining character outside ``a-zA-Z0-9._:-`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]  # keep ids short-ish

def main(start_index: int = 75):
    """Write one batch request line per dynamic pair, skipping the leading items.

    Args:
        start_index: number of items at the start of ``INPUT_PATH`` to skip;
            requests are built for ``items[start_index:]`` only. Defaults to
            75, the slice this cell was run with.

    Each request targets ``/v1/chat/completions`` and is identified by
    ``<sanitized entry id>__dp<i>``.
    """
    items = load_any(INPUT_PATH)
    # Process only the tail of the dataset (everything from `start_index` on).
    remaining = items[start_index:]
    out = []
    # Number entries by their position in the FULL list, not in the slice: the
    # positional fallback id must match what the merge step computes when it
    # enumerates the complete file.
    for idx, entry in enumerate(tqdm(remaining, desc="Building batch JSONL"), start=start_index):
        eid = entry.get("id") or entry.get("qid") or idx
        dynamic_pairs = entry.get("dynamic_pairs", [])
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            out.append({
                "custom_id": f"{sanitize(eid)}__dp{i}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 75/75 [00:00<00:00, 25424.13it/s]

Wrote 370 requests to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot.jsonl





In [None]:
import openai
import time
import json

# API client used by main() below for the upload, batch creation, polling and download calls.
client = openai.OpenAI()

# JSONL of batch requests that main() uploads.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot.jsonl"
# File main() writes the batch output to. The previous value contained the
# absolute directory twice ("…/qald_results//home/m2khoda/…"), which points at
# a non-existent nested directory.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_batch_output_from_75.jsonl"

def main():
    """Upload the batch input file, run the batch to completion, and save its output.

    Steps: upload ``BATCH_INPUT_FILE_PATH`` with purpose ``"batch"``, create a
    batch against ``/v1/chat/completions``, poll once a minute until the batch
    reaches a terminal status, then write the output file's text to
    ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: with code 1 if the batch ends in any status other than
            ``"completed"``, or completes without an output file.
    """
    # Close the upload handle deterministically instead of leaking it.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "QALD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    # A batch can be "completed" with no output file (e.g. when every request
    # failed); treat that like a failure rather than requesting a None file id.
    output_file_id = getattr(b, "output_file_id", None)
    if b.status != "completed" or not output_file_id:
        print("Batch ended with status:", b.status)
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


In [21]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# JSON whose dynamic_pairs entries receive a "cot" field in main().
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs.json"
# Batch output JSONL read by parse_batch_output().
# NOTE(review): this directory is spelled "end_to_end_evalution" while MERGED_OUTPUT_JSON
# below uses "end_to_end_evaluation" — confirm both directories are intended.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_plus_gold.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its response text.

    For a 200 response the text is the stripped content of the first choice;
    if the body does not have that shape, the first 2000 characters of the
    JSON-encoded body are stored instead. For any other outcome the text is
    ``"[ERROR] <message>"``, where the message comes from the response body's
    ``error`` object, else from the line's top-level ``error`` object, else a
    generic status note. Lines without a ``custom_id`` are ignored.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON; the
            message names the file and the 1-based line number.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError as exc:
                # Same exception type, but say where the bad line is.
                raise json.JSONDecodeError(
                    f"{path}, line {line_no}: {exc.msg}", exc.doc, exc.pos
                ) from exc

            cid = obj.get("custom_id")
            if not cid:
                continue

            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape (or null content): keep a raw excerpt.
                    content = json.dumps(body)[:2000]
            else:
                err = (
                    (body.get("error") or {}).get("message")
                    # Failed requests may carry the error at the top level of the line.
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token usable inside a batch custom_id.

    The value is stringified, each whitespace run becomes a single ``_``, every
    remaining character outside ``a-zA-Z0-9._:-`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Read the original JSON and locate the list of question entries inside it.

    Returns:
        A ``(root, items, key)`` tuple: ``root`` is the parsed document,
        ``items`` is the list of entries (the same object that lives inside
        ``root``), and ``key`` is the dict key under which ``items`` was found,
        or ``None`` when the document itself is the list.

    Raises:
        ValueError: if the document is neither a list nor a dict holding a
            list of entries.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, list):
        return payload, payload, None

    if not isinstance(payload, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    questions = payload.get("questions")
    if isinstance(questions, list):
        return payload, questions, "questions"

    # Otherwise scan the dict for a list of dicts.
    found_key = _find_items_key_in_dict(payload)
    if found_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return payload, payload[found_key], found_key

def main():
    """Merge batch-generated CoT text into the dynamic pairs and save the result.

    Each pair is looked up by the custom id ``<sanitized entry id>__dp<i>``.
    Responses that parse_batch_output marked with the ``[ERROR]`` prefix are
    not written into the data: they are counted and reported separately, so
    the "Inserted" figure reflects real explanations only.
    """
    root_obj, items_list, _ = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    failed = 0
    total_pairs = 0

    for idx, entry in enumerate(items_list):
        # Entry id falls back to qid, then to the entry's position in the list.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit null value for dynamic_pairs.
        for i, dp in enumerate(entry.get("dynamic_pairs") or []):
            total_pairs += 1
            text = cid_to_text.get(f"{sanitize(eid)}__dp{i}")
            if text is None:
                continue
            if text.startswith("[ERROR]"):
                # Failed request: do not store the error message as a CoT.
                failed += 1
                continue
            dp["cot"] = text
            updated += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if failed:
        print(f"Skipped {failed} failed batch responses (marked [ERROR]); those pairs have no 'cot'.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 745/745 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_plus_gold.json


In [22]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Maximum number of triples rendered into any "Candidate Triples" section.
triples_limit = 10
# Number of dynamic_pairs demos embedded in the system message by build_system_msg.
NUM_DEMOS = 1

# Dataset read by main().
input_path  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_plus_gold.json"
# Batch request JSONL written by main().
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_new_cot_batch_input.jsonl"
# Model name placed in every request body.
MODEL = "ft:gpt-3.5-turbo-0125:personal::C9Nme00Y"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

def _coerce_triple(entry: Any) -> Union[str, List[str]]:
    if isinstance(entry, dict) and "triple" in entry:
        entry = entry["triple"]

    if isinstance(entry, dict):
        if {"s", "p", "o"} <= set(entry.keys()):
            return [str(entry["s"]), str(entry["p"]), str(entry["o"])]
        if {"subject", "predicate", "object"} <= set(entry.keys()):
            return [str(entry["subject"]), str(entry["predicate"]), str(entry["object"])]

    if isinstance(entry, (list, tuple)) and len(entry) == 3:
        return [str(entry[0]), str(entry[1]), str(entry[2])]

    if isinstance(entry, str):
        return entry.strip()

    return str(entry)

def _format_triples_for_prompt(seq: List[Any], limit: int) -> str:
    lines: List[str] = []
    for i, raw in enumerate(seq[:limit], 1):
        t = _coerce_triple(raw)
        if isinstance(t, str):
            triple_str = t
        else:
            triple_str = " ".join(map(str, t))
        lines.append(f"{i}. {triple_str}")
    return "\n".join(lines) if lines else "(none)"

def _get_triple_candidates(sample: Dict[str, Any]) -> List[Any]:
    candidate_keys: Iterable[str] = (
        "retrived_triples_ranked", 
        "retrieved_triples_ranked",
        "retrieved_triples_top10",
        "retrieved_triples",
        "triples",
    )
    for k in candidate_keys:
        if k in sample and sample[k]:
            return sample[k]
    return []

# System-message text used by build_system_msg when no demo can be rendered for a sample.
GENERIC_INSTR = (
    'Given a specific question and up to ten potentially relevant triples, '
    'generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any]) -> Dict[str, str]:
    """Compose the system message, embedding up to ``NUM_DEMOS`` worked demos.

    Demos come from ``sample["dynamic_pairs"]`` (the misspelled key
    ``"dynamic_paris"`` is accepted as a fallback). A demo lacking a question
    or a SPARQL string is skipped. When no demo can be rendered the message
    carries only ``GENERIC_INSTR``.
    """
    demos = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []
    # Keys probed, in order, for a demo's own candidate triples.
    triple_keys = (
        "retrieved_triples_top10",
        "retrived_triples_ranked",
        "retrieved_triples_ranked",
        "retrieved_triples",
        "triples",
    )

    rendered = []
    for number, raw_demo in enumerate(demos[:NUM_DEMOS], start=1):
        demo = raw_demo or {}
        q_text: str = str(demo.get("question", "")).strip()
        sparql_text: str = str(demo.get("sparql", "")).strip()
        cot_text: str = str(demo.get("cot", "")).strip()

        triples_seq = next((demo.get(key) for key in triple_keys if demo.get(key)), [])
        triples_text = _format_triples_for_prompt(triples_seq, triples_limit)

        if not (q_text and sparql_text):
            continue

        answer = (
            "<Answer>\n"
            f"{{\"sparql\": \"{_escape_json_string(sparql_text)}\"}}"
        )
        if cot_text:
            answer += f"\n<Chain-of-Thought>\n{_escape_json_string(cot_text)}"

        rendered.append(
            f"Example {number} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{q_text}\n\n"
            f"Candidate Triples (numbered, max 10):\n{triples_text}\n\n"
            f"Example {number} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{answer}\n"
        )

    if rendered:
        header = (
            "Given a specific question and up to ten potentially relevant triples, generate the\n"
            "corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\n"
            'with key "sparql" and the query as its string value.\n\n'
        )
        return {"role": "system", "content": header + "\n".join(rendered)}

    return {"role": "system", "content": GENERIC_INSTR}

def main():
    """Turn the input dataset into an OpenAI batch-request JSONL file.

    Reads the JSON dataset at ``input_path``, builds one system + user message
    pair per sample, writes one ``/v1/chat/completions`` request per line to
    ``batch_jsonl_path`` and prints a short preview of the first record.
    """
    with open(input_path, encoding="utf-8") as src:
        dataset = json.load(src)

    jsonl_rows = []
    for sample in dataset:
        question = sample.get("question", "").strip()
        candidates = _get_triple_candidates(sample)
        triples_str = _format_triples_for_prompt(candidates, triples_limit)

        user_msg = dict(
            role="user",
            content=f"Question:\n{question}\n\nCandidate Triples (max 10, numbered):\n{triples_str}",
        )
        jsonl_rows.append({"messages": [build_system_msg(sample), user_msg]})

    with open(batch_jsonl_path, "w", encoding="utf-8") as sink:
        for position, record in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{position}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": record["messages"],
                    "temperature": 0
                }
            }
            sink.write(json.dumps(request) + "\n")
    # Every row produces exactly one line, so the line count is the row count.
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        preview = json.dumps(jsonl_rows[0], indent=2)
        print("Preview of first record:\n", preview[:900])

if __name__ == "__main__":
    main()

[1/1] Wrote 150 batch lines to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_new_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question and up to ten potentially relevant triples, generate the\ncorresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON\nwith key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhat is the timezone in San Pedro de Atacama?\n\nCandidate Triples (numbered, max 10):\n1. res:San_Pedro_de_Atacama dbo:timeZone res:Time_in_Chile\n2. res:San_Pedro_de_Atacama dbp:timezone res:Time_in_Chile\n3. res:San_Pedro_de_Atacama dbo:wikiPageWikiLink res:Time_in_Chile\n4. res:2021_AV7 dbp:discoverySite res:San_Pedro_de_Atacama\n5. res:2021_AV7 dbo:wikiPageWikiLink res:San_Pedro_de_Atacama\n6. res:1577 dbo:wikiPageWikiLink res:Sa

In [None]:
from openai import OpenAI
import time
import json
client = OpenAI()

# Upload the request file. The `with` block guarantees the handle is closed
# even when the upload call raises.
with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_new_cot_batch_input.jsonl", "rb") as batch_input_file:
    upload = client.files.create(
        file=batch_input_file,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "QALD test inference"}
)
print("Batch ID:", batch.id)

# Poll until the batch reaches a status it can no longer leave. "expired" and
# "cancelled" must be included, otherwise such a batch would be polled forever.
while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

if batch.status != "completed":
    print(f"Batch did not complete (status: {batch.status}). Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id
# NOTE(review): a completed batch may carry no output file when every request
# errored; guard so the download below does not fail with an opaque error.
if not result_file_id:
    print("Batch completed without an output file. Full batch object:")
    print(batch)
    raise SystemExit(1)

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_1_dynamic_pairs_with_new_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

In [2]:
import json, re
from pathlib import Path

# JSON file holding the gold records; loaded below with json.load.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/qald_test_solo_stage_10_plus_dynamic_pairs_new_cot_plus_gold.json")
# Batch output JSONL; one response record per line, keyed by "custom_id".
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_new_cot_batch_output.jsonl")
# Destination for the gold records enriched with "refined_pred_query".
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_new_cot_plus_gold.json")

# Finds an <Answer> tag followed by a "{". Group 1 is greedy and may run past
# the JSON object, so only its START offset is used by extract_sparql.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the "sparql" value of the JSON object that follows ``<Answer>``.

    Only the first JSON object after the tag is decoded, so trailing text such
    as a ``<Chain-of-Thought>`` section containing ``{ ... }`` patterns no
    longer invalidates the answer.

    Args:
        content: Raw model reply; ``None`` or an empty string is accepted.

    Returns:
        The value stored under "sparql", or "" when there is no ``<Answer>``
        tag, the JSON is malformed, or the key is missing.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode stops at the end of the first JSON document and ignores
        # whatever follows it, unlike json.loads.
        payload, _consumed = json.JSONDecoder().raw_decode(content[m.start(1):])
    except json.JSONDecodeError:
        return ""
    return payload.get("sparql", "")

with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

# Map each batch custom_id to the SPARQL extracted from its reply.
pred_lookup = {}
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank or trailing lines in the JSONL file.
            continue
        rec = json.loads(line)
        cid = rec["custom_id"]
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # A failed request has no usable choices; record an empty
            # prediction instead of aborting the whole merge.
            content = ""
        pred_lookup[cid] = extract_sparql(content or "")

# Predictions are matched by position: the batch ids are "example_<index>".
for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/qald_test_solo_stage_10_plus_5_dynamic_pairs_with_new_cot_plus_gold.json. Total records: 150


CodeLlama - QALD

In [5]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Input dataset (records carrying "dynamic_pairs") and the batch request file
# that main() writes.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs.json"
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl"

# Request parameters, defined once (they were previously assigned twice in
# this cell with identical values).
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
MAX_TOKENS   = 500
RESUME       = True


SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

# Few-shot demonstrations sent ahead of every item. Example 9's PREFIX
# declarations now wrap their IRIs in <...>, as SPARQL requires and as
# example 3 already did.
FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item template; build_messages fills {question}, {sparql} and {triples_str}.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""

def load_any(path: str) -> List[Dict[str, Any]]:
    """Read a list of records from a ``.json`` or ``.jsonl`` file.

    A ``.jsonl`` file yields one record per non-blank line. For any other
    suffix the file is parsed as a single JSON document: a list is returned
    as is, a dict with a list under "questions" yields that list, and any
    other dict is wrapped in a one-element list.

    Raises:
        ValueError: If the JSON document is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for raw_line in text.splitlines():
            if raw_line.strip():
                records.append(json.loads(raw_line))
        return records

    parsed = json.loads(text)
    if isinstance(parsed, dict):
        questions = parsed.get("questions")
        return questions if isinstance(questions, list) else [parsed]
    if isinstance(parsed, list):
        return parsed
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render triples as a bulleted, human-readable block for the prompt.

    Args:
        triples: Usually a list. Elements that are 3-item lists or tuples are
            rendered as ``- <s, p, o>``; any other element is rendered as
            ``- <element>``. A non-list value is rendered with ``str()``.
            ``None`` is treated as "no triples".

    Returns:
        The rendered lines joined by newlines, or ``"- (none)"`` when there
        are no triples (an empty list or ``None``).
    """
    # A null "triples" field means nothing was provided; do not leak the
    # literal text "None" into the prompt.
    if triples is None:
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the system + user chat messages for one explanation request.

    The user message is the few-shot block followed by ``ITEM_INSTRUCTIONS``
    filled in with the stripped question, the rendered triples and the
    stripped SPARQL query.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": "\n\n".join((FEWSHOT_EXAMPLES, item_section))}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn any value into a short identifier-safe string.

    Whitespace runs collapse to one underscore, every character outside
    ``a-zA-Z0-9._:-`` becomes an underscore, and the result is cut to 120
    characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)[:120]

def main():
    """Write one chat-completion batch request per dynamic pair as JSONL.

    Each request's ``custom_id`` is ``"<sanitized entry id>__dp<pair index>"``,
    where the entry id is the first truthy value among ``id``, ``qid`` and the
    entry's position in the input.
    """
    entries = load_any(INPUT_PATH)
    requests = []
    for position, entry in enumerate(tqdm(entries, desc="Building batch JSONL")):
        entry_id = entry.get("id") or entry.get("qid") or position
        for pair_no, pair in enumerate(entry.get("dynamic_pairs", [])):
            pair_question = (pair.get("question") or "").strip()
            pair_triples = pair.get("triples", [])
            pair_sparql = (pair.get("sparql") or "").strip()
            chat = build_messages(pair_question, pair_triples, pair_sparql)

            requests.append({
                "custom_id": f"{sanitize(entry_id)}__dp{pair_no}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": chat
                }
            })

    # Make sure the destination directory exists before writing.
    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(req, ensure_ascii=False) + "\n" for req in requests)

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 150/150 [00:00<00:00, 25883.31it/s]

Wrote 750 requests to /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl





In [7]:
import openai
import time
import json

# OpenAI client used by main() for the upload, batch and download calls.
client = openai.OpenAI()

# JSONL request file that main() uploads.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl"
# Local file where main() saves the downloaded batch output text.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"

def main():
    """Upload the batch input file, run the batch, and save its output.

    Uploads ``BATCH_INPUT_FILE_PATH``, creates a ``/v1/chat/completions``
    batch, polls once a minute until the batch reaches a final status, and
    writes the output file's text to ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: If the batch does not complete, or completes without an
            output file. The start of the error file is printed when present.
    """
    # Upload inside `with` so the input handle is closed even if the call fails.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "QALD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    def _abort_with_error_file():
        # Show the beginning of the error file, when one exists, then stop.
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    if b.status != "completed":
        print("Batch ended with status:", b.status)
        _abort_with_error_file()

    # NOTE(review): a completed batch can lack an output file when every
    # request errored; fail clearly instead of requesting a file id of None.
    if not getattr(b, "output_file_id", None):
        print("Batch completed but produced no output file.")
        _abort_with_error_file()

    out_txt = client.files.content(b.output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


Uploaded file: file-KwFU9x7CYQkKSALdVJHDaa
Batch ID: batch_68b70d0eb36c8190aa743cd2a690650a
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl


In [8]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Original dataset whose "dynamic_pairs" receive the generated "cot" text.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"
# Destination of the merged JSON written by main().
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_plus_gold.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its reply text.

    For a 200 response the stripped message content is used; if it cannot be
    read, the JSON-dumped body (first 2000 characters) is used instead. For
    any other status the value is ``"[ERROR] <message>"``. Blank lines and
    records without a ``custom_id`` are skipped.
    """
    results: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            record_text = raw.strip()
            if record_text == "":
                continue
            record = json.loads(record_text)
            request_id = record.get("custom_id")
            response = record.get("response") or {}
            code = response.get("status_code")
            payload = response.get("body") or {}

            if code != 200:
                reason = (payload.get("error") or {}).get("message") or f"Non-200 status: {code}"
                text = f"[ERROR] {reason}"
            else:
                try:
                    text = payload["choices"][0]["message"]["content"].strip()
                except Exception:
                    # Fall back to the raw body so the record is not lost.
                    text = json.dumps(payload)[:2000]

            if request_id:
                results[request_id] = text
    return results

def sanitize(s: str) -> str:
    """Normalise a value into an id-safe string of at most 120 characters.

    Runs of whitespace become a single underscore; any remaining character
    outside ``a-zA-Z0-9._:-`` is replaced by an underscore.
    """
    return re.sub(
        r"[^a-zA-Z0-9._:-]",
        "_",
        re.sub(r"\s+", "_", str(s)),
    )[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate its list of items.

    Returns:
        ``(root, items, key)`` where ``root`` is the parsed document, ``items``
        is the list of item dicts inside it (the same object, not a copy) and
        ``key`` is the dict key under which the items were found, or ``None``
        when the document itself is the list.

    Raises:
        ValueError: If the document is neither a list nor a dict, or if no
            list of items can be found inside a dict.
    """
    with open(path, "r", encoding="utf-8") as fh:
        root = json.load(fh)

    if isinstance(root, list):
        return root, root, None
    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    if isinstance(root.get("questions"), list):
        return root, root["questions"], "questions"

    items_key = _find_items_key_in_dict(root)
    if items_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Attach generated CoT text to every dynamic pair and save the result.

    Each pair is looked up by ``"<sanitized entry id>__dp<pair index>"``, where
    the entry id is the first truthy value among ``id``, ``qid`` and the
    entry's position. Pairs without a matching batch result are left as is.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated, total_pairs = 0, 0
    for position, entry in enumerate(items_list):
        entry_id = entry.get("id") or entry.get("qid") or position
        for pair_no, pair in enumerate(entry.get("dynamic_pairs", [])):
            total_pairs += 1
            lookup_id = f"{sanitize(entry_id)}__dp{pair_no}"
            if lookup_id not in cid_to_text:
                continue
            pair["cot"] = cid_to_text[lookup_id]
            updated += 1

    # The pairs were edited in place, so dumping the root keeps its structure.
    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as sink:
        json.dump(root_obj, sink, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 750/750 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/end_to_end_evaluation/qald_results/codellama/qald_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_plus_gold.json


CodeLlama - LcQUAD

In [6]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Input dataset (records carrying "dynamic_pairs") and the batch request file
# that main() writes.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs.json"
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl"

# Request parameters, defined once (they were previously assigned twice in
# this cell with identical values).
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
MAX_TOKENS   = 500
RESUME       = True


SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

# Few-shot demonstrations sent ahead of every item. Example 9's PREFIX
# declarations now wrap their IRIs in <...>, as SPARQL requires and as
# example 3 already did.
FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item template; build_messages fills {question}, {sparql} and {triples_str}.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load records from a JSON or JSON Lines file.

    Files ending in ``.jsonl`` (any letter case) are parsed line by line,
    ignoring blank lines. Other files are parsed as one JSON document: a list
    is returned unchanged, a dict with a list under "questions" yields that
    list, and any other dict becomes a single-element list.

    Raises:
        ValueError: If the JSON document is neither a list nor a dict.
    """
    file_path = pathlib.Path(path)
    raw_text = file_path.read_text(encoding="utf-8")

    is_json_lines = file_path.suffix.lower() == ".jsonl"
    if is_json_lines:
        non_blank = filter(lambda ln: ln.strip(), raw_text.splitlines())
        return list(map(json.loads, non_blank))

    document = json.loads(raw_text)
    if isinstance(document, list):
        return document
    if not isinstance(document, dict):
        raise ValueError("Unrecognized JSON structure")
    nested = document.get("questions")
    if isinstance(nested, list):
        return nested
    return [document]

def to_human_triples(triples: Any) -> str:
    """Render triples as a bulleted, human-readable block for the prompt.

    Args:
        triples: Usually a list. Elements that are 3-item lists or tuples are
            rendered as ``- <s, p, o>``; any other element is rendered as
            ``- <element>``. A non-list value is rendered with ``str()``.
            ``None`` is treated as "no triples".

    Returns:
        The rendered lines joined by newlines, or ``"- (none)"`` when there
        are no triples (an empty list or ``None``).
    """
    # A null "triples" field means nothing was provided; do not leak the
    # literal text "None" into the prompt.
    if triples is None:
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages (system + user) for one question/SPARQL pair.

    The user message is the few-shot block, a blank line, and ITEM_INSTRUCTIONS filled
    with the stripped question, the stripped SPARQL and the rendered triples.

    Args:
        question: Natural-language question.
        triples: Gold triples, passed through ``to_human_triples``.
        formatted_query: SPARQL query text.

    Returns:
        A two-element list: the system message followed by the user message.
    """
    item_block = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": f"{FEWSHOT_EXAMPLES}\n\n{item_block}"}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary value into a short token usable inside a ``custom_id``.

    The value is stringified, every run of whitespace becomes a single ``_``, every
    remaining character outside ``a-z A-Z 0-9 . _ : -`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    max_len = 120  # keep ids short-ish
    token = str(s)
    # Pass 1 collapses whitespace runs; pass 2 replaces disallowed characters one-for-one.
    for pattern in (r"\s+", r"[^a-zA-Z0-9._:-]"):
        token = re.sub(pattern, "_", token)
    return token[:max_len]

def main():
    """Build one chat-completion batch request per dynamic pair and write them as JSONL.

    Reads the entries at INPUT_PATH with ``load_any``; for every element of an entry's
    ``dynamic_pairs`` list it builds the messages with ``build_messages`` and emits a
    request object (``custom_id``, ``method``, ``url``, ``body``). All requests are
    written to BATCH_OUTPUT_PATH, one JSON object per line, and a summary is printed.
    """
    items = load_any(INPUT_PATH)
    out = []
    seen_ids = set()
    duplicate_ids = []

    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # Falsy ids (None, "", 0) fall back to "qid" and then to the list position.
        # The merge step rebuilds custom_ids with this same rule, so keep them in sync.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers a JSON null stored under "dynamic_pairs", which the
        # default argument of .get() would not (enumerate(None) raises TypeError).
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            custom_id = f"{sanitize(eid)}__dp{i}"
            # sanitize() truncates and the idx fallback can coincide with a real id,
            # so two different pairs may end up with the same custom_id.
            if custom_id in seen_ids:
                duplicate_ids.append(custom_id)
            seen_ids.add(custom_id)

            out.append({
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    if duplicate_ids:
        print(
            f"WARNING: {len(duplicate_ids)} duplicate custom_id(s), e.g. {duplicate_ids[:5]}; "
            "their responses cannot be matched back to a single dynamic pair."
        )

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 7617.74it/s]


Wrote 5000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl


In [9]:
import openai
import time
import json

# API client used by main() below for the file upload, batch creation, polling and download.
# NOTE(review): no credentials are passed explicitly; presumably they come from the
# environment — confirm before running on a new machine.
client = openai.OpenAI()

# JSONL request file that main() uploads with purpose="batch".
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_input.jsonl"
# Destination for the batch output text that main() downloads once the batch completes.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"

def main():
    """Upload the batch input file, run it as an OpenAI batch, and save the output file.

    Steps: upload BATCH_INPUT_FILE_PATH, create a chat-completions batch with a 24h
    completion window, poll its status every 60 seconds until it reaches a terminal
    state, print the error file (if any), and write the output file's text to
    BATCH_OUTPUT_FILE_PATH.

    Raises:
        SystemExit: With code 1 when the batch ends in a status other than
            "completed", or completes without an output file.
    """
    # Context manager so the input file handle is closed as soon as the upload returns.
    with open(BATCH_INPUT_FILE_PATH, "rb") as input_fh:
        upload = client.files.create(
            file=input_fh,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    completed = b.status == "completed"
    if not completed:
        print("Batch ended with status:", b.status)

    # Requests that failed individually are reported in a separate error file. It can be
    # present on a "completed" batch too, so show it in both cases instead of silently
    # losing those requests.
    error_file_id = getattr(b, "error_file_id", None)
    if error_file_id:
        err_txt = client.files.content(error_file_id).text
        print("Error file content:\n", err_txt[:2000])

    if not completed:
        raise SystemExit(1)

    output_file_id = getattr(b, "output_file_id", None)
    if not output_file_id:
        # Nothing to download (e.g. every request failed); avoid calling the API with None.
        print("Batch completed but has no output file.")
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


Uploaded file: file-FWQWMz8s4kckeNq1kRWe3T
Batch ID: batch_68b70e33a5808190a6a5ee075e85e1ea
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl


In [10]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Dataset JSON whose entries carry the "dynamic_pairs" that main() annotates.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output() into custom_id -> text.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"
# Where main() writes the dataset after adding a "cot" field to the matched dynamic pairs.
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_plus_gold.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map every ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: Batch output file, one JSON object per line (blank lines are skipped).

    Returns:
        ``{custom_id: text}``. For a 200 response the text is the stripped content of
        the first choice; if the body does not have that shape, a JSON dump of the body
        truncated to 2000 characters is stored instead. For any other outcome the text
        is ``"[ERROR] <message>"``, where the message comes from the response body's
        ``error``, then from the line's top-level ``error``, then from the status code.
        Lines without a ``custom_id`` are ignored; a repeated id keeps the last line.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue

            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (LookupError, TypeError, AttributeError):
                    # Unexpected body shape (missing keys, empty choices, null content):
                    # keep a truncated dump so the entry can still be inspected.
                    content = json.dumps(body)[:2000]
            else:
                # A request that never produced an HTTP response has "response": null and
                # carries its failure in the top-level "error" object, so look there too.
                err = (
                    (body.get("error") or {}).get("message")
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Normalise a value into the token used as the first part of a ``custom_id``.

    Stringifies the value, replaces each whitespace run with one ``_``, replaces every
    other character outside ``a-z A-Z 0-9 . _ : -`` with ``_``, and keeps at most the
    first 120 characters.
    """
    max_len = 120
    token = str(s)
    # Order matters: whitespace runs are collapsed first, then single characters replaced.
    for pattern in (r"\s+", r"[^a-zA-Z0-9._:-]"):
        token = re.sub(pattern, "_", token)
    return token[:max_len]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    """Pick the key of ``d`` whose value is the list of item dicts.

    Only values that are lists made up entirely of dicts qualify (an empty list
    qualifies too). The first qualifying key, in dict order, with at least one element
    containing ``"dynamic_pairs"`` wins; otherwise the first qualifying key is returned.

    Returns:
        The chosen key, or ``None`` when no value qualifies.
    """
    fallbacks = []
    for key, value in d.items():
        if not isinstance(value, list):
            continue
        if not all(isinstance(element, dict) for element in value):
            continue
        if any("dynamic_pairs" in element for element in value):
            return key
        fallbacks.append(key)
    return fallbacks[0] if fallbacks else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of items inside it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        ``(root, items, key)`` where ``root`` is the parsed document, ``items`` is the
        list of entries (the same object that lives inside ``root``), and ``key`` is
        the dict key it was found under, or ``None`` when the root itself is the list.

    Raises:
        ValueError: If the root is neither a list nor a dict, or if it is a dict in
            which no list of items can be found.
    """
    root = json.loads(pathlib.Path(path).read_text(encoding="utf-8"))

    if isinstance(root, list):
        return root, root, None

    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    questions = root.get("questions")
    if isinstance(questions, list):
        return root, questions, "questions"

    items_key = _find_items_key_in_dict(root)
    if items_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Attach the batch results to the dataset as ``cot`` fields and write the merged JSON.

    Loads the original items and the ``custom_id -> text`` mapping, recomputes each
    dynamic pair's custom_id as ``f"{sanitize(eid)}__dp{i}"`` (same rule as the cell
    that built the batch), stores the matching text under ``dp["cot"]``, writes the
    whole document to MERGED_OUTPUT_JSON and prints a summary.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    total_pairs = 0
    error_placeholders = 0
    missing_ids = []

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers a JSON null stored under "dynamic_pairs".
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid in cid_to_text:
                dp["cot"] = cid_to_text[cid]
                updated += 1
                # parse_batch_output() stores failed requests as "[ERROR] ..." text;
                # count them so a polluted "cot" field does not go unnoticed.
                if cid_to_text[cid].startswith("[ERROR] "):
                    error_placeholders += 1
            else:
                missing_ids.append(cid)

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if missing_ids:
        print(f"No batch result for {len(missing_ids)} custom_id(s), e.g. {missing_ids[:10]}")
    if error_placeholders:
        print(f"{error_placeholders} inserted COT value(s) are '[ERROR] ...' placeholders.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 4999/5000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/lcquad_results/codellama/lcquad_test_solo_stage_10_codellama_plus_dynamic_pairs_new_cot_plus_gold.json


VQuAnDa - CodeLlama

In [5]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset with "dynamic_pairs" that main() turns into batch requests.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_plus_dynamic_pairs.json"
# Despite the name, this is the request JSONL that main() WRITES, i.e. the input file of the batch job.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_cot_batch_input.jsonl"
# MODEL and TEMPERATURE are assigned again, with the same values, further down in this cell.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
RESUME       = True  # not referenced by any code in this cell


# System message placed first in every request by build_messages().
SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: http://dbpedia.org/ontology/ PREFIX res: http://dbpedia.org/resource/ ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item prompt template. build_messages() fills {question}, {sparql} and {triples_str}
# through str.format, so any additional literal braces added to this text must be doubled.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""


# Generation settings that main() copies into the body of every batch request.
# MODEL and TEMPERATURE repeat, with identical values, the assignments at the top of this cell.
MODEL       = "gpt-4.1"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Read dataset records from a ``.jsonl`` or JSON file and return them as a list.

    Args:
        path: File to read (UTF-8). A ``.jsonl`` suffix (case-insensitive) selects
            line-by-line parsing; anything else is parsed as one JSON document.

    Returns:
        * ``.jsonl``: one parsed object per non-blank line.
        * JSON list: the list itself.
        * JSON object with a list under ``"questions"``: that list.
        * Any other JSON object: a one-element list wrapping the object.

    Raises:
        ValueError: If the JSON root is neither a list nor an object.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for raw_line in text.splitlines():
            if raw_line.strip():
                records.append(json.loads(raw_line))
        return records

    payload = json.loads(text)
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = payload.get("questions")
    return questions if isinstance(questions, list) else [payload]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bulleted text block for the prompt.

    Args:
        triples: Normally a list. Each 3-element list/tuple is rendered as
            ``- <s, p, o>``; any other list element is rendered as ``- {element}``.
            A non-list, non-``None`` value is rendered with ``str()`` on a single line.

    Returns:
        The rendered lines joined by newlines, or ``"- (none)"`` when there is
        nothing to show (``None`` or an empty list).
    """
    # A JSON null (key present, value None) means "no triples". Without this guard it
    # would reach the prompt as the literal text "None" instead of the "(none)" marker.
    if triples is None:
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages (system + user) for one question/SPARQL pair.

    The user message is the few-shot block, a blank line, and ITEM_INSTRUCTIONS filled
    with the stripped question, the stripped SPARQL and the rendered triples.

    Args:
        question: Natural-language question.
        triples: Gold triples, passed through ``to_human_triples``.
        formatted_query: SPARQL query text.

    Returns:
        A two-element list: the system message followed by the user message.
    """
    item_block = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip(),
    )
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": f"{FEWSHOT_EXAMPLES}\n\n{item_block}"}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary value into a short token usable inside a ``custom_id``.

    The value is stringified, every run of whitespace becomes a single ``_``, every
    remaining character outside ``a-z A-Z 0-9 . _ : -`` becomes ``_``, and the result
    is cut to at most 120 characters.
    """
    max_len = 120  # keep ids short-ish
    token = str(s)
    # Pass 1 collapses whitespace runs; pass 2 replaces disallowed characters one-for-one.
    for pattern in (r"\s+", r"[^a-zA-Z0-9._:-]"):
        token = re.sub(pattern, "_", token)
    return token[:max_len]

def main():
    """Build one chat-completion batch request per dynamic pair and write them as JSONL.

    Reads the entries at INPUT_PATH with ``load_any``; for every element of an entry's
    ``dynamic_pairs`` list it builds the messages with ``build_messages`` and emits a
    request object (``custom_id``, ``method``, ``url``, ``body``). All requests are
    written to BATCH_OUTPUT_PATH, one JSON object per line, and a summary is printed.
    """
    items = load_any(INPUT_PATH)
    out = []
    seen_ids = set()
    duplicate_ids = []

    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # Falsy ids (None, "", 0) fall back to "qid" and then to the list position.
        # The merge step rebuilds custom_ids with this same rule, so keep them in sync.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers a JSON null stored under "dynamic_pairs", which the
        # default argument of .get() would not (enumerate(None) raises TypeError).
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            custom_id = f"{sanitize(eid)}__dp{i}"
            # sanitize() truncates and the idx fallback can coincide with a real id,
            # so two different pairs may end up with the same custom_id.
            if custom_id in seen_ids:
                duplicate_ids.append(custom_id)
            seen_ids.add(custom_id)

            out.append({
                "custom_id": custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    if duplicate_ids:
        print(
            f"WARNING: {len(duplicate_ids)} duplicate custom_id(s), e.g. {duplicate_ids[:5]}; "
            "their responses cannot be matched back to a single dynamic pair."
        )

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 16008.06it/s]


Wrote 5000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_cot_batch_input.jsonl


In [6]:
import openai
import time
import json

# API client used by main() below for the file upload, batch creation, polling and download.
# NOTE(review): no credentials are passed explicitly; presumably they come from the
# environment — confirm before running on a new machine.
client = openai.OpenAI()

# JSONL request file that main() uploads with purpose="batch".
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_cot_batch_input.jsonl"
# Destination for the batch output text that main() downloads once the batch completes.
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"

def main():
    """Upload the batch input file, run it as an OpenAI batch, and save the output file.

    Steps: upload BATCH_INPUT_FILE_PATH, create a chat-completions batch with a 24h
    completion window, poll its status every 60 seconds until it reaches a terminal
    state, print the error file (if any), and write the output file's text to
    BATCH_OUTPUT_FILE_PATH.

    Raises:
        SystemExit: With code 1 when the batch ends in a status other than
            "completed", or completes without an output file.
    """
    # Context manager so the input file handle is closed as soon as the upload returns.
    with open(BATCH_INPUT_FILE_PATH, "rb") as input_fh:
        upload = client.files.create(
            file=input_fh,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        # This cell processes the VQuAnDa files; the label was a leftover "LcQUAD_COT"
        # from the cell it was copied from.
        metadata={"job": "VQuAnDa_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    completed = b.status == "completed"
    if not completed:
        print("Batch ended with status:", b.status)

    # Requests that failed individually are reported in a separate error file. It can be
    # present on a "completed" batch too, so show it in both cases instead of silently
    # losing those requests.
    error_file_id = getattr(b, "error_file_id", None)
    if error_file_id:
        err_txt = client.files.content(error_file_id).text
        print("Error file content:\n", err_txt[:2000])

    if not completed:
        raise SystemExit(1)

    output_file_id = getattr(b, "output_file_id", None)
    if not output_file_id:
        # Nothing to download (e.g. every request failed); avoid calling the API with None.
        print("Batch completed but has no output file.")
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


Uploaded file: file-RqhBqqenT1YxAYZSfmChik
Batch ID: batch_68dd8c9de71481908593f5ba4567e82f
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl


In [7]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# Dataset JSON whose entries carry the "dynamic_pairs" that main() annotates.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_plus_dynamic_pairs.json"
# Batch output JSONL parsed by parse_batch_output() into custom_id -> text.
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_new_cot_batch_output.jsonl"
# Where main() writes the dataset after adding a "cot" field to the matched dynamic pairs.
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map every ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: Batch output file, one JSON object per line (blank lines are skipped).

    Returns:
        ``{custom_id: text}``. For a 200 response the text is the stripped content of
        the first choice; if the body does not have that shape, a JSON dump of the body
        truncated to 2000 characters is stored instead. For any other outcome the text
        is ``"[ERROR] <message>"``, where the message comes from the response body's
        ``error``, then from the line's top-level ``error``, then from the status code.
        Lines without a ``custom_id`` are ignored; a repeated id keeps the last line.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue

            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (LookupError, TypeError, AttributeError):
                    # Unexpected body shape (missing keys, empty choices, null content):
                    # keep a truncated dump so the entry can still be inspected.
                    content = json.dumps(body)[:2000]
            else:
                # A request that never produced an HTTP response has "response": null and
                # carries its failure in the top-level "error" object, so look there too.
                err = (
                    (body.get("error") or {}).get("message")
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Normalise a value into the token used as the first part of a ``custom_id``.

    Stringifies the value, replaces each whitespace run with one ``_``, replaces every
    other character outside ``a-z A-Z 0-9 . _ : -`` with ``_``, and keeps at most the
    first 120 characters.
    """
    max_len = 120
    token = str(s)
    # Order matters: whitespace runs are collapsed first, then single characters replaced.
    for pattern in (r"\s+", r"[^a-zA-Z0-9._:-]"):
        token = re.sub(pattern, "_", token)
    return token[:max_len]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    """Pick the key of ``d`` whose value is the list of item dicts.

    Only values that are lists made up entirely of dicts qualify (an empty list
    qualifies too). The first qualifying key, in dict order, with at least one element
    containing ``"dynamic_pairs"`` wins; otherwise the first qualifying key is returned.

    Returns:
        The chosen key, or ``None`` when no value qualifies.
    """
    fallbacks = []
    for key, value in d.items():
        if not isinstance(value, list):
            continue
        if not all(isinstance(element, dict) for element in value):
            continue
        if any("dynamic_pairs" in element for element in value):
            return key
        fallbacks.append(key)
    return fallbacks[0] if fallbacks else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of items inside it.

    Args:
        path: JSON file to read (UTF-8).

    Returns:
        ``(root, items, key)`` where ``root`` is the parsed document, ``items`` is the
        list of entries (the same object that lives inside ``root``), and ``key`` is
        the dict key it was found under, or ``None`` when the root itself is the list.

    Raises:
        ValueError: If the root is neither a list nor a dict, or if it is a dict in
            which no list of items can be found.
    """
    root = json.loads(pathlib.Path(path).read_text(encoding="utf-8"))

    if isinstance(root, list):
        return root, root, None

    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    questions = root.get("questions")
    if isinstance(questions, list):
        return root, questions, "questions"

    items_key = _find_items_key_in_dict(root)
    if items_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[items_key], items_key

def main():
    """Attach the batch results to the dataset as ``cot`` fields and write the merged JSON.

    Loads the original items and the ``custom_id -> text`` mapping, recomputes each
    dynamic pair's custom_id as ``f"{sanitize(eid)}__dp{i}"`` (same rule as the cell
    that built the batch), stores the matching text under ``dp["cot"]``, writes the
    whole document to MERGED_OUTPUT_JSON and prints a summary.
    """
    root_obj, items_list, _items_key = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    total_pairs = 0
    error_placeholders = 0
    missing_ids = []

    for idx, entry in enumerate(items_list):
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers a JSON null stored under "dynamic_pairs".
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid in cid_to_text:
                dp["cot"] = cid_to_text[cid]
                updated += 1
                # parse_batch_output() stores failed requests as "[ERROR] ..." text;
                # count them so a polluted "cot" field does not go unnoticed.
                if cid_to_text[cid].startswith("[ERROR] "):
                    error_placeholders += 1
            else:
                missing_ids.append(cid)

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if missing_ids:
        print(f"No batch result for {len(missing_ids)} custom_id(s), e.g. {missing_ids[:10]}")
    if error_placeholders:
        print(f"{error_placeholders} inserted COT value(s) are '[ERROR] ...' placeholders.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()


Inserted COT for 5000/5000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/codellama/vquanda_test_codellama_top_10_codellama_plus_dynamic_pairs_cot.json


VQuAnDa - Mixtral

In [4]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Dataset file that main() loads through load_any().
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs.json"
# NOTE(review): despite the name, the file name ends in "_batch_input.jsonl"; presumably this is
# the request file written by main(), as in the sibling cells — confirm.
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/vquanda/vquanda_test_mixtral_top_10_plus_dynamic_pairs_cot_batch_input.jsonl"
# MODEL and TEMPERATURE are assigned again, with the same values, further down in this cell.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
RESUME       = True  # NOTE(review): no use of RESUME is visible in this cell — confirm it is still needed


# System message placed first in every request by build_messages().
SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: http://dbpedia.org/ontology/ PREFIX res: http://dbpedia.org/resource/ ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item prompt template. build_messages() fills {question}, {sparql} and {triples_str}
# through str.format, so any additional literal braces added to this text must be doubled.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""


# Generation settings for the batch requests.
# MODEL and TEMPERATURE repeat, with identical values, the assignments at the top of this cell.
# NOTE(review): MAX_TOKENS is presumably sent as "max_tokens" by main(), as in the sibling cells — confirm.
MODEL       = "gpt-4.1"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load a list of records from a ``.json`` or ``.jsonl`` file.

    Args:
        path: UTF-8 file to read. A ``.jsonl`` suffix (case-insensitive)
            selects one-JSON-document-per-line parsing; any other suffix is
            parsed as a single JSON document.

    Returns:
        For ``.jsonl``: the parsed non-blank lines, in file order.
        For JSON: the top-level list itself; or the list stored under
        ``"questions"`` when the top level is a dict holding one; or a
        one-element list wrapping any other dict.

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    p = pathlib.Path(path)
    if p.suffix.lower() == ".jsonl":
        # Iterate the file instead of calling str.splitlines(): splitlines()
        # also breaks on U+2028 / U+2029 / U+0085, which
        # json.dumps(..., ensure_ascii=False) leaves unescaped inside strings,
        # so a record containing one of them would be cut in half.
        with p.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]
    data = json.loads(p.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        questions = data.get("questions")
        # A dict without a "questions" list is treated as a single record.
        return questions if isinstance(questions, list) else [data]
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bulleted, human-readable block of text.

    Args:
        triples: Normally a list. Items that are 3-element lists/tuples are
            rendered as ``- <s, p, o>``; any other item as ``- <item>``.
            A non-list value is simply converted with ``str()``.

    Returns:
        One line per item joined with newlines, ``"- (none)"`` for an empty
        list, or ``str(triples)`` when ``triples`` is not a list.
    """
    if not isinstance(triples, list):
        return str(triples)
    if not triples:
        return "- (none)"

    def _render(item: Any) -> str:
        # Only exact 3-element sequences get the <s, p, o> layout.
        if isinstance(item, (list, tuple)) and len(item) == 3:
            subj, pred, obj = item
            return f"- <{subj}, {pred}, {obj}>"
        return f"- {item}"

    return "\n".join(_render(item) for item in triples)

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages for one question/SPARQL pair.

    Args:
        question: Natural-language question; surrounding whitespace is stripped.
        triples: Gold triples, rendered with ``to_human_triples``.
        formatted_query: SPARQL text; surrounding whitespace is stripped.

    Returns:
        A two-element list: the system message carrying ``SYSTEM_PROMPT`` and a
        user message made of ``FEWSHOT_EXAMPLES``, a blank line, and the filled-in
        ``ITEM_INSTRUCTIONS`` template.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip()
    )
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": FEWSHOT_EXAMPLES + "\n\n" + item_section}
    return [system_turn, user_turn]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token safe to embed in a custom_id.

    The value is converted with ``str()``, each run of whitespace becomes a
    single underscore, every remaining character outside ``a-zA-Z0-9._:-``
    becomes an underscore, and the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    # keep ids short-ish
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)[:120]

def main():
    """Build the batch request file: one chat-completions request per dynamic pair.

    Loads the entries from ``INPUT_PATH``, turns every element of each entry's
    ``dynamic_pairs`` into a request, writes the requests to
    ``BATCH_OUTPUT_PATH`` as JSONL (creating the parent directory if needed),
    and prints how many were written.

    Each request's ``custom_id`` is ``<sanitize(entry id)>__dp<pair index>``.
    The entry id is ``id``, else ``qid``, else the entry's position; because the
    fallback uses ``or``, a falsy id such as ``0`` also falls through.
    """
    requests = []
    entries = load_any(INPUT_PATH)
    for position, record in enumerate(tqdm(entries, desc="Building batch JSONL")):
        record_id = record.get("id") or record.get("qid") or position
        for pair_no, pair in enumerate(record.get("dynamic_pairs", [])):
            # Missing or None question/sparql values become empty strings.
            body = {
                "model": MODEL,
                "temperature": TEMPERATURE,
                "max_tokens": MAX_TOKENS,
                "messages": build_messages(
                    (pair.get("question") or "").strip(),
                    pair.get("triples", []),
                    (pair.get("sparql") or "").strip(),
                )
            }
            requests.append({
                "custom_id": f"{sanitize(record_id)}__dp{pair_no}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": body
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(req, ensure_ascii=False) + "\n" for req in requests)

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

# Run the builder when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 29688.72it/s]


Wrote 3000 requests to /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/vquanda/vquanda_test_mixtral_top_10_plus_dynamic_pairs_cot_batch_input.jsonl


In [5]:
import openai
import time
import json

# Client is built with no explicit arguments.
# NOTE(review): presumably credentials come from the environment — confirm.
client = openai.OpenAI()

# JSONL request file that main() uploads, and the file the downloaded batch output is saved to.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/vquanda/vquanda_test_mixtral_top_10_plus_dynamic_pairs_cot_batch_input.jsonl"
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs_new_cot_batch_output.jsonl"

def main():
    """Upload the batch request file, wait for the batch to finish, and save its output.

    Prints the uploaded file id, the batch id and every polled status. On
    success the output file's text is written to ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: with code 1 when the batch ends in any state other than
            ``"completed"``, or completes without an output file. In both
            cases the first 2000 characters of the error file are printed
            first, if the batch has one.
    """
    # Close the request file once the upload call returns instead of leaving
    # the handle open for the rest of the kernel session.
    with open(BATCH_INPUT_FILE_PATH, "rb") as request_file:
        upload = client.files.create(
            file=request_file,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        # NOTE(review): the label says LcQUAD although this cell's paths refer
        # to vquanda; left unchanged because it is sent to the API as-is.
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll once a minute until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    output_file_id = getattr(b, "output_file_id", None)
    if b.status != "completed" or not output_file_id:
        if b.status != "completed":
            print("Batch ended with status:", b.status)
        else:
            # Without this guard files.content() would be called with no id.
            print("Batch completed without an output file.")
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

# Run the submission when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Uploaded file: file-FgvZvSpt4EFpW9c539kShF
Batch ID: batch_68deb09f2aa88190a52e982d733e601c
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs_new_cot_batch_output.jsonl


In [6]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# ORIG_INPUT_PATH:    dataset JSON whose dynamic_pairs receive a "cot" field in main().
# BATCH_OUTPUT_FILE:  batch output JSONL read by parse_batch_output().
# MERGED_OUTPUT_JSON: where main() writes the merged JSON.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs.json"
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs_new_cot_batch_output.jsonl"
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: UTF-8 JSONL file; blank lines are skipped.

    Returns:
        ``{custom_id: text}``. For a 200 response the text is the stripped
        content of the first choice's message, or the first 2000 characters of
        the JSON-dumped body when that content cannot be extracted. For
        anything else the text is ``"[ERROR] <message>"``, where the message
        comes from the response body's ``error``, else from the record-level
        ``error`` object, else a ``Non-200 status`` placeholder. Records
        without a ``custom_id`` are ignored; a repeated id keeps the last
        occurrence.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape (or null content): keep a bounded
                    # dump so the problem stays visible downstream.
                    content = json.dumps(body)[:2000]
            else:
                # Requests that never produced a response carry their error at
                # the record level rather than inside the response body.
                err = (
                    (body.get("error") or {}).get("message")
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token safe to embed in a custom_id.

    The value is converted with ``str()``, each run of whitespace becomes a
    single underscore, every remaining character outside ``a-zA-Z0-9._:-``
    becomes an underscore, and the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of items inside it.

    Args:
        path: UTF-8 JSON file.

    Returns:
        ``(root, items, key)``: ``root`` is the parsed document, ``items`` the
        list of item records (the very same object as inside ``root``, so edits
        to items show up in ``root``), and ``key`` the dict key the list was
        found under, or ``None`` when the document itself is the list.

    Raises:
        ValueError: if the document is neither a list nor a dict, or is a dict
            in which no list of items can be found.
    """
    with open(path, "r", encoding="utf-8") as handle:
        root = json.load(handle)

    if isinstance(root, list):
        return root, root, None

    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    questions = root.get("questions")
    if isinstance(questions, list):
        return root, questions, "questions"

    # Otherwise fall back to searching the dict for a list of records.
    found_key = _find_items_key_in_dict(root)
    if found_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[found_key], found_key

def main():
    """Attach the batch results to the original dataset and write the merged JSON.

    For every dynamic pair, the ``custom_id`` is rebuilt as
    ``<sanitize(entry id)>__dp<pair index>`` (entry id is ``id``, else ``qid``,
    else the entry's position). When the batch output holds text for that id it
    is stored under the pair's ``"cot"`` key exactly as parsed, which includes
    any ``"[ERROR] ..."`` text produced by ``parse_batch_output``. The whole
    document is then written to ``MERGED_OUTPUT_JSON`` and a summary printed.
    """
    # The third return value (the key the items were found under) is not needed here.
    root_obj, items_list, _ = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    total_pairs = 0
    updated = 0

    for position, record in enumerate(items_list):
        record_id = record.get("id") or record.get("qid") or position
        for pair_no, pair in enumerate(record.get("dynamic_pairs", [])):
            total_pairs += 1
            lookup = f"{sanitize(record_id)}__dp{pair_no}"
            if lookup not in cid_to_text:
                continue
            pair["cot"] = cid_to_text[lookup]
            updated += 1

    target = pathlib.Path(MERGED_OUTPUT_JSON)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as sink:
        json.dump(root_obj, sink, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

# Run the merge when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Inserted COT for 3000/3000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/mixtral/vquanda_test_mixtral_top_10_plus_dynamic_pairs_cot.json


Vquanda - GPT

In [1]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Input dataset for this cell, and the JSONL request file that main() writes.
# NOTE: despite its name, BATCH_OUTPUT_PATH is the file this cell produces,
# i.e. the *input* file for the batch job.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs.json"
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_input.jsonl"
# MODEL and TEMPERATURE are assigned again, with the same values, further down this cell.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not referenced by any function defined in this cell.
RESUME       = True 


# System message sent with every request (see build_messages()).
SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: http://dbpedia.org/ontology/ PREFIX res: http://dbpedia.org/resource/ ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item tail of the user prompt. The {question}, {sparql} and {triples_str}
# placeholders are filled in by build_messages() through str.format().
# NOTE(review): example 9 in FEWSHOT_EXAMPLES above writes its PREFIX IRIs
# without the enclosing <...> that example 3 uses — confirm this is intended.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""


# Request parameters copied into the body of every batch request by main().
# MODEL and TEMPERATURE repeat the values already assigned at the top of this cell.
MODEL       = "gpt-4.1"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Load a list of records from a ``.json`` or ``.jsonl`` file.

    Args:
        path: UTF-8 file to read. A ``.jsonl`` suffix (case-insensitive)
            selects one-JSON-document-per-line parsing; any other suffix is
            parsed as a single JSON document.

    Returns:
        For ``.jsonl``: the parsed non-blank lines, in file order.
        For JSON: the top-level list itself; or the list stored under
        ``"questions"`` when the top level is a dict holding one; or a
        one-element list wrapping any other dict.

    Raises:
        ValueError: if the JSON document is neither a list nor a dict.
    """
    p = pathlib.Path(path)
    if p.suffix.lower() == ".jsonl":
        # Iterate the file instead of calling str.splitlines(): splitlines()
        # also breaks on U+2028 / U+2029 / U+0085, which
        # json.dumps(..., ensure_ascii=False) leaves unescaped inside strings,
        # so a record containing one of them would be cut in half.
        with p.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]
    data = json.loads(p.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        questions = data.get("questions")
        # A dict without a "questions" list is treated as a single record.
        return questions if isinstance(questions, list) else [data]
    raise ValueError("Unrecognized JSON structure")

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bulleted, human-readable block of text.

    Args:
        triples: Normally a list. Items that are 3-element lists/tuples are
            rendered as ``- <s, p, o>``; any other item as ``- <item>``.
            A non-list value is simply converted with ``str()``.

    Returns:
        One line per item joined with newlines, ``"- (none)"`` for an empty
        list, or ``str(triples)`` when ``triples`` is not a list.
    """
    if not isinstance(triples, list):
        return str(triples)
    if not triples:
        return "- (none)"

    def _render(item: Any) -> str:
        # Only exact 3-element sequences get the <s, p, o> layout.
        if isinstance(item, (list, tuple)) and len(item) == 3:
            subj, pred, obj = item
            return f"- <{subj}, {pred}, {obj}>"
        return f"- {item}"

    return "\n".join(_render(item) for item in triples)

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages for one question/SPARQL pair.

    Args:
        question: Natural-language question; surrounding whitespace is stripped.
        triples: Gold triples, rendered with ``to_human_triples``.
        formatted_query: SPARQL text; surrounding whitespace is stripped.

    Returns:
        A two-element list: the system message carrying ``SYSTEM_PROMPT`` and a
        user message made of ``FEWSHOT_EXAMPLES``, a blank line, and the filled-in
        ``ITEM_INSTRUCTIONS`` template.
    """
    item_section = ITEM_INSTRUCTIONS.format(
        question=question.strip(),
        triples_str=to_human_triples(triples),
        sparql=formatted_query.strip()
    )
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": FEWSHOT_EXAMPLES + "\n\n" + item_section}
    return [system_turn, user_turn]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token safe to embed in a custom_id.

    The value is converted with ``str()``, each run of whitespace becomes a
    single underscore, every remaining character outside ``a-zA-Z0-9._:-``
    becomes an underscore, and the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    # keep ids short-ish
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)[:120]

def main():
    """Build the batch request file: one chat-completions request per dynamic pair.

    Loads the entries from ``INPUT_PATH``, turns every element of each entry's
    ``dynamic_pairs`` into a request, writes the requests to
    ``BATCH_OUTPUT_PATH`` as JSONL (creating the parent directory if needed),
    and prints how many were written.

    Each request's ``custom_id`` is ``<sanitize(entry id)>__dp<pair index>``.
    The entry id is ``id``, else ``qid``, else the entry's position; because the
    fallback uses ``or``, a falsy id such as ``0`` also falls through.
    """
    requests = []
    entries = load_any(INPUT_PATH)
    for position, record in enumerate(tqdm(entries, desc="Building batch JSONL")):
        record_id = record.get("id") or record.get("qid") or position
        for pair_no, pair in enumerate(record.get("dynamic_pairs", [])):
            # Missing or None question/sparql values become empty strings.
            body = {
                "model": MODEL,
                "temperature": TEMPERATURE,
                "max_tokens": MAX_TOKENS,
                "messages": build_messages(
                    (pair.get("question") or "").strip(),
                    pair.get("triples", []),
                    (pair.get("sparql") or "").strip(),
                )
            }
            requests.append({
                "custom_id": f"{sanitize(record_id)}__dp{pair_no}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": body
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(req, ensure_ascii=False) + "\n" for req in requests)

    print(f"Wrote {len(requests)} requests to {BATCH_OUTPUT_PATH}")

# Run the builder when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 1000/1000 [00:00<00:00, 26086.41it/s]


Wrote 3000 requests to /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_input.jsonl


In [2]:
import openai
import time
import json

# Client is built with no explicit arguments.
# NOTE(review): presumably credentials come from the environment — confirm.
client = openai.OpenAI()

# JSONL request file that main() uploads, and the file the downloaded batch output is saved to.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_input.jsonl"
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the batch request file, wait for the batch to finish, and save its output.

    Prints the uploaded file id, the batch id and every polled status. On
    success the output file's text is written to ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: with code 1 when the batch ends in any state other than
            ``"completed"``, or completes without an output file. In both
            cases the first 2000 characters of the error file are printed
            first, if the batch has one.
    """
    # Close the request file once the upload call returns instead of leaving
    # the handle open for the rest of the kernel session.
    with open(BATCH_INPUT_FILE_PATH, "rb") as request_file:
        upload = client.files.create(
            file=request_file,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        # NOTE(review): the label says LcQUAD although this cell's paths refer
        # to vquanda; left unchanged because it is sent to the API as-is.
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll once a minute until the batch reaches a terminal status.
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    output_file_id = getattr(b, "output_file_id", None)
    if b.status != "completed" or not output_file_id:
        if b.status != "completed":
            print("Batch ended with status:", b.status)
        else:
            # Without this guard files.content() would be called with no id.
            print("Batch completed without an output file.")
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

# Run the submission when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Uploaded file: file-49maACkUMd6hVxjPGuwXRC
Batch ID: batch_68e2b955a62481908074b45fd6b35961
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: finalizing
Status: finalizing
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_output.jsonl


In [4]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# ORIG_INPUT_PATH:    dataset JSON whose dynamic_pairs receive a "cot" field in main().
# BATCH_OUTPUT_FILE:  batch output JSONL read by parse_batch_output().
# MERGED_OUTPUT_JSON: where main() writes the merged JSON.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs.json"
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot_batch_output.jsonl"
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its result text.

    Args:
        path: UTF-8 JSONL file; blank lines are skipped.

    Returns:
        ``{custom_id: text}``. For a 200 response the text is the stripped
        content of the first choice's message, or the first 2000 characters of
        the JSON-dumped body when that content cannot be extracted. For
        anything else the text is ``"[ERROR] <message>"``, where the message
        comes from the response body's ``error``, else from the record-level
        ``error`` object, else a ``Non-200 status`` placeholder. Records
        without a ``custom_id`` are ignored; a repeated id keeps the last
        occurrence.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                continue
            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}
            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape (or null content): keep a bounded
                    # dump so the problem stays visible downstream.
                    content = json.dumps(body)[:2000]
            else:
                # Requests that never produced a response carry their error at
                # the record level rather than inside the response body.
                err = (
                    (body.get("error") or {}).get("message")
                    or (obj.get("error") or {}).get("message")
                    or f"Non-200 status: {status_code}"
                )
                content = f"[ERROR] {err}"
            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token safe to embed in a custom_id.

    The value is converted with ``str()``, each run of whitespace becomes a
    single underscore, every remaining character outside ``a-zA-Z0-9._:-``
    becomes an underscore, and the result is cut to at most 120 characters.
    """
    collapsed = re.sub(r"\s+", "_", str(s))
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load the original JSON and locate the list of items inside it.

    Args:
        path: UTF-8 JSON file.

    Returns:
        ``(root, items, key)``: ``root`` is the parsed document, ``items`` the
        list of item records (the very same object as inside ``root``, so edits
        to items show up in ``root``), and ``key`` the dict key the list was
        found under, or ``None`` when the document itself is the list.

    Raises:
        ValueError: if the document is neither a list nor a dict, or is a dict
            in which no list of items can be found.
    """
    with open(path, "r", encoding="utf-8") as handle:
        root = json.load(handle)

    if isinstance(root, list):
        return root, root, None

    if not isinstance(root, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    questions = root.get("questions")
    if isinstance(questions, list):
        return root, questions, "questions"

    # Otherwise fall back to searching the dict for a list of records.
    found_key = _find_items_key_in_dict(root)
    if found_key is None:
        raise ValueError("Could not locate the list of items in the original JSON.")
    return root, root[found_key], found_key

def main():
    """Attach the batch results to the original dataset and write the merged JSON.

    For every dynamic pair, the ``custom_id`` is rebuilt as
    ``<sanitize(entry id)>__dp<pair index>`` (entry id is ``id``, else ``qid``,
    else the entry's position). When the batch output holds text for that id it
    is stored under the pair's ``"cot"`` key exactly as parsed, which includes
    any ``"[ERROR] ..."`` text produced by ``parse_batch_output``. The whole
    document is then written to ``MERGED_OUTPUT_JSON`` and a summary printed.
    """
    # The third return value (the key the items were found under) is not needed here.
    root_obj, items_list, _ = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    total_pairs = 0
    updated = 0

    for position, record in enumerate(items_list):
        record_id = record.get("id") or record.get("qid") or position
        for pair_no, pair in enumerate(record.get("dynamic_pairs", [])):
            total_pairs += 1
            lookup = f"{sanitize(record_id)}__dp{pair_no}"
            if lookup not in cid_to_text:
                continue
            pair["cot"] = cid_to_text[lookup]
            updated += 1

    target = pathlib.Path(MERGED_OUTPUT_JSON)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as sink:
        json.dump(root_obj, sink, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

# Run the merge when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Inserted COT for 3000/3000 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/dycot/vquanda_results/gpt/vquanda_test_gpt_top_10_plus_dynamic_pairs_cot.json


Zero Shot GPT QALD

In [1]:
import os
import json
import pathlib
from typing import Any, Dict, List
from tqdm import tqdm
import re
import openai

# Input dataset for this cell, and the JSONL request file path.
# NOTE: despite its name, BATCH_OUTPUT_PATH points at a file whose name ends in
# "_batch_input.jsonl", i.e. the request file for the batch job.
INPUT_PATH   = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_gold_dynamic_pairs.json"
BATCH_OUTPUT_PATH = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl"
# MODEL and TEMPERATURE are assigned again, with the same values, further down this cell.
MODEL        = "gpt-4.1"
TEMPERATURE  = 0.0
# NOTE(review): RESUME is not referenced by any function visible in this cell.
RESUME       = True 


# System message sent with every request (see build_messages()).
SYSTEM_PROMPT = """You are an expert DBpedia/SPARQL explainer.
Your job is to generate a chain-of-thought (CoT) style explanation for a given question and its corresponding DBpedia SPARQL.
Write in the first person (“I …”). When applicable, begin with:
- Entity assignment: mapping named entities in the question to their ids (e.g., res:, dbo:)
- Predicate ids: briefly name the predicates involved
Then, in a few sentences, explain how the SPARQL answers the question.
Be precise, technical where helpful, and avoid extra fluff or markdown headings."""

FEWSHOT_EXAMPLES = """
Your task is to generate a chain-of-thought (CoT) for a pair of question and its corresponding DBPedia SPARQL. Below are example pairs. For each, provide a detailed, simplified explanation written from the perspective of "I". Use technical terms where helpful. For each example, first specify which entity ids are assigned to each named entity mentioned in the question. Also, explain the predicate ids used when needed.

1. Question: Which countries have places with more than two caves?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )
   Expected Output: Entity assignment: 'dbo:Cave' for caves, 'dbo:Country' for countries. Predicate ids: 'dbo:location' gives the location of the cave. I want to list countries that have more than two caves. I use SELECT DISTINCT to get unique country results. I first match all entities that are caves (?cave, where rdf:type dbo:Cave), then get their locations via dbo:location (?uri). Then, I ensure those locations are countries (?uri rdf:type dbo:Country). By grouping the results by country using GROUP BY and adding HAVING (COUNT(?cave) > 2), I ensure I only get countries with more than two caves.

2. Question: What is the longest river?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River { ?uri dbo:length ?l } UNION { ?uri dbp:length ?l } } ORDER BY DESC(?l) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:River' for rivers. Predicate ids: 'dbo:length', 'dbp:length' for the river length. To find the longest river, I select all rivers with ?uri a dbo:River and retrieve their lengths, checking both dbo:length and dbp:length predicates. I use UNION to ensure all lengths are covered. Then, I order the results from longest to shortest (ORDER BY DESC(?l)), and LIMIT 1 to only get the longest river.

3. Question: Do Prince Harry and Prince William have the same parents?
   SPARQL: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> ASK WHERE { <http://dbpedia.org/resource/Prince_William,_Duke_of_Cambridge> dbo:parent ?x . res:Prince_Harry dbo:parent ?x }
   Expected Output: Entity assignment: 'res:Prince_Harry' and 'res:Prince_William,_Duke_of_Cambridge' for Prince Harry and Prince William. Predicate id: 'dbo:parent' is the parent relationship. This question asks if both Prince Harry and Prince William share the same parents. I use the ASK keyword for a true/false answer. I check if there is any parent (?x) linked to both Prince Harry and Prince William. If such a parent exists, the SPARQL query returns true.

4. Question: Which volcanos in Japan erupted since 2000?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:Volcano ; dbo:locatedInArea res:Japan ; dbo:eruptionYear ?date FILTER ( year(?date) >= 2000 ) }
   Expected Output: Entity assignment: 'dbo:Volcano' for volcanos, 'res:Japan' for Japan. Predicate ids: 'dbo:locatedInArea' links the volcano to Japan, 'dbo:eruptionYear' gives the eruption year. I want to find volcanos in Japan that erupted in or after the year 2000. I identify volcanos located in Japan using dbo:locatedInArea res:Japan, and then use dbo:eruptionYear to get their eruption year. I apply a FILTER with year(?date) >= 2000 to only include those since 2000.

5. Question: Give me all world heritage sites designated within the past two years.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:WorldHeritageSite . { ?uri dbp:year '2013'^^xsd:integer . } UNION { ?uri dbp:year '2014'^^xsd:integer . } }
   Expected Output: Entity assignment: 'dbo:WorldHeritageSite' for world heritage sites. Predicate ids: 'dbp:year' specifies the designation year. To find World Heritage Sites designated in the last two years, I select entities of type dbo:WorldHeritageSite using rdf:type. The pattern '?uri dbp:year '2013'^^xsd:integer' (similarly for '2014') matches sites with a 'dbp:year' property equal to an integer-valued year—here, either 2013 or 2014. The use of UNION allows us to include sites from both years. '^^xsd:integer' ensures that the year value is treated as an integer in the query. DISTINCT avoids duplicates in the results.

6. Question: Which rivers flow into a German lake?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri a dbo:River . ?x dbo:inflow ?uri ; a dbo:Lake ; dbo:country res:Germany }
   Expected Output: Entity assignment: 'dbo:River' for rivers, 'dbo:Lake' for lakes, 'res:Germany' for Germany. Predicate ids: 'dbo:inflow' connects the river to the lake, 'dbo:country' specifies the lake's country. I want rivers that flow into lakes in Germany. I select entities that are rivers (?uri a dbo:River), then check for lakes (?x) with dbo:inflow connecting to those rivers, and use dbo:country to restrict lakes to Germany. DISTINCT ensures each river only shows up once.

7. Question: Give me all actors starring in movies directed by and starring William Shatner.
   SPARQL: SELECT DISTINCT ?uri WHERE { ?x dbo:director res:William_Shatner ; dbo:starring res:William_Shatner { ?x dbo:starring ?uri } UNION { ?x dbp:starring ?uri } }
   Expected Output: Entity assignment: 'res:William_Shatner' for William Shatner. Predicate ids: 'dbo:director' and 'dbo:starring' identify films directed by and starring William Shatner; 'dbp:starring' is an alternative starring predicate. I look for films that William Shatner both directed and acted in. For those films, I select co-stars using dbo:starring and dbp:starring (via UNION). DISTINCT ensures unique actor results.

8. Question: Which actor was casted in the most movies?
   SPARQL: SELECT DISTINCT ?uri WHERE { ?uri rdf:type dbo:Actor . ?f rdf:type dbo:Film . ?f dbo:starring ?uri . } ORDER BY DESC(COUNT(DISTINCT(?f))) OFFSET 0 LIMIT 1
   Expected Output: Entity assignment: 'dbo:Actor' for actors, 'dbo:Film' for films. Predicate id: 'dbo:starring' lists movie cast. To find the most prolific actor, I select all actors (?uri rdf:type dbo:Actor) and the films they've appeared in (?f rdf:type dbo:Film; ?f dbo:starring ?uri). I count the number of different films for each actor, then sort actors in descending order of movie count and use LIMIT 1 to get the one with the most appearances.

9. Question: Is Frank Herbert still alive?
   SPARQL: PREFIX dbo: http://dbpedia.org/ontology/ PREFIX res: http://dbpedia.org/resource/ ASK WHERE { OPTIONAL { res:Frank_Herbert dbo:deathDate ?date } FILTER ( ! bound(?date) ) }
   Expected Output: Entity assignment: 'res:Frank_Herbert' for Frank Herbert. Predicate id: 'dbo:deathDate' gives the individual's death date. To check if Frank Herbert is alive, I use ASK for a yes/no result. I do an OPTIONAL lookup for his dbo:deathDate. If there's no death date (i.e., the variable is unbound), the query returns true, meaning he's still alive.
"""

# Per-item tail of the user prompt. The {question}, {sparql} and {triples_str}
# placeholders are filled in by build_messages() through str.format().
# NOTE(review): example 9 in FEWSHOT_EXAMPLES above writes its PREFIX IRIs
# without the enclosing <...> that example 3 uses — confirm this is intended.
ITEM_INSTRUCTIONS = """Now write the explanation for the following item. Output ONLY the explanation in the same style as the examples:
start with "Entity assignment: ..." (and "Predicate ids: ..." if relevant), followed by a first-person explanation of how the SPARQL answers the question. No bullets, no code fences, no extra headings.

Question:
{question}

SPARQL:
{sparql}

Gold triples (if provided):
{triples_str}
"""


# Request parameters for the batch requests.
# MODEL and TEMPERATURE repeat the values already assigned at the top of this cell.
MODEL       = "gpt-4.1"
TEMPERATURE = 0.0
MAX_TOKENS  = 500

def load_any(path: str) -> List[Dict[str, Any]]:
    """Read a list of records from a JSON or JSON Lines file.

    Args:
        path: File to read. A ``.jsonl`` suffix (case-insensitive) selects
            line-by-line parsing; anything else is parsed as one JSON document.

    Returns:
        For ``.jsonl``: one parsed object per non-blank line.
        For JSON: the document itself if it is a list, its ``"questions"``
        value if that is a list, otherwise the dict wrapped in a one-item list.

    Raises:
        ValueError: If the JSON document is neither a list nor a dict.
    """
    source = pathlib.Path(path)
    text = source.read_text(encoding="utf-8")

    if source.suffix.lower() == ".jsonl":
        records = []
        for raw_line in text.splitlines():
            # Blank / whitespace-only lines carry no record.
            if raw_line.strip():
                records.append(json.loads(raw_line))
        return records

    payload = json.loads(text)
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        raise ValueError("Unrecognized JSON structure")

    questions = payload.get("questions")
    return questions if isinstance(questions, list) else [payload]

def to_human_triples(triples: Any) -> str:
    """Render gold triples as a bullet list for inclusion in a prompt.

    Args:
        triples: Usually a list. Items that are 3-element lists/tuples are
            rendered as ``- <s, p, o>``; any other item as ``- <item>``.
            A non-list value is rendered with ``str()``.

    Returns:
        The bullet lines joined by newlines, or ``"- (none)"`` when there is
        nothing to show (empty list or ``None``).
    """
    # A JSON null would otherwise end up in the prompt as the literal text
    # "None"; treat it like "no triples provided".
    if triples is None:
        return "- (none)"
    if not isinstance(triples, list):
        return str(triples)

    lines: List[str] = []
    for t in triples:
        if isinstance(t, (list, tuple)) and len(t) == 3:
            s, p, o = t
            lines.append(f"- <{s}, {p}, {o}>")
        else:
            lines.append(f"- {t}")
    return "\n".join(lines) if lines else "- (none)"

def build_messages(question: str, triples: Any, formatted_query: str) -> List[Dict[str, str]]:
    """Assemble the chat messages for one explanation request.

    Args:
        question: Natural-language question; surrounding whitespace is stripped.
        triples: Gold triples, rendered through ``to_human_triples``.
        formatted_query: SPARQL query text; surrounding whitespace is stripped.

    Returns:
        A two-element list: the system message (``SYSTEM_PROMPT``) followed by
        a user message made of ``FEWSHOT_EXAMPLES``, a blank line, and the
        filled-in ``ITEM_INSTRUCTIONS`` template.
    """
    question_text = question.strip()
    triples_text = to_human_triples(triples)
    query_text = formatted_query.strip()

    item_section = ITEM_INSTRUCTIONS.format(
        question=question_text,
        triples_str=triples_text,
        sparql=query_text,
    )
    user_content = "\n\n".join([FEWSHOT_EXAMPLES, item_section])

    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]

def sanitize(s: str) -> str:
    """Turn an arbitrary id into a short token safe for use in a custom_id.

    The value is converted with ``str()``, each whitespace run becomes a single
    underscore, every remaining character outside ``a-zA-Z0-9._:-`` becomes an
    underscore, and the result is cut to at most 120 characters.
    """
    max_len = 120  # keep ids short-ish
    collapsed = re.sub(r"\s+", "_", str(s))
    safe = re.sub(r"[^a-zA-Z0-9._:-]", "_", collapsed)
    return safe[:max_len]

def main():
    """Build one chat-completion batch request per dynamic pair and write JSONL.

    Reads the items from ``INPUT_PATH``, and for every entry in each item's
    ``dynamic_pairs`` emits a request whose ``custom_id`` is
    ``"<sanitized item id>__dp<pair index>"``. All requests are written to
    ``BATCH_OUTPUT_PATH`` (parent directories are created if needed), one JSON
    object per line.
    """
    items = load_any(INPUT_PATH)
    out = []
    for idx, entry in enumerate(tqdm(items, desc="Building batch JSONL")):
        # Keep this fallback chain unchanged: the step that merges the batch
        # results back recomputes custom_ids with exactly the same expression.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit JSON null, which would otherwise
        # make the inner loop raise TypeError.
        dynamic_pairs = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dynamic_pairs):
            question = (dp.get("question") or "").strip()
            triples  = dp.get("triples", [])
            sparql   = (dp.get("sparql") or "").strip()
            messages = build_messages(question, triples, sparql)

            out.append({
                "custom_id": f"{sanitize(eid)}__dp{i}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "temperature": TEMPERATURE,
                    "max_tokens": MAX_TOKENS,
                    "messages": messages
                }
            })

    pathlib.Path(BATCH_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(BATCH_OUTPUT_PATH, "w", encoding="utf-8") as f:
        for obj in out:
            # ensure_ascii=False keeps non-ASCII question text readable in the file.
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")

    print(f"Wrote {len(out)} requests to {BATCH_OUTPUT_PATH}")

if __name__ == "__main__":
    main()


Building batch JSONL: 100%|██████████| 150/150 [00:00<00:00, 30025.08it/s]

Wrote 740 requests to /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl





In [2]:
import openai
import time
import json

# API client used by main(); no credentials or options are passed explicitly here.
client = openai.OpenAI()

# BATCH_INPUT_FILE_PATH: JSONL request file that main() uploads.
# BATCH_OUTPUT_FILE_PATH: where main() saves the downloaded batch results.
BATCH_INPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl"
BATCH_OUTPUT_FILE_PATH = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_output.jsonl"

def main():
    """Upload the batch input file, run the batch to completion, save its output.

    Steps: upload ``BATCH_INPUT_FILE_PATH`` with purpose ``"batch"``, create a
    chat-completions batch, poll once a minute until a terminal status is
    reached, then write the output file's text to ``BATCH_OUTPUT_FILE_PATH``.

    Raises:
        SystemExit: With code 1 if the batch does not complete or completes
            without an output file. The beginning of the error file, when one
            exists, is printed first.
    """
    # Context manager so the upload handle is closed instead of leaked.
    with open(BATCH_INPUT_FILE_PATH, "rb") as batch_input:
        upload = client.files.create(
            file=batch_input,
            purpose="batch",
        )
    print("Uploaded file:", upload.id)

    # NOTE(review): the job label says LcQUAD while the file paths say QALD — confirm.
    batch = client.batches.create(
        input_file_id=upload.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"job": "LcQUAD_COT"}
    )
    print("Batch ID:", batch.id)

    # Poll
    while True:
        b = client.batches.retrieve(batch.id)
        print("Status:", b.status)
        if b.status in {"failed", "completed", "expired", "cancelled"}:
            break
        time.sleep(60)

    not_completed = b.status != "completed"
    # output_file_id may be unset even for a completed batch (presumably when
    # no request succeeded); downloading it would then fail with an opaque error.
    if not_completed or not getattr(b, "output_file_id", None):
        if not_completed:
            print("Batch ended with status:", b.status)
        else:
            print("Batch completed but produced no output file.")
        if getattr(b, "error_file_id", None):
            err_txt = client.files.content(b.error_file_id).text
            print("Error file content:\n", err_txt[:2000])
        raise SystemExit(1)

    out_txt = client.files.content(b.output_file_id).text
    with open(BATCH_OUTPUT_FILE_PATH, "w", encoding="utf-8") as f:
        f.write(out_txt)
    print("Saved:", BATCH_OUTPUT_FILE_PATH)

if __name__ == "__main__":
    main()


Uploaded file: file-LsnnGsaAYcPVJBAMUsX7Xv
Batch ID: batch_68f6b9270ad08190a6b49c7d6b9794e3
Status: validating
Status: validating
Status: in_progress
Status: finalizing
Status: completed
Saved: /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_output.jsonl


In [1]:
import json, pathlib, re
from typing import Any, Dict, List, Tuple, Optional

# ORIG_INPUT_PATH: JSON whose dynamic_pairs receive a "cot" field in main().
# BATCH_OUTPUT_FILE: batch results read by parse_batch_output().
# MERGED_OUTPUT_JSON: where main() writes the updated JSON.
# NOTE(review): main() recomputes custom_ids from ORIG_INPUT_PATH, so it must hold
# the same items and dynamic_pairs order as the file the batch requests were
# built from; this path differs from the INPUT_PATH shown at the top of the
# notebook — confirm they correspond.
ORIG_INPUT_PATH     = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_gold_dynamic_pairs.json"
BATCH_OUTPUT_FILE   = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_output.jsonl"
MERGED_OUTPUT_JSON  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot.json"

def parse_batch_output(path: str) -> Dict[str, str]:
    """Map each ``custom_id`` in a batch output JSONL file to its text result.

    Args:
        path: Batch output file, one JSON object per line. Blank lines and
            lines without a ``custom_id`` are skipped.

    Returns:
        ``{custom_id: text}`` where ``text`` is:
        - the stripped assistant message content for a 200 response;
        - the first 2000 characters of the JSON-encoded body if a 200 response
          does not have the expected ``choices[0].message.content`` string;
        - ``"[ERROR] <message>"`` otherwise. The message comes from the
          response body's ``error``, else from the line's top-level ``error``
          (used when ``response`` is null), else ``"Non-200 status: <code>"``.
    """
    mapping: Dict[str, str] = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            cid = obj.get("custom_id")
            if not cid:
                # Nothing to key the result on.
                continue

            resp = obj.get("response") or {}
            status_code = resp.get("status_code")
            body = resp.get("body") or {}

            if status_code == 200:
                try:
                    content = body["choices"][0]["message"]["content"].strip()
                except (KeyError, IndexError, TypeError, AttributeError):
                    # Unexpected body shape (or null content): keep a bounded
                    # dump so the problem stays visible downstream.
                    content = json.dumps(body)[:2000]
            else:
                message = None
                body_error = body.get("error") if isinstance(body, dict) else None
                # A request that never produced a response reports its failure
                # in the top-level "error" object instead of the body.
                for candidate in (body_error, obj.get("error")):
                    if isinstance(candidate, dict) and candidate.get("message"):
                        message = candidate["message"]
                        break
                if not message:
                    message = f"Non-200 status: {status_code}"
                content = f"[ERROR] {message}"

            mapping[cid] = content
    return mapping

def sanitize(s: str) -> str:
    """Normalize an id into the token used inside batch custom_ids.

    Applies ``str()``, replaces each run of whitespace with one underscore,
    replaces any other character outside ``a-zA-Z0-9._:-`` with an underscore,
    and returns at most the first 120 characters.
    """
    without_whitespace = re.sub(r"\s+", "_", str(s))
    return re.sub(r"[^a-zA-Z0-9._:-]", "_", without_whitespace)[:120]

def _find_items_key_in_dict(d: Dict[str, Any]) -> Optional[str]:
    candidate_keys = [k for k, v in d.items() if isinstance(v, list) and all(isinstance(x, dict) for x in v)]
    for k in candidate_keys:
        v = d[k]
        if any(isinstance(x, dict) and "dynamic_pairs" in x for x in v):
            return k
    return candidate_keys[0] if candidate_keys else None

def load_container(path: str) -> Tuple[Any, List[Dict[str, Any]], Optional[str]]:
    """Load a JSON file and locate the list of items inside it.

    Args:
        path: JSON file to read.

    Returns:
        ``(root, items, key)``: the parsed document, the list of items (the
        same object as ``root`` or as ``root[key]``), and the key under which
        the items were found (``None`` when the document itself is the list).

    Raises:
        ValueError: If the document is neither a list nor a dict, or is a dict
            in which no list of items can be found.
    """
    with open(path, "r", encoding="utf-8") as handle:
        document = json.load(handle)

    if isinstance(document, list):
        return document, document, None

    if not isinstance(document, dict):
        raise ValueError("Original JSON must be either a list or a dict containing a list of items.")

    # Common case: {"questions": [...]}
    if isinstance(document.get("questions"), list):
        return document, document["questions"], "questions"

    # Otherwise fall back to the heuristic search over the dict's values.
    items_key = _find_items_key_in_dict(document)
    if items_key is not None:
        return document, document[items_key], items_key
    raise ValueError("Could not locate the list of items in the original JSON.")

def main():
    """Attach batch-generated explanations to the dynamic pairs and save the JSON.

    For every entry of every item's ``dynamic_pairs`` in ``ORIG_INPUT_PATH``,
    the custom_id ``"<sanitized item id>__dp<pair index>"`` is rebuilt and, if
    present in the parsed ``BATCH_OUTPUT_FILE``, its text is stored under the
    pair's ``"cot"`` key. The whole document is then written to
    ``MERGED_OUTPUT_JSON`` (parent directories are created if needed).
    """
    root_obj, items_list, _ = load_container(ORIG_INPUT_PATH)
    cid_to_text = parse_batch_output(BATCH_OUTPUT_FILE)

    updated = 0
    total_pairs = 0
    error_placeholders = 0

    for idx, entry in enumerate(items_list):
        # Must match the expression used when the batch requests were built.
        eid = entry.get("id") or entry.get("qid") or idx
        # `or []` also covers an explicit JSON null.
        dps = entry.get("dynamic_pairs") or []
        for i, dp in enumerate(dps):
            total_pairs += 1
            cid = f"{sanitize(eid)}__dp{i}"
            if cid not in cid_to_text:
                continue
            text = cid_to_text[cid]
            dp["cot"] = text
            updated += 1
            # parse_batch_output() stores failed requests as "[ERROR] ..."
            # text; count them so they are not mistaken for real explanations.
            if isinstance(text, str) and text.startswith("[ERROR]"):
                error_placeholders += 1

    pathlib.Path(MERGED_OUTPUT_JSON).parent.mkdir(parents=True, exist_ok=True)
    with open(MERGED_OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(root_obj, f, ensure_ascii=False, indent=2)

    print(f"Inserted COT for {updated}/{total_pairs} dynamic_pairs.")
    if error_placeholders:
        print(f"Warning: {error_placeholders} inserted COT values are '[ERROR] ...' placeholders from failed requests.")
    print(f"Wrote JSON to {MERGED_OUTPUT_JSON}")

if __name__ == "__main__":
    main()

Inserted COT for 740/740 dynamic_pairs.
Wrote JSON to /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot.json


In [2]:
from copy import deepcopy
import json
from typing import List, Dict, Any, Iterable, Union, Tuple

# Upper bound on how many dynamic_pairs demos build_system_msg() looks at per sample.
NUM_DEMOS = 3

# input_path: dataset JSON read by main(); each sample's dynamic_pairs supply the demos.
# batch_jsonl_path: batch request file written by main().
# MODEL: model name placed in every request body.
input_path  = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot.json"
batch_jsonl_path     = "/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl"
MODEL = "ft:gpt-3.5-turbo-0125:personal::C9J5ld48"

def _escape_json_string(s: str) -> str:
    return (
        s.replace("\\", "\\\\")
         .replace('"', '\\"')
         .replace("\n", "\\n")
         .replace("\r", "\\r")
    )

# Task instruction used on its own when a sample has no usable demos, and as
# the header of the system prompt when it has.
GENERIC_INSTR = (
    'Given a specific question, generate the corresponding SPARQL query for DBpedia. '
    'Return your answer after <Answer>, in JSON with key "sparql" and the query as its string value.'
)

def build_system_msg(sample: Dict[str, Any], num_demos: Union[int, None] = None) -> Dict[str, str]:
    """Build the system message, with few-shot demos when the sample has any.

    Args:
        sample: Dataset record. Demos are read from ``"dynamic_pairs"`` (the
            misspelled key ``"dynamic_paris"`` is accepted as a fallback).
            Each demo may provide ``"question"``, ``"sparql"`` and ``"cot"``.
        num_demos: How many leading demos to consider. ``None`` (default)
            uses the module-level ``NUM_DEMOS``.

    Returns:
        ``{"role": "system", "content": ...}``. The content is
        ``GENERIC_INSTR`` alone when no demo has both a question and a query;
        otherwise ``GENERIC_INSTR``, a blank line, and one INPUT/OUTPUT block
        per usable demo.
    """
    demo_list = sample.get("dynamic_pairs") or sample.get("dynamic_paris") or []
    if not demo_list:
        return {"role": "system", "content": GENERIC_INSTR}

    limit = NUM_DEMOS if num_demos is None else num_demos

    blocks = []
    for demo in demo_list[:limit]:
        demo = demo or {}
        # `or ""` keeps a JSON null from turning into the literal text "None".
        demo_q: str = str(demo.get("question") or "").strip()
        demo_sparql: str = str(demo.get("sparql") or "").strip()
        demo_cot: str = str(demo.get("cot") or "").strip()

        if not demo_q or not demo_sparql:
            continue

        # Number by emitted blocks so skipped demos leave no gap (1, 2, 3, ...).
        number = len(blocks) + 1

        # Let the JSON encoder build the payload so it is always valid JSON.
        demo_answer = "<Answer>\n" + json.dumps({"sparql": demo_sparql}, ensure_ascii=False)
        if demo_cot:
            demo_answer += f"\n<Chain-of-Thought>\n{_escape_json_string(demo_cot)}"

        block = (
            f"Example {number} INPUT (exactly what you will receive for every task)\n\n"
            f"Question:\n{demo_q}\n\n"
            f"Example {number} OUTPUT (your response must follow **this exact shape**)\n\n"
            f"{demo_answer}\n"
        )
        blocks.append(block)

    if not blocks:
        return {"role": "system", "content": GENERIC_INSTR}

    # The header is the generic instruction itself; reuse it instead of a copy.
    content = GENERIC_INSTR + "\n\n" + "\n".join(blocks)
    return {"role": "system", "content": content}

def main():
    """Turn the dataset into a Batch API request file for SPARQL generation.

    Loads the samples from ``input_path``, builds a system + user message pair
    for each one, and writes one ``/v1/chat/completions`` request per sample
    to ``batch_jsonl_path`` with custom_id ``example_<index>``. Finally prints
    the number of lines written and a truncated preview of the first record.
    """
    with open(input_path, encoding="utf-8") as src:
        dataset = json.load(src)

    def _to_row(sample):
        # Question first, then the system message, as one conversation.
        question_text = sample.get("question", "").strip()
        user_turn = {
            "role": "user",
            "content": f"Question:\n{question_text}"
        }
        return {"messages": [build_system_msg(sample), user_turn]}

    jsonl_rows = [_to_row(sample) for sample in dataset]

    with open(batch_jsonl_path, "w", encoding="utf-8") as fout:
        for idx, row in enumerate(jsonl_rows):
            request = {
                "custom_id": f"example_{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL,
                    "messages": row["messages"],
                    "temperature": 0
                }
            }
            fout.write(json.dumps(request) + "\n")
    count = len(jsonl_rows)

    print(f"[1/1] Wrote {count} batch lines to {batch_jsonl_path}")
    if jsonl_rows:
        preview = json.dumps(jsonl_rows[0], indent=2)[:900]
        print("Preview of first record:\n", preview)

if __name__ == "__main__":
    main()

[1/1] Wrote 150 batch lines to /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl
Preview of first record:
 {
  "messages": [
    {
      "role": "system",
      "content": "Given a specific question, generate the corresponding SPARQL query for DBpedia. Return your answer after <Answer>, in JSON with key \"sparql\" and the query as its string value.\n\nExample 1 INPUT (exactly what you will receive for every task)\n\nQuestion:\nWhat is the timezone in San Pedro de Atacama?\n\nExample 1 OUTPUT (your response must follow **this exact shape**)\n\n<Answer>\n{\"sparql\": \"SELECT DISTINCT ?uri WHERE { res:San_Pedro_de_Atacama dbo:timeZone ?uri }\"}\n<Chain-of-Thought>\nEntity assignment: 'res:San_Pedro_de_Atacama' for San Pedro de Atacama. Predicate id: 'dbo:timeZone' gives the timezone of a place. I want to find the timezone for San Pedro de Atacama. I directly query the resou

In [3]:
from openai import OpenAI
import time
import json
# Run the inference batch: upload the request file, create the batch, poll
# until it reaches a terminal status, then save the output file locally.
client = OpenAI()

# Context manager so the upload handle is closed instead of leaked.
with open("/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_input.jsonl", "rb") as batch_input:
    upload = client.files.create(
        file=batch_input,
        purpose="batch"
    )
input_file_id = upload.id
print("Uploaded file:", input_file_id)

batch = client.batches.create(
    input_file_id     = input_file_id,
    endpoint          = "/v1/chat/completions",
    completion_window = "24h",
    metadata          = {"job": "QALD test inference"}
)
print("Batch ID:", batch.id)

while True:
    batch = client.batches.retrieve(batch.id)
    print("Status:", batch.status)
    # "expired" and "cancelled" are terminal too; without them the loop
    # would poll forever once the batch stops without completing.
    if batch.status in {"failed", "completed", "expired", "cancelled"}:
        break
    time.sleep(60)

# Stop on any unsuccessful end state, and also when a completed batch carries
# no output file (nothing to download).
if batch.status != "completed" or not batch.output_file_id:
    print(f"Batch did not produce results (status: {batch.status}). Full batch object:")
    print(batch)
    raise SystemExit(1)

result_file_id = batch.output_file_id

result_response = client.files.content(result_file_id)

with open("/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_output.jsonl", "w", encoding="utf-8") as f:
    f.write(result_response.text)

print("Saved outputs")

Uploaded file: file-1Vahe2uZrtfT2w1sYMATgn
Batch ID: batch_68f6dea012bc8190ad45326c41af28a2
Status: validating
Status: validating
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: in_progress
Status: completed
Saved outputs


In [4]:
import json, re
from pathlib import Path

# GOLD_PATH: dataset JSON whose records receive a "refined_pred_query" field.
# PRED_PATH: batch output JSONL holding the model responses, keyed by custom_id.
# OUTPUT_PATH: where the enriched records are written.
GOLD_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_top_10_ft_without_triples_zero_shot_plus_dynamic_pairs_cot.json")
PRED_PATH   = Path("/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_batch_output.jsonl")
OUTPUT_PATH = Path("/home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_plus_gold.json")

# Locates the JSON object that follows "<Answer>". Group 1 is greedy and may
# run past the object's end, so only its START position is relied upon.
ANSWER_RE = re.compile(r'<Answer>\s*(\{.*\})', re.DOTALL)

def extract_sparql(content: str) -> str:
    """Return the ``"sparql"`` value from a model reply, or ``""`` if absent.

    The reply is expected to contain ``<Answer>`` followed by a JSON object.
    Only that first JSON object is decoded; anything after it (for example a
    Chain-of-Thought section that itself contains braces) is ignored instead
    of breaking the parse.

    Args:
        content: Raw assistant message text. Empty or ``None`` yields ``""``.

    Returns:
        The value stored under ``"sparql"``, or ``""`` when there is no
        ``<Answer>`` object, it is not valid JSON, or it has no such key.
    """
    if not content:
        return ""
    m = ANSWER_RE.search(content)
    if not m:
        return ""
    try:
        # raw_decode stops at the end of the first JSON document and tolerates
        # trailing text, unlike json.loads on the greedy regex capture.
        payload, _ = json.JSONDecoder().raw_decode(content[m.start(1):])
    except json.JSONDecodeError:
        return ""
    if not isinstance(payload, dict):
        return ""
    return payload.get("sparql", "")

# Attach the predicted SPARQL of each batch response to the matching gold
# record (matched by position: custom_id "example_<index>") and save the result.
with GOLD_PATH.open(encoding="utf-8") as f:
    gold_records = json.load(f)

pred_lookup = {}
unusable_ids = []
with PRED_PATH.open(encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        rec = json.loads(line)
        cid = rec.get("custom_id")
        if cid is None:
            continue
        try:
            content = rec["response"]["body"]["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Failed requests have no usable response body; one such line
            # should not abort the whole run.
            content = None
        if isinstance(content, str):
            pred_lookup[cid] = extract_sparql(content)
        else:
            pred_lookup[cid] = ""
            unusable_ids.append(cid)

for idx, rec in enumerate(gold_records):
    cid = f"example_{idx}"
    rec["refined_pred_query"] = pred_lookup.get(cid, "")

with OUTPUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(gold_records, f, ensure_ascii=False, indent=2)

if unusable_ids:
    print(f"Warning: {len(unusable_ids)} responses had no usable content (empty prediction stored): {unusable_ids[:10]}")
print(f"Enriched file written → {OUTPUT_PATH}. Total records: {len(gold_records)}")


Enriched file written → /home/m2khoda/dual_retriever/evaluations/end_to_end_evalution/qald_results/qald_test_solo_stage_ft_without_triples_zero_shot_plus_dynamic_pairs_cot_plus_gold.json. Total records: 150
