In [1]:
# Show the GPU model, driver/CUDA versions and current memory use before loading any model.
!nvidia-smi

Tue Oct 21 12:30:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:81:00.0 Off |                  N/A |
|  0%   41C    P8             31W /  370W |       2MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# Environment sanity check: report the host name and the Python interpreter found on PATH,
# then ask PyTorch whether it can see a CUDA device.
!hostname
!which python
import torch
print("CUDA available:", torch.cuda.is_available())


limbo
/opt/miniforge3/envs/jupyterhub/bin/python
CUDA available: True


In [None]:
# NOTE(review): sends a kill signal to a hard-coded PID. That PID presumably belonged to a
# stale process from an earlier session — confirm it is still the intended target before
# re-running, or remove this cell.
!kill 2816519

In [3]:
# Block 0 — Imports & Config

import os
import json
import re
import time
from textwrap import dedent

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


# Toggle: include "mentions" in final output JSONL (set False to match existing shape strictly).
# NOTE(review): none of the functions in the cells shown here read this flag — confirm it is
# still wired up somewhere before relying on it.
OUTPUT_MENTIONS = False


def setup_model(model_id: str = "mistralai/Mistral-7B-Instruct-v0.3"):
    """
    Load a causal language model with its tokenizer and wrap both in a
    "text-generation" pipeline.

    Args:
        model_id: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        (generator, tokenizer): the pipeline object and the tokenizer it uses.

    Side effects:
        Prints a loading banner and enables ``torch.backends.cudnn.benchmark``.
    """
    print("⏳ Loading model:", model_id)
    torch.backends.cudnn.benchmark = True

    tok = AutoTokenizer.from_pretrained(model_id)

    # Half-precision weights, with device placement delegated to "auto".
    load_options = {"device_map": "auto", "torch_dtype": torch.float16}
    lm = AutoModelForCausalLM.from_pretrained(model_id, **load_options)
    lm.config.use_cache = True  # keep KV caching on for generation

    text_gen = pipeline("text-generation", model=lm, tokenizer=tok, device_map="auto")
    return text_gen, tok


In [4]:
# Block 1 — IO Utilities (JSONL, few-shot loader)

def read_jsonl(path, max_items: int | None = None):
    """Yield JSON objects from .jsonl; stop after max_items if provided."""
    count = 0
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)
            count += 1
            if max_items is not None and count >= max_items:
                break


def write_jsonl(path, records):
    """
    Write ``records`` to ``path`` as JSON Lines (one JSON document per line, UTF-8).

    The parent directory is created when missing, so a finished run is not lost
    to a FileNotFoundError at the very last step. An existing file is overwritten.

    Args:
        path: Destination file path.
        records: Iterable of JSON-serialisable objects.
    """
    parent = os.path.dirname(os.fspath(path))
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(path, "w", encoding="utf-8") as f:
        for rec in records:
            # ensure_ascii=False keeps non-ASCII text readable in the output file
            f.write(json.dumps(rec, ensure_ascii=False))
            f.write("\n")


In [5]:
# Block 2 — Ontology Helpers (labels, formatting)

def _concept_label(ontology_json, qid):
    """
    Map a concept qid to its human-readable label (string). If qid is already a label, return as-is.
    """
    # Accept label passthrough
    if isinstance(qid, str) and not qid.startswith("Q"):
        return qid
    for c in ontology_json.get("concepts", []):
        if c.get("qid") == qid:
            return c.get("label", "")
    return ""


def format_ontology_concepts(ontology_json):
    """
    Render the ontology's concept labels as one comma-separated line.

    Concepts without a "label" contribute an empty string; an ontology without
    a "concepts" list yields "".
    """
    labels = []
    for concept in ontology_json.get("concepts", []):
        labels.append(concept.get("label", ""))
    return ", ".join(labels)


def format_ontology_relations(ontology_json):
    """
    Render every ontology relation as a bullet line ``- label(domain,range)``.

    Domain and range are passed through ``_concept_label`` so that identifiers
    are shown as concept labels. Lines are joined with newlines; an ontology
    without relations yields "".
    """
    def _render(relation):
        domain_label = _concept_label(ontology_json, relation.get("domain"))
        range_label = _concept_label(ontology_json, relation.get("range"))
        return f'- {relation.get("label")}({domain_label},{range_label})'

    return "\n".join(_render(relation) for relation in ontology_json.get("relations", []))


def build_relation_sig_maps(ontology_json):
    """
    Build two lookup tables keyed by relation label.

    Returns:
        (rel2dom, rel2rng): ``rel2dom[label]`` is the relation's domain and
        ``rel2rng[label]`` its range, each resolved through ``_concept_label``
        and falling back to the raw ontology value when that yields "".
        Relations without a label are skipped.

    NOTE(review): a later cell in this notebook defines another function with
    this same name and a different return shape; whichever cell runs last wins.
    """
    domain_by_relation, range_by_relation = {}, {}
    for relation in ontology_json.get("relations", []):
        name = relation.get("label")
        domain = _concept_label(ontology_json, relation.get("domain")) or relation.get("domain")
        range_ = _concept_label(ontology_json, relation.get("range")) or relation.get("range")
        if not name:
            continue
        domain_by_relation[name] = domain
        range_by_relation[name] = range_
    return domain_by_relation, range_by_relation


In [6]:
# Block 3 — Prompt 1 Builder (JSON-only: mentions + triples)

from typing import Dict, Any, Optional
import json

def _escape_multiline(s: str) -> str:
    return s.replace("\\", "\\\\").replace('"', '\\"')

def build_prompt1_json_only(
    ontology_json: Dict[str, Any],
    test_sentence: str,
    k: int = 6,
) -> str:
    """
    Build the Prompt 1 instruction text asking the model for a single JSON
    object with two fields: mentions[] and triples[].

    Args:
        ontology_json: Ontology dict; its concepts and relations are rendered
            into the prompt via format_ontology_concepts /
            format_ontology_relations.
        test_sentence: Input text. It is embedded between double quotes after
            escaping backslashes and double quotes with _escape_multiline.
        k: Maximum number of candidate triples requested in the task line.

    Returns:
        The complete prompt as one string (sections: System, User, Text,
        Ontology concepts, Ontology relations, Output format, Constraints).

    The triples schema requested from the model is aligned to the evaluator:
      - subject_type / object_type
      - confidence_prompt
      - support as a list of {quote, char_span}
      - ontology_domain_range_check (the model may set it; we recompute later)

    NOTE(review): the output-format example inside the prompt carries `//`
    annotations, which are not valid JSON. If the model echoes them, a strict
    json.loads on the reply will fail — confirm the downstream parser copes.
    """
    # Both formatters are defined in the ontology-helpers cell (Block 2)
    concepts_txt = format_ontology_concepts(ontology_json)
    relations_txt = format_ontology_relations(ontology_json)

    # Literal JSON braces are doubled ({{ }}) because this is an f-string
    prompt = f"""System
You are a KG triple proposer in a Tree-of-Thoughts loop. First detect entity mentions and assign tentative ontology types. Then, using those mentions, propose candidate triples that are valid under the ontology (domain→range). Return only JSON.

User
Task: From the text, 1) list detected mentions with tentative types, 2) propose up to k={k} candidate triples [subject, relation, object].
Use only relations whose domain/range match the types you inferred. For each triple, include confidence ∈ [0,1] and cite the exact supporting span(s).

Text
"{_escape_multiline(test_sentence)}"

Ontology concepts
{concepts_txt}

Ontology relations (domain → range)
{relations_txt}

Output format (JSON only)
{{
  "mentions": [
    {{"surface": "...", "type_candidates": ["ConceptA","ConceptB"], "span": [start,end]}}
  ],
  "triples": [
    {{
      "triple": ["subject","relation","object"],
      "subject_type": "ConceptA",   // required for evaluator Ontology
      "object_type": "ConceptB",    // required for evaluator Ontology
      "confidence_prompt": 0.0,     // rename from 'confidence'
      "support": [
        {{"quote": "exact substring from text", "char_span": [start,end]}}
      ],
      "notes": "why domain/range fits; any pronoun/coref resolution; date normalization, etc.",
      "ontology_domain_range_check": true  // model may set; we'll recompute deterministically later
    }}
  ]
}}

Constraints
- Only output domain/range-valid triples.
- Provide at least one support quote with char_span; support.quote MUST be an exact substring of the text.
- Normalize dates to YYYY-MM-DD when possible.
- If a pronoun is required, resolve it to the nearest valid antecedent and describe it in notes.
- Do not invent entities not in the text. Omit triples that are not explicitly supported.
"""
    return prompt


In [7]:
# Block 4 — Single Inference (chat template; continuation-only)

def generate_json_text(generator, tokenizer, prompt_text: str,
                       max_new_tokens: int = 768, temperature: float = 0.25) -> str:
    """
    Call the model once and return only the generated continuation.

    The prompt is wrapped in a two-message chat (system + user) and rendered
    with the tokenizer's chat template before being handed to the generator.

    Args:
        generator: Callable text-generation pipeline; called as
            ``generator(text, **generation_kwargs)``.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``eos_token_id``.
        prompt_text: Instruction text placed in the user message.
        max_new_tokens: Generation budget for the continuation.
        temperature: Sampling temperature. A value > 0 enables nucleus sampling
            (top_p=0.9); ``0``, a negative value or ``None`` switches to greedy
            decoding, because sampling requires a strictly positive temperature.

    Returns:
        The generated text of the first returned sequence.
    """
    chat = [
        {"role": "system", "content": "You are a precise information-extraction model. Follow instructions carefully and return only JSON."},
        {"role": "user", "content": prompt_text}
    ]
    # NOTE(review): the rendered template may already contain the BOS token;
    # confirm the pipeline does not prepend a second one when tokenizing.
    formatted = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    gen_kwargs = dict(
        max_new_tokens=max_new_tokens,
        return_full_text=False,
        truncation=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    if temperature is not None and temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # Greedy decoding: sampling-only knobs are omitted on purpose
        gen_kwargs["do_sample"] = False

    out = generator(formatted, **gen_kwargs)
    first = out[0]
    return first["generated_text"] if isinstance(first, dict) else first


In [8]:
# Block 5 — Prompt 1 Parser & Post-Validation (spans, domain/range, dedupe, schema normalization)

from typing import Dict, Any, List, Tuple
import json
import re

def _support_is_substring(text: str, quote: str) -> bool:
    return quote and quote in text

def _span_ok(text: str, span: List[int], quote: str) -> bool:
    if not span or len(span) != 2:
        return False
    s, e = span
    if s < 0 or e < 0 or s >= e or e > len(text):
        return False
    return text[s:e] == quote

def _ensure_surface_span_consistent(text: str, parsed: Dict[str, Any]) -> Dict[str, Any]:
    """
    If mention spans are missing but surfaces exist, try to fill spans via exact match.
    """
    for m in parsed.get("mentions", []):
        surf = m.get("surface") or ""
        span = m.get("span")
        if not surf:
            continue
        if not span or not isinstance(span, list) or len(span) != 2:
            start = text.find(surf)
            if start >= 0:
                m["span"] = [start, start + len(surf)]
    return parsed

def build_relation_sig_maps(ontology_json: Dict[str, Any]) -> Dict[str, Dict[str, str]]:
    """
    Build the relation signature table used for domain/range validation.

    returns: {"relation": {"domain": "ConceptA", "range": "ConceptB"}}

    Domain and range are stored as concept *labels*: when the ontology gives
    them as concept identifiers (a value matching some concept's "qid"), the
    identifier is replaced by that concept's "label". The prompt shows the
    model labels, so the validator must compare against labels as well;
    otherwise no triple can ever pass the check. Values that match no concept
    identifier are kept as-is.

    Relations are read from "relations" (or "Relations"); the relation name is
    taken from "name", "relation" or "label", in that order. Relations missing
    a name, domain or range are skipped.
    """
    qid_to_label = {}
    for c in ontology_json.get("concepts") or []:
        if isinstance(c, dict) and c.get("qid") and c.get("label"):
            qid_to_label.setdefault(str(c["qid"]), str(c["label"]))

    relsig = {}
    rels = ontology_json.get("relations") or ontology_json.get("Relations") or []
    for r in rels:
        name = r.get("name") or r.get("relation") or r.get("label")
        dom  = r.get("domain")
        rng  = r.get("range")
        if name and dom and rng:
            relsig[str(name)] = {
                "domain": qid_to_label.get(str(dom), str(dom)),
                "range": qid_to_label.get(str(rng), str(rng)),
            }
    return relsig

def _domain_range_valid(triple_obj: Dict[str, Any], relsig: Dict[str, Dict[str,str]]) -> bool:
    t = triple_obj.get("triple") or ["", "", ""]
    rel = t[1] if len(t) == 3 else ""
    st  = triple_obj.get("subject_type") or ""
    ot  = triple_obj.get("object_type") or ""
    sig = relsig.get(rel, {})
    return bool(sig) and (st == sig.get("domain")) and (ot == sig.get("range"))

def _index_mentions(parsed: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    idx = {}
    for m in parsed.get("mentions", []):
        s = (m.get("surface") or "").strip()
        if not s:
            continue
        idx.setdefault(s, m)
    return idx

def _dedupe_triples(parsed: Dict[str, Any]) -> Dict[str, Any]:
    seen = set()
    new = []
    for t in parsed.get("triples", []):
        k = tuple(t.get("triple") or [])
        if len(k) != 3:
            continue
        if k in seen:
            continue
        seen.add(k)
        new.append(t)
    parsed["triples"] = new
    return parsed

def _strip_json_line_comments(raw: str) -> str:
    """Remove `// ...` comments that sit outside JSON string literals (newlines are kept)."""
    out = []
    in_string = False
    escaped = False
    i, n = 0, len(raw)
    while i < n:
        ch = raw[i]
        if in_string:
            out.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            i += 1
        elif ch == '"':
            in_string = True
            out.append(ch)
            i += 1
        elif raw.startswith("//", i):
            newline = raw.find("\n", i)
            if newline == -1:
                break  # comment runs to the end of the input
            i = newline
        else:
            out.append(ch)
            i += 1
    return "".join(out)


def _decode_embedded_object(raw: str):
    """
    Decode the JSON value that starts at the first '{' in ``raw``, ignoring any
    text before it and after the value. Returns None when there is no '{'.
    Retries once with `//` comments removed; re-raises the first decode error
    when both attempts fail.
    """
    start = raw.find("{")
    if start < 0:
        return None
    decoder = json.JSONDecoder()
    candidate = raw[start:]
    try:
        return decoder.raw_decode(candidate)[0]
    except json.JSONDecodeError as first_error:
        try:
            return decoder.raw_decode(_strip_json_line_comments(candidate))[0]
        except json.JSONDecodeError:
            raise first_error from None


def parse_prompt1_json(raw: str) -> Dict[str, Any]:
    """
    Parse the model reply into the evaluator-ready schema, forgiving minor
    format deviations.

    Recovery when the reply is not pure JSON:
      - text before the first '{' and after the end of the JSON object (code
        fences, explanations) is ignored
      - `//` comments outside string literals are tolerated
      - a reply without any '{' yields {"mentions": [], "triples": []}

    Normalisation:
      - a top-level value that is not an object is replaced by the empty schema
      - "mentions" / "triples" that are missing or not lists become []
      - triple entries that are not objects are dropped
      - rename 'confidence' -> 'confidence_prompt' (default 0.0)
      - coerce support into list of {quote, char_span}
      - ensure subject_type/object_type/notes/ontology_domain_range_check exist

    Evidence and ontology checks are NOT done here.

    Raises:
        json.JSONDecodeError: when the reply contains a '{' but no JSON value
            can be decoded from it.
    """
    try:
        obj = json.loads(raw)
    except Exception:
        obj = _decode_embedded_object(raw)

    if not isinstance(obj, dict):
        obj = {}

    mentions = obj.get("mentions")
    obj["mentions"] = mentions if isinstance(mentions, list) else []
    triples = obj.get("triples")
    obj["triples"] = [t for t in triples if isinstance(t, dict)] if isinstance(triples, list) else []

    # normalize triple entries
    for t in obj["triples"]:
        # rename confidence
        if "confidence_prompt" not in t and "confidence" in t:
            t["confidence_prompt"] = t.pop("confidence")
        t.setdefault("confidence_prompt", 0.0)

        # normalize support: allow string or list; convert to list of {quote, char_span}
        sup = t.get("support")
        if isinstance(sup, str):
            t["support"] = [{"quote": sup, "char_span": []}]
        elif isinstance(sup, list):
            new_sup = []
            for s in sup:
                if isinstance(s, dict) and "quote" in s:
                    s.setdefault("char_span", [])
                    new_sup.append(s)
                elif isinstance(s, str):
                    new_sup.append({"quote": s, "char_span": []})
            t["support"] = new_sup
        else:
            t["support"] = []

        # ensure types/flags exist
        t.setdefault("subject_type", "")
        t.setdefault("object_type", "")
        t.setdefault("notes", "")
        t.setdefault("ontology_domain_range_check", False)

    return obj

def post_validate_prompt1(
    text: str,
    parsed: Dict[str, Any],
    ontology_json: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Produce an evaluator-ready copy of ``parsed``.

    Rules enforced on each triple (a triple failing any rule is dropped):
      - "triple" must have exactly three elements
      - at least one support entry must be present
      - every support.quote must be an exact substring of ``text``
      - if char_span is present, ``text[start:end]`` must equal the quote
      - ontology_domain_range_check is recomputed deterministically from the
        ontology and must be True
    Surviving triples are de-duplicated; mention spans are filled in where
    they are missing and the surface occurs in the text.

    The input ``parsed`` is NOT modified: validation runs on a deep copy, so a
    caller can keep the raw parse next to the validated result.

    Args:
        text: Source text the model was given.
        parsed: Output of parse_prompt1_json.
        ontology_json: Ontology used to derive relation signatures.

    Returns:
        A new dict with the same shape as ``parsed``, containing only valid triples.
    """
    import copy  # local import: not part of this notebook's import cells

    relsig = build_relation_sig_maps(ontology_json)

    # Validate a private copy; filtering in place would silently rewrite the
    # caller's "parsed" record as well.
    result = copy.deepcopy(parsed)

    # fix mentions spans opportunistically
    result = _ensure_surface_span_consistent(text, result)

    kept = []
    for t in result.get("triples") or []:
        if not isinstance(t, dict):
            continue
        triple = t.get("triple") or []
        if len(triple) != 3:
            continue

        # evidence check
        supports = t.get("support") or []
        if not supports:
            continue

        ok_ev = True
        for s in supports:
            if not isinstance(s, dict):
                ok_ev = False
                break
            quote = s.get("quote") or ""
            span  = s.get("char_span") or []
            if not _support_is_substring(text, quote):
                ok_ev = False
                break
            # an absent/empty span is tolerated; a present one must match
            if span and not _span_ok(text, span, quote):
                ok_ev = False
                break
        if not ok_ev:
            continue

        # ontology check (deterministic); the model's own flag is overwritten
        t["ontology_domain_range_check"] = _domain_range_valid(t, relsig)

        # keep only ontology-valid triples
        if not t["ontology_domain_range_check"]:
            continue

        kept.append(t)

    result["triples"] = kept
    result = _dedupe_triples(result)
    return result


In [9]:
# Block 6 — Orchestrator (Prompt 1 end-to-end, zero-shot, with full debugging)

from typing import Optional, List, Dict, Any, Tuple
import json

# Priority keys we will try to pull the input text from
_TEXT_KEYS_PRIORITY = (
    "text", "Text", "sentence", "Sentence", "input", "Input",
    "content", "document", "doc", "passage", "article", "context",
    "raw_text", "body", "body_text", "main_text", "wiki_text"
)

def _extract_text_field(rec: Dict[str, Any]) -> Tuple[str, str]:
    """
    Try multiple common keys to fetch the input text.
    Returns (text, key_used). If none match, falls back to the longest string-valued field (>= 10 chars).
    """
    for k in _TEXT_KEYS_PRIORITY:
        val = rec.get(k)
        if isinstance(val, str) and val.strip():
            return val.strip(), k
    # Fallback: pick the longest string field >= 10 chars
    longest_key = None
    longest_text = ""
    for k, v in rec.items():
        if isinstance(v, str) and len(v.strip()) >= 10 and len(v) > len(longest_text):
            longest_text = v.strip()
            longest_key = k
    return (longest_text, longest_key or "")

def _print_debug_section(header: str, footer: str, payload: Any, as_json: bool = False) -> None:
    """Print ``payload`` between two banner lines; pretty-print it as JSON when asked and possible."""
    print(header)
    if as_json:
        try:
            print(json.dumps(payload, ensure_ascii=False, indent=2))
        except Exception:
            print(payload)
    else:
        print(payload)
    print(footer)


def run_pipeline_prompt1(
    input_jsonl_path: str,
    ontology_json_path: str,
    output_jsonl_path: str,
    max_items: Optional[int] = None,
    max_new_tokens: int = 768,
    temperature: float = 0.25,
    verbose: bool = True,
    k: int = 6,
) -> List[Dict[str, Any]]:
    """
    Orchestrates Prompt 1 (zero-shot):
      - loads ontology and inputs
      - builds Prompt 1 instruction (no few-shots)
      - calls the model (generate_json_text(generator, tokenizer, prompt_text, ...))
      - parses and post-validates JSON to be evaluator-ready
      - writes output JSONL with raw and validated objects

    Args:
        input_jsonl_path: JSONL file with one input record per line.
        ontology_json_path: JSON file holding the ontology.
        output_jsonl_path: Destination JSONL file (overwritten).
        max_items: Optional cap on the number of input records.
        max_new_tokens: Generation budget per record.
        temperature: Sampling temperature passed to generation.
        verbose: When True, prints progress plus, per record, the EXACT prompt,
            the RAW model output, the PARSED JSON and the POST-VALIDATED JSON.
            Error messages are printed regardless.
        k: Maximum number of candidate triples requested in the prompt.

    Returns:
        The list of output records, one per input record, each with keys
        id, input_text, prompt1_prompt, prompt1_raw, prompt1_parsed,
        prompt1_validated, plus "error" when a stage failed for that record.

    Notes:
        - "prompt1_parsed" is kept separate from "prompt1_validated":
          validation runs on a deep copy, so the saved parse still contains
          the triples the validator rejected.
        - The model is only loaded when there is at least one input record.
    """
    import copy  # local import: not part of this notebook's import cells

    # Load ontology
    with open(ontology_json_path, "r", encoding="utf-8") as f:
        ontology_json = json.load(f)

    # Load inputs
    items = list(read_jsonl(input_jsonl_path, max_items=max_items))
    if verbose:
        print(f"[RUN] Loaded {len(items)} input items from {input_jsonl_path}")

    # Setup model (defined in the imports & config cell): returns (generator, tokenizer).
    # Loading is expensive, so it is skipped when there is nothing to process.
    generator = tokenizer = None
    if items:
        generator, tokenizer = setup_model()

    outputs: List[Dict[str, Any]] = []

    for idx, rec in enumerate(items, start=1):
        rid = str(rec.get("id") or f"item_{idx}")
        text, key_used = _extract_text_field(rec)

        if verbose:
            print(f"\n[RUN] === ID={rid} ===")
            print(f"[INFO] Text source key: {key_used!r}")
            if not text:
                print("[WARN] No non-empty text found in typical keys; also failed longest-string fallback.")

        if not text:
            outputs.append({
                "id": rid,
                "input_text": text,
                "prompt1_prompt": "",
                "prompt1_raw": "{}",
                "prompt1_parsed": {"mentions": [], "triples": []},
                "prompt1_validated": {"mentions": [], "triples": []},
                "error": "empty_input_text"
            })
            continue

        errors: List[str] = []

        # Build prompt
        prompt_text = build_prompt1_json_only(
            ontology_json=ontology_json,
            test_sentence=text,
            k=k,
        )
        if verbose:
            _print_debug_section(
                "\n==== [DEBUG] FINAL PROMPT SENT TO MODEL ====",
                "==== [DEBUG] END PROMPT ====\n",
                prompt_text,
            )

        # Generate model output; a failure is logged and replaced by an empty object
        try:
            raw = generate_json_text(
                generator,
                tokenizer,
                prompt_text,
                max_new_tokens=max_new_tokens,
                temperature=temperature
            )
        except Exception as e:
            print(f"[ERROR] Generation failed for ID={rid}: {e}")
            raw = "{}"
            errors.append(f"generation_failed: {e}")
        if verbose:
            _print_debug_section(
                "==== [DEBUG] RAW MODEL OUTPUT ====",
                "==== [DEBUG] END RAW OUTPUT ====\n",
                raw,
            )

        # Parse
        try:
            parsed = parse_prompt1_json(raw)
        except Exception as e:
            print(f"[ERROR] Parse failed for ID={rid}: {e}")
            parsed = {"mentions": [], "triples": []}
            errors.append(f"parse_failed: {e}")
        if verbose:
            _print_debug_section(
                "==== [DEBUG] PARSED JSON ====",
                "==== [DEBUG] END PARSED ====\n",
                parsed,
                as_json=True,
            )

        # Post-validate for evaluator readiness. The validator gets its own copy
        # so that "prompt1_parsed" below keeps the unfiltered parse.
        try:
            validated = post_validate_prompt1(text, copy.deepcopy(parsed), ontology_json)
        except Exception as e:
            print(f"[ERROR] Post-validate failed for ID={rid}: {e}")
            validated = {"mentions": [], "triples": []}
            errors.append(f"post_validate_failed: {e}")
        if verbose:
            _print_debug_section(
                "==== [DEBUG] VALIDATED JSON (Evaluator-ready) ====",
                "==== [DEBUG] END VALIDATED ====\n",
                validated,
                as_json=True,
            )

        out_rec = {
            "id": rid,
            "input_text": text,
            "prompt1_prompt": prompt_text,
            "prompt1_raw": raw,
            "prompt1_parsed": parsed,
            "prompt1_validated": validated,
        }
        if errors:
            out_rec["error"] = "; ".join(errors)
        outputs.append(out_rec)

    # Write results
    write_jsonl(output_jsonl_path, outputs)
    if verbose:
        print(f"[RUN] Done. Wrote {len(outputs)} records to: {output_jsonl_path}")

    return outputs


In [10]:
# Block 7 — Example Run (prints first record keys, then runs the pipeline)

# Project root. Set the PROMPTKG_ROOT environment variable to run from another
# checkout; the default reproduces the paths this notebook was written with.
PROJECT_ROOT = os.environ.get(
    "PROMPTKG_ROOT",
    "/upb/users/b/balram/profiles/unix/cs/promptKG",
)

# Paths — derived from PROJECT_ROOT
ONTOLOGY_JSON = os.path.join(PROJECT_ROOT, "data/input/wikidata/input_ontology/1_movie_ontology.json")
INPUT_JSONL   = os.path.join(PROJECT_ROOT, "data/input/wikidata/input_text/ont_1_movie_test.jsonl")
OUTPUT_JSONL  = os.path.join(PROJECT_ROOT, "data/output/prompt1/wikidata/ont_1_movie_output_test.jsonl")

# Run parameters
MAX_ITEMS       = 1        # keep 1 to make the debug printouts manageable
MAX_NEW_TOKENS  = 768
TEMPERATURE     = 0.25
VERBOSE         = True
K_CANDIDATES    = 6

# Pre-flight: show keys of the first input record to confirm the text field name
first = next(read_jsonl(INPUT_JSONL, max_items=1), None)
if first is None:
    print(f"[ERROR] No records found in: {INPUT_JSONL}")
else:
    print("[DEBUG] First input record keys:", list(first.keys()))
    # Also show a short preview of any string fields
    for k, v in first.items():
        if isinstance(v, str):
            print(f"  - {k}: {v[:120]}{'...' if len(v) > 120 else ''}")

# Execute pipeline (Block 6); the returned records are also written to OUTPUT_JSONL
_ = run_pipeline_prompt1(
    input_jsonl_path=INPUT_JSONL,
    ontology_json_path=ONTOLOGY_JSON,
    output_jsonl_path=OUTPUT_JSONL,
    max_items=MAX_ITEMS,
    max_new_tokens=MAX_NEW_TOKENS,
    temperature=TEMPERATURE,
    verbose=VERBOSE,
    k=K_CANDIDATES,
)


[DEBUG] First input record keys: ['id', 'sent']
  - id: ont_1_movie_test_1
  - sent: Bleach: Hell Verse (Japanese: BLEACH , Hepburn: BurÄ«chi Jigoku-Hen) is a 2010 Japanese animated film directed by Noriyu...
[RUN] Loaded 1 input items from /upb/users/b/balram/profiles/unix/cs/promptKG/data/input/wikidata/input_text/ont_1_movie_test.jsonl
⏳ Loading model: mistralai/Mistral-7B-Instruct-v0.3


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0



[RUN] === ID=ont_1_movie_test_1 ===
[INFO] Text source key: 'sent'

==== [DEBUG] FINAL PROMPT SENT TO MODEL ====
System
You are a KG triple proposer in a Tree-of-Thoughts loop. First detect entity mentions and assign tentative ontology types. Then, using those mentions, propose candidate triples that are valid under the ontology (domain→range). Return only JSON.

User
Task: From the text, 1) list detected mentions with tentative types, 2) propose up to k=6 candidate triples [subject, relation, object].
Use only relations whose domain/range match the types you inferred. For each triple, include confidence ∈ [0,1] and cite the exact supporting span(s).

Text
"Bleach: Hell Verse (Japanese: BLEACH , Hepburn: BurÄ«chi Jigoku-Hen) is a 2010 Japanese animated film directed by Noriyuki Abe."

Ontology concepts
human, city, country, film, film genre, genre, film production company, film award, award, written work, film character, film organization

Ontology relations (domain → range)
- directo