In [1]:
import networkx as nx
from graph_generator.graphparsers import RelationshipGraphParser
from linearization_utils import *
from retrieval_utils import similarity_search_graph_docs

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document

from typing import List, Dict, Optional, Tuple
import time

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
CONFIG = {
    # === Embedding & VectorStore ===
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",  # Embedding model for documents/questions
    "faiss_search_k": 3,  # Number of nearest neighbors to retrieve from FAISS

    # === LLM (text generation) ===
    "llm_model_id": "microsoft/Phi-4-mini-reasoning",  # HuggingFace model ID
    "device_map": "auto",  # Device placement: "cuda", "mps", "cpu", or "auto"
    "dtype_policy": "auto",  # Precision: "auto", "bf16", "fp16", or "fp32"
    "max_new_tokens": 256,  # Maximum tokens generated per response
    "do_sample": False,  # Whether to use sampling (True) or greedy decoding (False)
    "temperature": 0.1,  # Randomness control for sampling; lower = more deterministic
    "top_p": 1.0,  # Nucleus sampling threshold; 1.0 = no restriction
    "return_full_text": False,  # Return full text (input+output) if True, only output if False
    "seed": None,  # Random seed for reproducibility; set to int or None

    # === Prompt / Answer ===
    "answer_mode": "YES_NO",  # Answer format mode, e.g., YES/NO
    "answer_uppercase": True,  # If True → "YES"/"NO", else "yes"/"no"

    # === Prompt construction ===
    "include_retrieved_context": True,  # Include retrieved Q&A in prompt
    "include_current_triples": True,  # Include graph triples in prompt
}

try:
    from transformers import set_seed  # Utility for reproducibility
except Exception:
    set_seed = None

## RAG workflow

In [3]:
def _select_dtype() -> torch.dtype:
    """Choose dtype based on CONFIG['dtype_policy'] and hardware."""
    policy = CONFIG.get("dtype_policy", "auto")
    if policy == "bf16":
        return torch.bfloat16
    if policy == "fp16":
        return torch.float16
    if policy == "fp32":
        return torch.float32

    # auto mode
    if torch.cuda.is_available():
        return torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    # MPS backend works more reliably with fp32
    if torch.backends.mps.is_available():
        return torch.float32
    return torch.float32

def _yn(text_yes="YES", text_no="NO"):
    return (text_yes, text_no) if CONFIG.get("answer_uppercase", True) else (text_yes.lower(), text_no.lower())

# =========================
# Embeddings / Vectorstore
# =========================
emb = HuggingFaceEmbeddings(model_name=CONFIG["embedding_model"])  # Local embedding model (MiniLM-L6-v2, 384 dim)

def build_faiss_index(docs: List[Document]) -> FAISS:
    return FAISS.from_documents(docs, emb)

# =========================
# LLM Loader
# =========================
def load_llm_pipeline(
    model_id: Optional[str] = None,       # HuggingFace model id
    device_map: Optional[str] = None,     # Device placement
    dtype: Optional[torch.dtype] = None,  # Torch dtype
    max_new_tokens: Optional[int] = None, # Max tokens per generation
    temperature: Optional[float] = None,  # Sampling temperature
    top_p: Optional[float] = None,        # Nucleus sampling threshold
    do_sample: Optional[bool] = None,     # Sampling vs greedy
    return_full_text: Optional[bool] = None,  # Return input+output if True
):
    """
    Return a text-generation pipeline for QA generation.
    All defaults pull from CONFIG; any arg here will override CONFIG.
    """
    model_id = model_id or CONFIG["llm_model_id"]
    device_map = device_map or CONFIG["device_map"]
    dtype = dtype or _select_dtype()
    max_new_tokens = max_new_tokens or CONFIG["max_new_tokens"]
    temperature = CONFIG["temperature"] if temperature is None else temperature
    top_p = CONFIG["top_p"] if top_p is None else top_p
    do_sample = CONFIG["do_sample"] if do_sample is None else do_sample
    return_full_text = CONFIG["return_full_text"] if return_full_text is None else return_full_text

    if set_seed and isinstance(CONFIG.get("seed"), int):
        set_seed(CONFIG["seed"])

    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map=device_map,
        trust_remote_code=True,
    )

    gen_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map=device_map,
        torch_dtype=dtype,
        return_full_text=return_full_text,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
    )
    return gen_pipe, tokenizer

# =========================
# Question → Graph (generic)
# =========================
def parse_question_to_graph_generic(parser, question: str) -> Tuple[nx.Graph, List[Dict]]:
    """
    Compatible with RelationshipGraphParser.question_to_graph
    and CausalQuestionGraphParser.question_to_causal_graph
    """
    if hasattr(parser, "question_to_graph"):
        return parser.question_to_graph(question)
    elif hasattr(parser, "question_to_causal_graph"):
        return parser.question_to_causal_graph(question)
    else:
        raise AttributeError("Parser must provide question_to_graph or question_to_causal_graph")

# =========================
# Prompt Builder
# =========================
def make_graph_qa_prompt(
    question: str,
    G: nx.Graph,
    relations: Optional[List[Dict]] = None,
    retrieved_docs = None
) -> str:
    # 1) retrieved context (if any)
    sections = []
    if retrieved_docs and CONFIG.get("include_retrieved_context", True):
        doc0, score0 = retrieved_docs[0]
        related_triples = doc0.page_content.strip()
        related_answer  = doc0.metadata.get("llm_answer", "")
        sections.append(
            "<<<RETRIEVED_CONTEXT_START>>>\n"
            "The system searched for a related question in the database. Below are its graph triples and its prior answer.\n"
            f"[RELATED QUESTION'S GRAPH TRIPLES]:\n{related_triples}\n"
            f"[RELATED QUESTION'S ANSWER]: {related_answer}\n"
            "<<<RETRIEVED_CONTEXT_END>>>"
        )

    # 2) current question + triples (optional)
    triples_text = ""
    if relations and CONFIG.get("include_current_triples", True):
        triples_text = "\n".join(
            f"{u} -> {d.get('rel','related_to')} -> {v}"
            for u, v, d in G.edges(data=True)
        )
    q_block = f"[CURRENT QUESTION]: {question}"
    if triples_text.strip():
        q_block += f"\n[CURRENT QUESTION'S GRAPH TRIPLES]:\n{triples_text}"
    sections.append(q_block)

    # 3) task instructions (placed at the end)
    yes, no = _yn("YES", "NO")
    rules = (
        "[TASK]: You are a precise QA assistant for binary (yes/no) questions.\n"
        f"- Output ONLY one token: {yes} or {no}.\n"
        "- Do NOT copy or summarize any context.\n"
        "- Do NOT show reasoning, steps, or extra words.\n"
        "- If retrieved context conflicts with CURRENT QUESTION'S GRAPH TRIPLES, "
        "prefer the CURRENT QUESTION'S GRAPH TRIPLES.\n"
        "- If uncertain, choose NO.\n"
        f"[ANSWER]: "
    )
    sections.append(rules)

    # Final prompt
    prompt = "\n\n".join(sections)
    return prompt

# =========================
# LLM Answerer
# =========================
def answer_with_llm(
    question: str,
    gen_pipe,
    parser,
    faiss_db = None,
    prompt = None
) -> str:
    retrieved_docs = None
    if faiss_db:
        k = CONFIG.get("faiss_search_k", 3)  # Number of docs to retrieve
        _, hits = similarity_search_graph_docs(question, parser, faiss_db, k=k)
        retrieved_docs = hits

    G, rels = parse_question_to_graph_generic(parser, question)

    if prompt == None:
        prompt = make_graph_qa_prompt(question, G, rels, retrieved_docs)

    out = gen_pipe(prompt)
    text = out[0]["generated_text"]

    # If return_full_text=False → only new content; else trim prefix
    if CONFIG.get("return_full_text", True):
        answer = text[len(prompt):].strip()
    else:
        answer = text.strip()

    # Normalize YES/NO case
    yes, no = _yn("YES", "NO")
    a = answer.strip().lower()
    if "yes" in a and "no" not in a:
        answer = yes
    elif "no" in a and "yes" not in a:
        answer = no
    print(answer)
    return answer

# =========================
# Build Docs with LLM Answer
# =========================
def build_docs_with_answer(
    questions: List[str],
    parser,
    gen_pipe,
    *,
    add_prompt_snapshot: bool = False,
    faiss_db = None
) -> List[Document]:
    docs: List[Document] = []
    for qid, q in enumerate(questions, start=1):
        G, rels = parse_question_to_graph_generic(parser, q)
        text = build_relationship_text(q, G, rels)  # Output [QUESTION][GRAPH][TRIPLES]

        # Get LLM answer
        answer = answer_with_llm(q, gen_pipe, parser, faiss_db)

        metadata = {
            "graph_id": f"Q{qid}",
            "question": q,
            "num_nodes": G.number_of_nodes(),
            "num_edges": G.number_of_edges(),
            "llm_model": CONFIG["llm_model_id"],
            "llm_answer": answer,
            "created_at": int(time.time()),
        }
        if add_prompt_snapshot:
            metadata["prompt_snapshot"] = make_graph_qa_prompt(q, G, rels)

        docs.append(Document(page_content=text, metadata=metadata))
    return docs


def build_faiss_index(docs: List[Document]) -> FAISS:
    vectordb = FAISS.from_documents(docs, emb)
    return vectordb


  emb = HuggingFaceEmbeddings(model_name=CONFIG["embedding_model"])  # Local embedding model (MiniLM-L6-v2, 384 dim)


### Answer questions in bulk and load them into the database.

In [None]:
# 1) Parser
parser = RelationshipGraphParser()   # or CausalQuestionGraphParser()

# 2) Load Phi-4-mini-reasoning
gen_pipe, _ = load_llm_pipeline(
    model_id="microsoft/Phi-4-mini-reasoning",
    device_map="auto",
    dtype=None,                # Automatically select appropriate precision
    max_new_tokens=256,
    temperature=0.2,           # Control randomness
)

# 3) Question set
questions = [
    "Is the Great Wall of China located in China?",
    "Does the Great Wall span over 13000 miles?", 
    "Was the Great Wall built during the Ming Dynasty?",
    "Can the Great Wall be seen from space?",
    "Is the Great Wall made of stone and brick?",
    "Does the Great Wall have watchtowers?",
    "Was the Great Wall constructed over 2000 years?",
    "Is the Great Wall a UNESCO World Heritage Site?",
    "Does the Great Wall stretch across northern China?",
    "Are millions of tourists visiting the Great Wall annually?"
]

# 4) Build documents (including LLM answers in metadata)
docs = build_docs_with_answer(
    questions, parser, gen_pipe, add_prompt_snapshot=False
)

# 5) Vectorization & Save
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
faiss_db = FAISS.from_documents(docs, emb)
faiss_db.save_local("graph_rag_faiss_index")
print(f"FAISS index ready. docs={len(docs)}")


# To load later:
# faiss_db = FAISS.load_local("graph_rag_faiss_index", emb, allow_dangerous_deserialization=True)


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.09it/s]
Device set to use mps
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO
NO
FAISS index ready. docs=10


In [8]:
print(docs)

[Document(metadata={'graph_id': 'Q1', 'question': 'Is the Great Wall of China located in China?', 'num_nodes': 3, 'num_edges': 2, 'llm_model': 'microsoft/Phi-4-mini-reasoning', 'llm_answer': 'NO', 'created_at': 1755590585}, page_content='the Great Wall of China -> subj -> locate\nlocate -> prep_in -> China'), Document(metadata={'graph_id': 'Q2', 'question': 'Does the Great Wall span over 13000 miles?', 'num_nodes': 3, 'num_edges': 2, 'llm_model': 'microsoft/Phi-4-mini-reasoning', 'llm_answer': 'NO', 'created_at': 1755590586}, page_content='Great Wall span -> subj -> do\ndo -> prep_over -> 13000 miles'), Document(metadata={'graph_id': 'Q3', 'question': 'Was the Great Wall built during the Ming Dynasty?', 'num_nodes': 3, 'num_edges': 2, 'llm_model': 'microsoft/Phi-4-mini-reasoning', 'llm_answer': 'NO', 'created_at': 1755590586}, page_content='Great Wall -> subj -> build\nbuild -> prep_during -> the Ming Dynasty'), Document(metadata={'graph_id': 'Q4', 'question': 'Can the Great Wall be se

### Test for answering individual questions (adjust prompt with no database context)

In [4]:
parser = RelationshipGraphParser()   #

gen_pipe, _ = load_llm_pipeline(
    model_id="microsoft/Phi-4-mini-reasoning",
    device_map="auto",
    dtype=None,                #
    max_new_tokens=256,
    temperature=0.2,
)

questions = "Is the Great Wall visible from low Earth orbit?"
faiss_db = FAISS.load_local("graph_rag_faiss_index", emb, allow_dangerous_deserialization=True)
#answer = answer_with_llm(questions, gen_pipe, parser)
answer = answer_with_llm(questions, gen_pipe, parser, faiss_db)



Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.92s/it]
Device set to use mps
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


In [4]:
import time
import torch
from typing import Optional, List, Dict, Tuple
import pandas as pd

# Dependency: CONFIG / make_graph_qa_prompt / parse_question_to_graph_generic
# and the retrieval function similarity_search_graph_docs
# already defined in your current file, no modification needed

def _get_retrieved_docs_for_prompt(
    question: str,
    parser,
    faiss_db=None,
    k: Optional[int] = None,
):
    """Decide whether to retrieve based on CONFIG['include_retrieved_context'], return hits ([(Document, score), ...])."""
    if not faiss_db or not CONFIG.get("include_retrieved_context", True):
        return None
    k = k or CONFIG.get("faiss_search_k", 3)
    _, hits = similarity_search_graph_docs(question, parser, faiss_db, k=k)
    return hits if hits else None

def _count_tokens(tokenizer, text: str) -> int:
    return len(tokenizer.encode(text, add_special_tokens=False))

def measure_once(
    question: str,
    gen_pipe,              # pipeline from load_llm_pipeline
    tokenizer,             # tokenizer from load_llm_pipeline (used for counting tokens)
    parser,
    faiss_db=None,
    *,
    label: Optional[str] = None,
    use_cuda_mem: bool = True,
) -> Dict:
    """
    According to current CONFIG, construct the prompt (controlled by include_retrieved_context / include_current_triples),
    then call LLM, measuring once:
      - input_tokens / output_tokens / total_tokens
      - latency_sec
      - (optional) peak_vram_MiB
      - record whether retrieval and triples are used
    """
    # 1) Retrieval (if enabled)
    retrieved_docs = _get_retrieved_docs_for_prompt(
        question, parser, faiss_db=faiss_db, k=CONFIG.get("faiss_search_k", 3)
    )

    # 2) Parse the current question into graph/triples
    G, rels = parse_question_to_graph_generic(parser, question)

    # 3) Construct prompt (internally decides whether to include triples based on CONFIG['include_current_triples'])
    prompt = make_graph_qa_prompt(
        question=question,
        G=G,
        relations=rels,
        retrieved_docs=retrieved_docs
    )

    # 4) Count input tokens
    in_tok = _count_tokens(tokenizer, prompt)

    # 5) Timing & generation (optional: peak GPU memory)
    peak_mem = None
    if use_cuda_mem and torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()

    t0 = time.perf_counter()
    answer = answer_with_llm(question, gen_pipe, parser, faiss_db, prompt)
    dt = time.perf_counter() - t0

    # 7) Count output tokens
    out_tok = _count_tokens(tokenizer, answer)

    # 8) Peak GPU memory usage
    if use_cuda_mem and torch.cuda.is_available():
        torch.cuda.synchronize()
        peak_mem = torch.cuda.max_memory_allocated() / (1024**2)

    # 9) Mark whether retrieval/triples were used
    used_retrieval = bool(retrieved_docs)
    used_triples = bool(rels) and CONFIG.get("include_current_triples", True)

    return {
        "label": label or ("with_graph_ctx" if used_triples or used_retrieval else "no_graph_ctx"),
        "question": question,
        "input_tokens": in_tok,
        "output_tokens": out_tok,
        "total_tokens": in_tok + out_tok,
        "latency_sec": dt,
        "peak_vram_MiB": peak_mem,
        "used_retrieval": used_retrieval,
        "used_current_triples": used_triples,
        "prompt_chars": len(prompt),
        "answer": answer,
    }

# ===== Batch evaluation & summary (optional) =====
def batch_measure(
    questions: List[str],
    gen_pipe,
    tokenizer,
    parser,
    faiss_db=None,
    *,
    flip_configs: List[Dict] = None,
) -> pd.DataFrame:
    """
    Run multiple CONFIG combinations (e.g. with/without retrieval, with/without triples) on a question set,
    return a summary DataFrame.
    flip_configs: each element is a local override of CONFIG, for example:
        [{"include_retrieved_context": False, "include_current_triples": False, "label": "no_ctx"},
         {"include_retrieved_context": True,  "include_current_triples": True,  "label": "with_both"}]
    """
    rows = []
    if not flip_configs:
        flip_configs = [ {"label": "current_CONFIG"} ]

    for cfg in flip_configs:
        # Save old values, temporarily override
        old_retrieve = CONFIG.get("include_retrieved_context", True)
        old_triples  = CONFIG.get("include_current_triples", True)
        if "include_retrieved_context" in cfg:
            CONFIG["include_retrieved_context"] = cfg["include_retrieved_context"]
        if "include_current_triples" in cfg:
            CONFIG["include_current_triples"] = cfg["include_current_triples"]

        for q in questions:
            try:
                rec = measure_once(
                    question=q,
                    gen_pipe=gen_pipe,
                    tokenizer=tokenizer,
                    parser=parser,
                    faiss_db=faiss_db,
                    label=cfg.get("label")
                )
                rows.append(rec)
            except Exception as e:
                rows.append({
                    "label": cfg.get("label"),
                    "question": q,
                    "error": str(e)
                })

        # Restore CONFIG
        CONFIG["include_retrieved_context"] = old_retrieve
        CONFIG["include_current_triples"]   = old_triples

    return pd.DataFrame(rows)

def summarize_cost(df: pd.DataFrame, base_label: str, target_label: str):
    """Compare average cost of two configurations and print relative changes (%)."""
    A = df[df["label"]==base_label]
    B = df[df["label"]==target_label]
    if A.empty or B.empty:
        print("Not enough data for comparison.")
        return

    def avg(col):
        a, b = A[col].mean(), B[col].mean()
        return a, b, (b-a)/max(1e-9, a)

    for col in ["input_tokens","output_tokens","total_tokens","latency_sec","peak_vram_MiB","prompt_chars"]:
        if col in df.columns:
            a,b,d = avg(col)
            print(f"{col:>15s} | {base_label}: {a:8.2f} | {target_label}: {b:8.2f} | Δ%: {d*100:7.2f}%")


In [5]:
# 1) Load
gen_pipe, tokenizer = load_llm_pipeline()   # Use your loader above
parser = RelationshipGraphParser()
faiss_db = FAISS.load_local("graph_rag_faiss_index", emb, allow_dangerous_deserialization=True)

# 2) Single-question measurement (under current CONFIG)
rec = measure_once(
    "Is the Great Wall visible from low Earth orbit?",
    gen_pipe, tokenizer, parser, faiss_db, label="current_CONFIG"
)
print(rec)

# 3) Batch A/B comparison (no context vs. both retrieval & triples)
questions = [
    "Is the Great Wall visible from low Earth orbit?",
    "Was the Great Wall built during the Ming Dynasty?",
    "Does the Great Wall have watchtowers?"
]
df = batch_measure(
    questions, gen_pipe, tokenizer, parser, faiss_db,
    flip_configs=[
        {"include_retrieved_context": False, "include_current_triples": False, "label": "no_ctx"},
        {"include_retrieved_context": True,  "include_current_triples": True,  "label": "with_both"},
    ]
)
print(df.head())
print("\n=== Summary ===")
summarize_cost(df, base_label="no_ctx", target_label="with_both")


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.60s/it]
Device set to use mps
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO
{'label': 'current_CONFIG', 'question': 'Is the Great Wall visible from low Earth orbit?', 'input_tokens': 188, 'output_tokens': 1, 'total_tokens': 189, 'latency_sec': 0.7058252499999966, 'peak_vram_MiB': None, 'used_retrieval': True, 'used_current_triples': True, 'prompt_chars': 797, 'answer': 'NO'}


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


NO
NO
       label                                           question  input_tokens  \
0     no_ctx    Is the Great Wall visible from low Earth orbit?            97   
1     no_ctx  Was the Great Wall built during the Ming Dynasty?            97   
2     no_ctx              Does the Great Wall have watchtowers?            96   
3  with_both    Is the Great Wall visible from low Earth orbit?           188   
4  with_both  Was the Great Wall built during the Ming Dynasty?           197   

   output_tokens  total_tokens  latency_sec peak_vram_MiB  used_retrieval  \
0              1            98     0.397436          None           False   
1              1            98     0.278040          None           False   
2              1            97     0.303288          None           False   
3              1           189     0.601151          None            True   
4              1           198     0.709760          None            True   

   used_current_triples  prompt_chars answer