In [None]:
import json
import re
from pathlib import Path

from huggingface_hub import hf_hub_download

# Dataset repository on the Hugging Face Hub that both files are fetched from.
repo_id = "KingPawnUSA/king-gold-and-pawn-operations-bible"

# Each download result is later handed to load_jsonl as the path to open.
chunks_path = hf_hub_download(
    repo_type="dataset",
    repo_id=repo_id,
    filename="rag/chunks/kingpawn_chunks.jsonl",
)
prompts_path = hf_hub_download(
    repo_type="dataset",
    repo_id=repo_id,
    filename="training_data/llm_prompts.jsonl",
)

def load_jsonl(path):
    """Parse a JSON Lines file into a list of decoded records.

    The file is read as UTF-8. Every line is stripped of surrounding
    whitespace; lines that are empty after stripping are skipped and each
    remaining line is decoded with ``json.loads``.

    Args:
        path: Location of the file, passed straight to ``open``.

    Returns:
        A list with one decoded value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        # Consume the generator inside the ``with`` so the file is still open.
        return [json.loads(entry) for entry in stripped_lines if entry]

# Decode both downloaded JSONL files (chunks first, then prompts) and report
# how many records each one produced.
chunks, prompts = (load_jsonl(source) for source in (chunks_path, prompts_path))
print(f"Loaded {len(chunks)} chunks and {len(prompts)} prompt exemplars.")

In [None]:
import math

def _tokenize(text):
    """Return the set of lowercase word tokens in ``text``, ignoring punctuation."""
    # ``\w`` is Unicode-aware for str patterns, so accented letters stay inside
    # a token while marks such as "?" and "¿" are dropped.
    return set(re.findall(r"\w+", text.lower()))


def simple_retrieval(query, top_k=2, corpus=None):
    """Rank chunks by how many distinct query words they contain.

    Both the query and each chunk's text are lowercased and split into word
    tokens with punctuation removed, so "notice?" matches "notice" and
    "¿Qué" matches "qué". Matching is on whole tokens (no stemming and no
    substring hits such as "is" inside "this"), and a word repeated in the
    query is counted once.

    Args:
        query: The question or search string.
        top_k: Maximum number of chunks to return. ``None`` returns every
            matching chunk; zero or a negative value returns an empty list.
        corpus: Iterable of chunk dicts, each with a ``"text"`` key. Defaults
            to the module-level ``chunks`` list when omitted.

    Returns:
        Up to ``top_k`` chunk dicts with at least one matching token, ordered
        by descending overlap. Ties keep their order in ``corpus``.

    Raises:
        KeyError: If a chunk has no ``"text"`` key.
    """
    if top_k is not None and top_k <= 0:
        # A negative slice bound would otherwise return an arbitrary prefix.
        return []

    query_tokens = _tokenize(query)
    if not query_tokens:
        return []

    if corpus is None:
        corpus = chunks

    scored = []
    for chunk in corpus:
        overlap = len(query_tokens & _tokenize(chunk["text"]))
        if overlap:
            scored.append((overlap, chunk))

    # list.sort is stable, so equally scored chunks keep corpus order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [chunk for _, chunk in scored[:top_k]]

# Demo queries: one English and one Spanish question run through the same
# retrieval call and printed in the same format.
question = "How long is the pawn loan term before the final notice?"
question_es = "¿Qué política aplica para renovaciones por SMS?"

for demo_query in (question, question_es):
    for result in simple_retrieval(demo_query):
        print(f"Chunk {result['chunk_id']} ({result['metadata']['language']}): {result['text']}")