In [None]:
import pymupdf
import re,uuid
from sentence_transformers import SentenceTransformer
import chromadb
from tqdm import tqdm
from openai import OpenAI

# Core Book Embedding

In [None]:
# Open the core rulebook PDF (path is relative to the notebook's working directory).
core_book = pymupdf.open("Cyberpunk Red Core.pdf")
# Do not paste an API key into the notebook. When no api_key argument is given,
# the OpenAI client reads the key from the OPENAI_API_KEY environment variable.
client_open_ai = OpenAI()

In [None]:
# Core Book RAG
def core_book_rag(
    core_book_pdf,
    collection_name="cpr_core_rules",
    rag_model_name="all-MiniLM-L6-v2",
    chromadb_path="./vectordb",
    batch_size=256,
):
    """Split a rulebook PDF into paragraph chunks, embed them and store them in Chroma.

    Each page's text is split on blank lines; paragraphs longer than 30
    characters are kept. Every chunk carries its 1-based page number and the
    title of the table-of-contents entry covering that page ("Unknown" when
    the PDF has no table of contents).

    Chunk ids are derived from the PDF path, page number and paragraph index,
    and chunks are written with ``upsert``, so running the function again on
    the same PDF overwrites the existing entries instead of duplicating them.

    Args:
        core_book_pdf: Path of the PDF to index.
        collection_name: Chroma collection to write to (created if missing).
        rag_model_name: SentenceTransformer model used for the embeddings.
        chromadb_path: Directory of the persistent Chroma database.
        batch_size: Number of chunks encoded and written per round trip.

    Returns:
        int: The number of chunks written to the collection.
    """
    chunks = []

    # The document is used as a context manager so the file handle is released
    # even if text extraction fails part-way through.
    with pymupdf.open(core_book_pdf) as core_book:
        toc = core_book.get_toc(simple=True)
        page_count = len(core_book)

        # Map each 0-based page index to the title of the last TOC entry that
        # starts on or before that page (TOC page numbers are compared against
        # page_num + 1).
        page_map = {}
        toc_index = 0
        for page_num in range(page_count):
            while toc_index + 1 < len(toc) and toc[toc_index + 1][2] <= page_num + 1:
                toc_index += 1
            page_map[page_num] = toc[toc_index][1] if toc else "Unknown"

        for page_num in range(page_count):
            text = core_book.load_page(page_num).get_text("text")
            chapter = page_map.get(page_num, "Unknown")

            for para_index, para in enumerate(re.split(r"\n{2,}", text)):
                clean = para.strip()
                if len(clean) > 30:
                    # Stable id: same PDF + page + paragraph position -> same id.
                    chunk_key = f"{core_book_pdf}|{page_num + 1}|{para_index}"
                    chunks.append({
                        "id": str(uuid.uuid5(uuid.NAMESPACE_URL, chunk_key)),
                        "text": clean,
                        "meta": {
                            "page": page_num + 1,
                            "chapter": chapter
                        }
                    })

    rag_model = SentenceTransformer(rag_model_name)
    chroma_client = chromadb.PersistentClient(path=chromadb_path)
    collection = chroma_client.get_or_create_collection(collection_name)

    print(f"Embedding {len(chunks)} chunks...")
    # Encode and write in batches: one model call and one database call per
    # batch instead of one per chunk.
    for start in tqdm(range(0, len(chunks), batch_size)):
        batch = chunks[start:start + batch_size]
        texts = [chunk["text"] for chunk in batch]
        collection.upsert(
            ids=[chunk["id"] for chunk in batch],
            embeddings=rag_model.encode(texts).tolist(),
            documents=texts,
            metadatas=[chunk["meta"] for chunk in batch]
        )

    return len(chunks)

In [None]:
from functools import lru_cache


@lru_cache(maxsize=None)
def _load_query_embedder(model_name: str):
    """Load a SentenceTransformer once and reuse it for every later query."""
    return SentenceTransformer(model_name)


def query_chroma(collection: str, query: str, k=5):
    """Return the ``k`` stored chunks closest to ``query`` in a Chroma collection.

    Args:
        collection: Name of an existing collection in the "./vectordb" database.
        query: Natural-language question to embed and search with.
        k: Number of results to request.

    Returns:
        list[dict]: One dict per hit with the keys "text", "page" and "chapter".
        "page" and "chapter" are None for entries whose metadata lacks them.
    """
    # Loading the model is the expensive step, so it is cached across calls.
    embedder = _load_query_embedder("all-MiniLM-L6-v2")
    chroma_client = chromadb.PersistentClient(path="./vectordb")
    # Separate name so the `collection` argument (a string) is not shadowed.
    target_collection = chroma_client.get_collection(collection)

    query_emb = embedder.encode(query).tolist()
    results = target_collection.query(query_embeddings=[query_emb], n_results=k)

    hits = []
    # A single query embedding was sent, so the hits are in the first result row.
    for doc, meta in zip(results["documents"][0], results["metadatas"][0]):
        meta = meta or {}
        hits.append({
            "text": doc,
            "page": meta.get("page"),
            "chapter": meta.get("chapter")
        })
    return hits

In [None]:
# pprint is not among the notebook's top-level imports, so import it where it
# is first used; otherwise this cell raises NameError on a fresh kernel.
import pprint

# Quick retrieval check against the core-rules collection.
for result in query_chroma("cpr_core_rules", "What are the rules for cover?"):
    pprint.pprint(result)

In [None]:
def ask_gpt4o_with_rag(user_query: str, temp=0.4):
    """Answer a rules question with gpt-4o-mini, grounded in retrieved rule chunks.

    The chunks returned by ``query_chroma`` for the "cpr_core_rules" collection
    are formatted as "[chapter – p.page]" followed by the chunk text and
    appended to the user's question.

    Args:
        user_query: The question to answer.
        temp: Sampling temperature passed to the chat completion.

    Returns:
        The text content of the first completion choice.
    """
    system_prompt = (
        "You are a Cyberpunk RED game assistant. Answer the user's question "
        "using only the official rules. Include relevant citations (e.g., page number and chapter). "
        "If the rules are unclear, say so rather than guessing."
    )

    # One citation-prefixed passage per retrieved chunk.
    cited_passages = []
    for hit in query_chroma("cpr_core_rules", user_query):
        cited_passages.append(f"[{hit['chapter']} – p.{hit['page']}]\n{hit['text']}")
    context_text = "\n\n".join(cited_passages)

    completion = client_open_ai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"{user_query}\n\nRelevant Rules:\n{context_text}"},
        ],
        temperature=temp,
    )
    return completion.choices[0].message.content


In [None]:
# End-to-end check: retrieve rule chunks for the question and print the model's answer.
answer = ask_gpt4o_with_rag("How can I generate a net architecture?")
print(answer)

## WORLD GENERATION

In [None]:
import pprint
# Open the Night City atlas PDF and create an empty world-state skeleton.
atlas = pymupdf.open("NightCityAtlas Copy.pdf")
world_state = {"locations": {}, "factions": {}}
current_location = None
# Basic regex patterns for lines of atlas text.
# Location header: a name followed by one word character in parentheses at end of line.
location_header = re.compile(r"^(.+?)\s+\((\w)\)$")  # e.g., Little Europe (A)
# Sub-location entry: "(<char><digits>) <title>", optionally followed by a colon.
# NOTE(review): `.+?` is lazy and the colon is optional, so group 2 stops at the
# first whitespace and a multi-word title is captured only up to its first word.
# Confirm that this is intended.
sublocation_header = re.compile(r"^\((\w\d+)\)\s(.+?):?\s")
# Field lines: group 1 captures everything after the label.
manager_pattern = re.compile(r"City Manager:\s(.+)")
security_pattern = re.compile(r"Security Provider:\s(.+)")
gangs_pattern = re.compile(r"Gangs Present:\s(.+)")

In [None]:
import re
import json
from pathlib import Path

# Text file that is passed to parse_world_text_upper.
INPUT_FILE = "NightCityAtlas.txt"
# JSON file that save_world_state writes the parsed world to.
OUTPUT_FILE = "campaign/world_state.json"

def parse_world_text_upper(filepath):
    """Parse the plain-text atlas export into a ``{"locations": {...}}`` dictionary.

    Every non-blank line is stripped and upper-cased before matching, so region
    names, location names, descriptions, managers, security providers and
    factions are stored in upper case. Sub-location titles are title-cased and
    their descriptions capitalised.

    Recognised layout (after upper-casing):
      * ``##REGION NAME##`` starts a region; that line and the one after it
        are skipped.
      * ``NAME (X)`` (one capital letter in parentheses) starts a location.
      * The following lines form the description, up to ``CITY MANAGER:`` or
        the next region/location header, whichever comes first.
      * Optional ``CITY MANAGER:``, ``SECURITY PROVIDER:`` and
        ``GANGS PRESENT:`` lines, in that order.
      * An optional line containing ``LOCATIONS`` followed by
        ``(X1) TITLE: DESCRIPTION`` entries.

    Args:
        filepath: Path of the UTF-8 text file to parse.

    Returns:
        dict: ``{"locations": {name: data}}`` where ``data`` holds the keys
        "description", "factions", "city_manager", "security_provider",
        "npcs_present", "events", "sub_locations" and "region".
    """
    # Compiled once instead of on every line.
    region_re = re.compile(r"^##.+##$")
    location_re = re.compile(r"^(.*?)\s+\([A-Z]\)$")
    sub_marker_re = re.compile(r"^\([A-Z]\d+\)")
    sub_entry_re = re.compile(r"^\(([A-Z]\d+)\)\s+(.+?):\s+(.+)")

    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip().upper() for line in f if line.strip()]

    def starts_new_section(text):
        """Return True when ``text`` is a region header or a location header."""
        return bool(region_re.match(text) or location_re.match(text))

    world = {"locations": {}}
    current_region = None
    current_location = None
    i = 0

    while i < len(lines):
        line = lines[i]

        # Match global region: ##GLOBAL LOCATION##
        if region_re.match(line):
            current_region = line.strip("#").strip()
            # Skip the header itself and the line that follows it.
            i += 2
            continue

        # Match location line like "LITTLE EUROPE (A)"
        location_match = location_re.match(line)
        if not location_match:
            i += 1
            continue

        current_location = location_match.group(1).strip()
        location_data = {
            "description": "",
            "factions": [],
            "city_manager": None,
            "security_provider": None,
            "npcs_present": [],
            "events": [],
            "sub_locations": {},
            "region": current_region
        }

        # Read location description. Stopping at the next header as well as at
        # "CITY MANAGER:" keeps a location that has no manager line from
        # absorbing every following location into its description.
        i += 1
        desc_lines = []
        while (
            i < len(lines)
            and not lines[i].startswith("CITY MANAGER:")
            and not starts_new_section(lines[i])
        ):
            desc_lines.append(lines[i])
            i += 1
        location_data["description"] = " ".join(desc_lines).strip()

        # Manager
        if i < len(lines) and lines[i].startswith("CITY MANAGER:"):
            location_data["city_manager"] = lines[i].split(":", 1)[1].strip()
            i += 1

        # Security
        if i < len(lines) and lines[i].startswith("SECURITY PROVIDER:"):
            location_data["security_provider"] = lines[i].split(":", 1)[1].strip()
            i += 1

        # Gangs
        if i < len(lines) and lines[i].startswith("GANGS PRESENT:"):
            gangs = lines[i].split(":", 1)[1]
            location_data["factions"] = [g.strip() for g in gangs.split(",")]
            i += 1

        # Sublocations
        if i < len(lines) and "LOCATIONS" in lines[i]:
            i += 1
            while i < len(lines) and sub_marker_re.match(lines[i]):
                sub_match = sub_entry_re.match(lines[i])
                if sub_match:
                    _, title, desc = sub_match.groups()
                    location_data["sub_locations"][title.title()] = desc.strip().capitalize()
                i += 1

        # Save location entry; the main loop resumes at the current line.
        world["locations"][current_location] = location_data

    return world
def save_world_state(world, output_path):
    """Write ``world`` to ``output_path`` as indented UTF-8 JSON.

    The directory that will contain the file is created first (including any
    missing parents), so the function works for any destination and not only
    for paths under "campaign/".

    Args:
        world: JSON-serialisable dictionary to save.
        output_path: Destination file path.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(world, f, indent=2, ensure_ascii=False)
    print(f"✅ World state saved to {output_path}")

In [None]:
# Parse the atlas text export and write the result to OUTPUT_FILE as JSON.
world_data = parse_world_text_upper(INPUT_FILE)
save_world_state(world_data, OUTPUT_FILE)

## WORLD RAG

In [159]:
import uuid

def build_rag_chunks(world):
    """Flatten the world dictionary into embeddable text chunks.

    One chunk is produced per location ("<name>: <description>") and one per
    sub-location ("<title>: <description>"), in the order they appear in
    ``world["locations"]``.

    Chunk ids are derived from the location name (and the sub-location title),
    so the same world always yields the same ids. Re-indexing can therefore
    recognise existing entries instead of storing a second copy under a new
    random id.

    Args:
        world: Dictionary with a "locations" mapping of name -> location data.
            Each location needs a "description"; "region" and "sub_locations"
            are optional.

    Returns:
        list[dict]: Chunks with the keys "id", "text" and "metadata".
    """
    chunks = []

    for loc_name, loc_data in world["locations"].items():
        region = loc_data.get("region")
        location_uuid = uuid.uuid5(uuid.NAMESPACE_URL, f"night-city-location/{loc_name}")

        # Main location entry
        chunks.append({
            "id": str(location_uuid),
            "text": f"{loc_name}: {loc_data['description']}",
            "metadata": {
                "region": region,
                "location": loc_name,
                "type": "location"
            }
        })

        # Sub-locations: ids are namespaced under the parent location's id so
        # equal titles in different locations stay distinct.
        for sub_name, sub_desc in loc_data.get("sub_locations", {}).items():
            chunks.append({
                "id": str(uuid.uuid5(location_uuid, sub_name)),
                "text": f"{sub_name}: {sub_desc}",
                "metadata": {
                    "region": region,
                    "location": loc_name,
                    "sublocation": sub_name,
                    "type": "sublocation"
                }
            })

    return chunks

In [None]:
from sentence_transformers import SentenceTransformer
import chromadb

# Embedding model and Chroma client; query_with_context reads both globals.
model = SentenceTransformer("all-MiniLM-L6-v2")
client = chromadb.PersistentClient(path="./vectordb")
collection = client.get_or_create_collection("night_city_locations")

rag_chunks = build_rag_chunks(world_data)

# Encode and write in batches: one model call and one database call per batch
# instead of one per chunk. upsert overwrites an entry whose id already exists
# rather than storing a second copy.
INDEX_BATCH_SIZE = 256
for start in range(0, len(rag_chunks), INDEX_BATCH_SIZE):
    batch = rag_chunks[start:start + INDEX_BATCH_SIZE]
    texts = [chunk["text"] for chunk in batch]
    collection.upsert(
        ids=[chunk["id"] for chunk in batch],
        embeddings=model.encode(texts).tolist(),
        documents=texts,
        metadatas=[chunk["metadata"] for chunk in batch]
    )

print(f"✅ Indexed {len(rag_chunks)} chunks.")

In [160]:
# Fetch the collection (creating it if missing) and display the returned object.
# NOTE(review): the saved output under this cell lists ids and embeddings, which
# looks like the result of a different call (presumably a `.get(...)` on the
# collection) — re-run the cell or restore the call that produced it.
client.get_or_create_collection("night_city_locations")

{'ids': ['2029eb31-cf12-471a-a9ef-cac86a682862',
  '071d2894-33e0-4cd0-a3df-7c41cb18d4fb',
  'c8e12a71-8a5c-4597-b612-90912ff4d35f',
  '9b76fdfc-6932-4158-bbe2-65963c50c277',
  'ce733234-c773-4ed8-83e7-77b5592de0ba',
  'f300598c-cfc6-4a6c-a20e-df0670fae643',
  '55a664dc-2f84-41f6-b199-9dcf13053183',
  '25d92ee0-39da-4fb1-8f4a-71bedabec354',
  '7808a837-b2f8-47c3-8a23-1fa4ac62a5af',
  'd6de38a3-68e8-40f0-ab77-86be1fa2cf59'],
 'embeddings': array([[ 0.08824068,  0.0775604 , -0.03163269, ...,  0.04255744,
         -0.05548633,  0.08193853],
        [ 0.01131884,  0.08527941,  0.02205821, ...,  0.0275717 ,
         -0.01869821,  0.04943081],
        [-0.09070063,  0.02243145, -0.04655228, ..., -0.01629366,
         -0.0125857 ,  0.0195929 ],
        ...,
        [ 0.0191662 ,  0.03951368, -0.01767524, ...,  0.00370416,
          0.05089001,  0.02286817],
        [-0.00567449, -0.04538913, -0.02788978, ...,  0.0116009 ,
         -0.09213474,  0.02040352],
        [ 0.05858824,  0.05028891, 

In [None]:
def route_query(query: str) -> list[str]:
    """Pick the Chroma collections to search for a question.

    A collection is selected when any of its trigger strings occurs as a
    substring of the lower-cased query. Rules are checked before world data,
    and both collections are returned when nothing matches.

    Args:
        query: The user's question.

    Returns:
        Collection names to search, in the order rules then world.
    """
    triggers_by_collection = (
        (
            "cpr_core_rules",
            ("dv", "roll", "damage", "cyberware", "initiative", "skill", "autofire", "check", "attack", "armor"),
        ),
        (
            "night_city_locations",
            ("location", "where", "district", "gang", "building", "club", "bar", "clinic", "neighborhood", "city"),
        ),
    )

    lowered = query.lower()
    matched = [
        collection_name
        for collection_name, triggers in triggers_by_collection
        if any(trigger in lowered for trigger in triggers)
    ]

    # No trigger at all: search everything rather than nothing.
    return matched if matched else ["cpr_core_rules", "night_city_locations"]

In [None]:
def ask_gpt4o_with_context(user_query: str, context_text: str, system_prompt: str) -> str:
    """Send a question plus retrieved context to gpt-4o-mini and return its reply.

    Args:
        user_query: The user's question.
        context_text: Retrieved passages, appended to the question under "Context:".
        system_prompt: Instruction used as the system message.

    Returns:
        The text content of the first completion choice.
    """
    user_message = f"{user_query}\n\nContext:\n{context_text}"

    completion = client_open_ai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        temperature=0.4,
    )
    return completion.choices[0].message.content

In [None]:
def query_with_context(query: str, k: int = 4):
    """Answer a question using context retrieved from the routed collections.

    ``route_query`` selects the collections; the top ``k`` hits from each are
    labelled with "<chapter> – p.<page>" when both metadata fields exist, or
    otherwise with the title-cased location (falling back to the collection
    name). The labelled passages are sent to the chat model with a system
    prompt chosen by which collections were searched.

    Args:
        query: The user's question.
        k: Number of hits requested from each collection.

    Returns:
        The answer text returned by ``ask_gpt4o_with_context``.
    """
    source_names = route_query(query)
    # The embedding does not depend on the collection, so compute it once
    # rather than once per collection.
    query_embedding = model.encode(query).tolist()

    all_contexts = []
    for name in source_names:
        results = client.get_collection(name).query(
            query_embeddings=[query_embedding], n_results=k
        )

        for doc, meta in zip(results["documents"][0], results["metadatas"][0]):
            # Guard against an entry that was stored without metadata.
            meta = meta or {}
            if "chapter" in meta and "page" in meta:
                label = f"{meta['chapter']} – p.{meta['page']}"
            else:
                label = meta.get("location", name).title()
            all_contexts.append(f"[{label}]\n{doc}")

    context_text = "\n\n".join(all_contexts)

    # Pick system prompt dynamically: strict rules-only wording when the rules
    # collection is the only source, the broader prompt otherwise.
    if source_names == ["cpr_core_rules"]:
        system_prompt = (
            "You are a Cyberpunk RED game assistant. Answer the user's question "
            "using only the official rules. Include relevant citations (e.g., page number and chapter). "
            "If the rules are unclear, say so rather than guessing."
        )
    else:
        system_prompt = (
            "You are a Cyberpunk RED game assistant. Use the context below to answer the user's question truthfully. "
            "If the context is from the rules, cite the mechanic. If it's from the world, describe it accurately."
        )

    return ask_gpt4o_with_context(query, context_text, system_prompt)

In [None]:
# Example question; the cell displays the answer returned by query_with_context.
query_with_context("Based on the rules, how far is it to get to Afterlife from The Glen?")

In [None]:
import xml.etree.ElementTree as ET
import json

def parse_weapon_table(xml_path):
    """Extract weapon entries from an XML table of ``<row>``/``<cell>`` elements.

    Rows are classified as follows:
      * rows whose cells are all empty are skipped;
      * a row with exactly one cell that is not all upper case and has at
        most three words sets the current category (and is printed);
      * a row with at least three non-empty cells, one of which (after the
        name) contains "eb", becomes a weapon entry.

    The first non-empty cell is the name. Cost and source are looked up only in
    the cells after the name, so a name that happens to contain "eb" or a
    source abbreviation is never mistaken for the cost or the source.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        list[dict]: Entries with the keys "name", "cost", "source" (empty
        string when no source cell is found) and "category" (None until a
        category row has been seen).
    """
    source_markers = ("CP:", "DL:", "IR", "MC", "DGD", "CEMK", "BC")

    rows = ET.parse(xml_path).getroot().findall(".//row")
    weapons = []

    current_category = None

    for row in rows:
        cells = [cell.text.strip() if cell.text else "" for cell in row.findall("cell")]

        # Skip empty rows
        if not any(cells):
            continue

        # Category row example: "Medium Pistols"
        if len(cells) == 1 and not cells[0].isupper() and len(cells[0].split()) <= 3:
            current_category = cells[0]
            print(current_category)
            continue

        # Weapon rows: expect at least name, cost, source
        non_empty = [c for c in cells if c]
        if len(non_empty) < 3:
            continue

        name, details = non_empty[0], non_empty[1:]
        cost = next((c for c in details if "eb" in c.lower()), None)
        if cost is None:
            # No cost cell after the name: not a weapon row.
            continue

        weapons.append({
            "name": name,
            "cost": cost,
            "source": next(
                (c for c in details if any(marker in c for marker in source_markers)),
                "",
            ),
            "category": current_category
        })

    return weapons

# Parse the item index and write the extracted weapons to night_market.json.
weapons = parse_weapon_table("item_index_1.xml")

with open("night_market.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(weapons, f, indent=2, ensure_ascii=False)

print(f"✅ Extracted {len(weapons)} weapons to night_market.json")

In [None]:
# Build a page -> TOC title map for "TalesOfRed.pdf" and display it.
# NOTE(review): this rebinds the global `core_book` that an earlier cell set to
# the core rulebook, and `chunks` is assigned but not used in this cell.
core_book = pymupdf.open("TalesOfRed.pdf")
chunks = []
toc = core_book.get_toc(simple=True)
page_map = {}
toc_index = 0
for page_num in range(len(core_book)):
        # Advance to the last TOC entry whose page number (index 2 of the
        # entry) is <= the current 1-based page number.
        while toc_index + 1 < len(toc) and toc[toc_index + 1][2] <= page_num + 1:
            toc_index += 1
        # Index 1 of a TOC entry is used as the title; "Unknown" when the TOC is empty.
        page_map[page_num] = toc[toc_index][1] if toc else "Unknown"
page_map

In [None]:
# NOTE(review): looks like an unfinished scratch list (the single entry ends
# with a trailing space) and nothing in the visible notebook reads it —
# complete it or remove the cell.
stories = ["Night "]

In [None]:
# Display the table-of-contents list returned by the earlier get_toc call.
toc