In [None]:
import json
import os
import re
from openai import OpenAI
import numpy as np
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv()) # read local .env file

In [None]:
# Folder where this notebook writes its artefacts (embeddings, retrieved context, answers).
output_folder = "./output_chapter/"
# exist_ok=True keeps the cell idempotent and avoids the check-then-create race.
os.makedirs(output_folder, exist_ok=True)

# Input file: chunk records under a top-level "content" key (see how `data` is used below).
# NOTE(review): the input is read from "./output_chapters/" (plural) while outputs go to
# "./output_chapter/" (singular) — confirm that two different folders are intended.
chapter_file_name = "chapter_chunks_with_info.json"
chapter_file_path = f"./output_chapters/{chapter_file_name}"

if not os.path.exists(chapter_file_path):
    print(f"File {chapter_file_path} does not exist.")
    raise SystemExit(1)

# Decode explicitly as UTF-8 so the result does not depend on the platform's default
# encoding (additional_chunk_info reads this same file as UTF-8).
with open(chapter_file_path, "r", encoding="utf-8") as file:
    data = json.load(file)


In [None]:
def additional_chunk_info(json_path):
    """
    Summarise how large the chunks stored in the JSON file at `json_path` are.

    The file is expected to hold a "content" mapping of chunk key -> chunk record,
    where every record carries a "num_characters" value. The smallest, largest and
    average character counts are printed, together with the keys of the smallest and
    largest chunk (the first one wins on ties). Nothing is returned.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        chunks_by_key = json.load(handle).get("content", {})

    # chunk key -> character count, in file order
    sizes = {key: info["num_characters"] for key, info in chunks_by_key.items()}

    if not sizes:
        print("No chunks found.")
        return

    # min()/max() return the first extreme they meet, i.e. the earliest chunk on ties
    smallest_key = min(sizes, key=sizes.get)
    largest_key = max(sizes, key=sizes.get)
    mean_size = sum(sizes.values()) / len(sizes)

    print(f"Minimum characters in a chunk: {sizes[smallest_key]} (chunk: {smallest_key})")
    print(f"Maximum characters in a chunk: {sizes[largest_key]} (chunk: {largest_key})")
    print(f"Average characters per chunk: {mean_size:.2f}")

# Print the minimum, maximum and average chunk size of the chapter file.
additional_chunk_info(json_path=chapter_file_path)
# This is fine because 1000 characters are roughly 250 tokens, which is a good size for LLMs even when we send multiple chunks.

In [None]:
# Build the OpenAI client with the API key taken from the environment.
# Passing the key to the constructor (instead of assigning it afterwards) makes a missing
# OPENAI_API_KEY fail right here with a KeyError that names the variable.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
# OpenAI is used here, but any other embedding library could take its place
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector of `text`, computed by `model` via the notebook-level `client`."""
    # The endpoint takes a batch; send a batch of one and unwrap its single result
    result = client.embeddings.create(model=model, input=[text])
    first_item = result.data[0]
    return first_item.embedding

In [None]:
# Smoke-test the embedding call. Only the dimensionality and a short preview are shown,
# because printing the whole vector floods the cell output.
embedded = get_embedding("These are words")
print(f"Embedding dimension: {len(embedded)}")
print(f"Embedding preview: {embedded[:5]} ...")

In [None]:
# 1. Embed the player question and save it
player_question = "What are my best traits as an elf?"
player_question_embedding = get_embedding(player_question)
player_question_embedding_file_path = f"{output_folder}player_question_embedding.json"

# Store the question next to its vector so the file is self-describing.
with open(player_question_embedding_file_path, "w", encoding="utf-8") as f:
    json.dump(
        {
            "question": player_question,
            "embedding": player_question_embedding,
        },
        f,
    )

# Report the real destination (including the output folder), not just the bare file name.
print(f"Player question embedding saved to {player_question_embedding_file_path}")

In [None]:
# 2. Embed each chunk and save to a new JSON file
embeded_chunks_file_path = f"{output_folder}embedded_chunks.json"

# Reuse vectors from a previous run so that re-running the notebook does not pay for one
# API call per chunk again. An unreadable or malformed cache file is simply ignored.
# NOTE: the cache does not record which embedding model produced a vector; delete the
# file after changing the model used by get_embedding.
previously_embedded = {}
if os.path.exists(embeded_chunks_file_path):
    try:
        with open(embeded_chunks_file_path, "r", encoding="utf-8") as f:
            previously_embedded = json.load(f)
    except (OSError, ValueError):  # json.JSONDecodeError is a ValueError
        previously_embedded = {}
if not isinstance(previously_embedded, dict):
    previously_embedded = {}

embedded_chunks = {}
for key, chunk in data["content"].items():
    chunk_text = chunk["text"]
    cached_chunk = previously_embedded.get(key)
    # Only trust a cached vector when it was computed for exactly this text.
    if (
        isinstance(cached_chunk, dict)
        and cached_chunk.get("text") == chunk_text
        and cached_chunk.get("embedding")
    ):
        embedding = cached_chunk["embedding"]
    else:
        embedding = get_embedding(chunk_text)
    # Copy so the records held in `data` are left untouched; only the vector comes from the cache.
    chunk_with_embedding = chunk.copy()
    chunk_with_embedding["embedding"] = embedding
    embedded_chunks[key] = chunk_with_embedding

with open(embeded_chunks_file_path, "w", encoding="utf-8") as f:
    json.dump(embedded_chunks, f)


In [None]:

# 3. Read the new JSON file and compare to find the 3 most similar chunks
def cosine_similarity(a, b):
    """
    Return the cosine similarity between two vectors.

    Args:
        a: First vector (any sequence or array accepted by numpy).
        b: Second vector, same length as `a`.

    Returns:
        dot(a, b) / (|a| * |b|). If either vector has zero norm the similarity is
        undefined; 0.0 is returned in that case instead of NaN, so that ranking by
        similarity keeps working.
    """
    vec_a = np.asarray(a)
    vec_b = np.asarray(b)
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        # Avoid 0/0 -> NaN (plus a RuntimeWarning); NaN would make sort order unpredictable.
        return 0.0
    return np.dot(vec_a, vec_b) / denominator

# Load the chunk records (text + embedding) that were saved to disk
with open(embeded_chunks_file_path, "r") as chunks_file:
    loaded_chunks = json.load(chunks_file)

# Load the stored question record and pull out its embedding vector
with open(player_question_embedding_file_path, "r") as question_file:
    player_q = json.load(question_file)
player_embedding = player_q["embedding"]

# Score every chunk against the question: a list of (chunk key, similarity) pairs
similarities = [
    (chunk_key, cosine_similarity(player_embedding, chunk_record["embedding"]))
    for chunk_key, chunk_record in loaded_chunks.items()
]

In [None]:
# Rank the scored chunks from most to least similar and keep the best three
ranked = list(similarities)
ranked.sort(key=lambda pair: pair[1], reverse=True)
top_3 = ranked[:3]

print("Top 3 most similar chunks:")
for chunk_key, score in top_3:
    print(f"Chunk {chunk_key} (similarity: {score:.4f}):")
    # Show only the first 300 characters of each chunk
    snippet = loaded_chunks[chunk_key]["text"][:300]
    print(snippet)
    print("---")

In [None]:
# Concatenate the text of the three best chunks, separated by a blank line
top_3_texts = []
for chunk_key, _score in top_3:
    top_3_texts.append(loaded_chunks[chunk_key]["text"])
top_3_chunks = "\n\n".join(top_3_texts)

# 4. Persist the combined context to a text file
top_3_chunks_file_path = f"{output_folder}top_3_chunks.txt"
with open(top_3_chunks_file_path, "w") as out_file:
    out_file.write(top_3_chunks)
print(f"Top 3 chunks saved to {top_3_chunks_file_path}")

In [None]:
# Read the saved context back from disk so the prompt uses exactly what was persisted
with open(top_3_chunks_file_path, "r") as saved_context:
    top_3_chunks_text = saved_context.read()

print("Top 3 chunks text:")
# Only the first 300 characters are shown, for brevity
print(top_3_chunks_text[:300])

In [None]:
# Build the final prompt: instructions, the player's question and the retrieved context.
player_question_with_context = f'''
You are a helpful assistant that provides answers based on the context of a fantasy world. You must use the provided context to answer the player's question. You MUST provide snippets from the context to support your answer. If you don't know the answer, say "I don't know" and do not make up information.
Player Question: {player_question}
Context: {top_3_chunks_text}
'''

# Reuse the client configured earlier in the notebook instead of constructing (and
# shadowing it with) a second one.
response = client.responses.create(
    model="o4-mini-2025-04-16",
    input=player_question_with_context,
)

print(response.output_text)

In [None]:
def answer_player_question(question, client, data, output_folder, embedded_chunks_file_path,
                           top_k=3, model="o4-mini-2025-04-16"):
    """
    Answer a player question with retrieval-augmented generation and archive the session.

    The question is embedded, compared (cosine similarity) against every stored chunk
    embedding, and the `top_k` most similar chunks are given to the LLM as context. The
    question embedding, the retrieved context and the answer are written to a fresh
    ``session_<timestamp>_<id>`` folder under `output_folder` for later review.

    Args:
        question: The player's question.
        client: OpenAI client used for the answer-generation call.
        data: Not read by this function; kept so existing callers keep working. Chunks
            are always loaded from `embedded_chunks_file_path`.
        output_folder: Folder in which the session folder is created.
        embedded_chunks_file_path: JSON file mapping chunk key -> record with at least
            "text" and "embedding".
        top_k: Number of most similar chunks used as context (default 3).
        model: Model name passed to the responses API.

    Returns:
        The answer text produced by the model.

    Raises:
        ValueError: If `top_k` is smaller than 1.
    """
    import uuid
    import datetime

    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Create a unique session folder for this question (timestamp + short random suffix)
    session_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_" + str(uuid.uuid4())[:8]
    session_folder = os.path.join(output_folder, f"session_{session_id}")
    os.makedirs(session_folder, exist_ok=True)

    # 1. Embed the player question and save it
    # NOTE: get_embedding talks to the notebook-level client, not the `client` argument.
    embedding = get_embedding(question)
    question_embedding_file_path = os.path.join(session_folder, "player_question_embedding.json")
    with open(question_embedding_file_path, "w", encoding="utf-8") as f:
        json.dump({
            "question": question,
            "embedding": embedding
        }, f)

    # 2. Load embedded chunks
    with open(embedded_chunks_file_path, "r", encoding="utf-8") as f:
        loaded_chunks = json.load(f)

    # 3. Compute similarities and keep the best `top_k`
    similarities = [
        (key, cosine_similarity(embedding, chunk["embedding"]))
        for key, chunk in loaded_chunks.items()
    ]
    top_chunks = sorted(similarities, key=lambda x: x[1], reverse=True)[:top_k]

    # 4. Join the selected chunks into the context and save it.
    # The file keeps its historical name even when top_k differs from 3.
    # UTF-8 is explicit because chunk text may contain non-ASCII characters.
    top_3_chunks = "\n\n".join(
        [loaded_chunks[key]["text"] for key, _ in top_chunks]
    )
    top_3_chunks_file_path = os.path.join(session_folder, "top_3_chunks.txt")
    with open(top_3_chunks_file_path, "w", encoding="utf-8") as f:
        f.write(top_3_chunks)

    # 5. Prepare prompt and get answer
    player_question_with_context = f'''
You are a helpful assistant that provides answers based on the context of a fantasy world. You must use the provided context to answer the player's question. You MUST provide snippets from the context to support your answer. If you don't know the answer, say "I don't know" and do not make up information.
Player Question: {question}
Context: {top_3_chunks}
'''
    response = client.responses.create(
        model=model,
        input=player_question_with_context
    )
    answer = response.output_text

    # Save the answer
    answer_file_path = os.path.join(session_folder, "answer.txt")
    with open(answer_file_path, "w", encoding="utf-8") as f:
        f.write(answer)

    print(f"Session saved in: {session_folder}")
    print(answer)
    return answer

In [None]:
# Run the full retrieve-then-answer pipeline for a new question.
# `data` is part of the function's signature, but answer_player_question never reads it:
# it loads the chunks itself from embedded_chunks_file_path.
player_question = "How far do I see as a dwarf?"
with open(embeded_chunks_file_path, "r") as f:
    loaded_chunks = json.load(f)

answer_player_question(
    question=player_question,
    client=client,
    data=loaded_chunks,
    output_folder=output_folder,
    embedded_chunks_file_path=embeded_chunks_file_path
)

In [None]:
# Same pipeline with a different question.
# NOTE(review): presumably a probe for a question the retrieved context cannot answer
# (the prompt asks the model to reply "I don't know" in that case) — confirm intent.
player_question = "How far do I see as a spider?"
with open(embeded_chunks_file_path, "r") as f:
    loaded_chunks = json.load(f)

answer_player_question(
    question=player_question,
    client=client,
    data=loaded_chunks,
    output_folder=output_folder,
    embedded_chunks_file_path=embeded_chunks_file_path
)

In [None]:
# Same pipeline with an open-ended question that does not name a specific race.
player_question = "If I want to be quicker, which race should I be?"
with open(embeded_chunks_file_path, "r") as f:
    loaded_chunks = json.load(f)

answer_player_question(
    question=player_question,
    client=client,
    data=loaded_chunks,
    output_folder=output_folder,
    embedded_chunks_file_path=embeded_chunks_file_path
)

In [None]:
# Same pipeline with a comparison question that names two races; a single retrieval
# pass of the top 3 chunks has to supply the context for both of them.
player_question = "If I want to be quicker, should I be a dwarf or an elf?"
with open(embeded_chunks_file_path, "r") as f:
    loaded_chunks = json.load(f)

answer_player_question(
    question=player_question,
    client=client,
    data=loaded_chunks,
    output_folder=output_folder,
    embedded_chunks_file_path=embeded_chunks_file_path
)

### Now this is the fun part
RAG is super powerful because it is super simple: get the context you need... but how do we know where to find what we need? Or even worse: how do we know what we need in the first place?

#### Ideas
We could create an agent that splits the question into sub-questions, retrieves context for each sub-question with RAG, and then combines all of that context to answer the original question (let's see how GPT does this).

In [None]:
import ast

def _parse_sub_questions(raw_reply, fallback_question):
    """Extract a list of sub-question strings from an LLM reply, or return [fallback_question]."""
    if not raw_reply:
        return [fallback_question]
    # Models often wrap the list in prose or a ```python fence; keep only the [...] span.
    match = re.search(r"\[.*\]", raw_reply, flags=re.DOTALL)
    if match is None:
        return [fallback_question]
    try:
        parsed = ast.literal_eval(match.group(0))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return [fallback_question]
    if not isinstance(parsed, (list, tuple)):
        return [fallback_question]
    # Keep only non-empty strings; anything else cannot be embedded as a question.
    sub_questions = [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
    return sub_questions or [fallback_question]


def agent_decompose_and_rag(question, client, data, output_folder, embedded_chunks_file_path,
                            top_k=3, model="gpt-3.5-turbo"):
    """
    Answer a question by decomposing it into sub-questions and running RAG for each.

    1. Uses the LLM to decompose the question into sub-questions. If the reply cannot be
       parsed into a list of strings, the original question is used as the only sub-question.
    2. For each sub-question, retrieves the `top_k` most similar chunks (cosine similarity).
    3. Aggregates all retrieved context and asks the LLM to answer the original question,
       citing context.

    Args:
        question: The player's question.
        client: OpenAI client used for both chat-completion calls.
        data: Not read by this function; kept so existing callers keep working.
        output_folder: Not read by this function; kept so existing callers keep working.
        embedded_chunks_file_path: JSON file mapping chunk key -> record with at least
            "text" and "embedding".
        top_k: Number of chunks retrieved per sub-question (default 3).
        model: Chat model used for decomposition and for the final answer.

    Returns:
        The final answer text produced by the model (it is also printed).

    Raises:
        ValueError: If `top_k` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Step 1: Decompose the question
    decompose_prompt = f"""
You are an expert assistant. Given the following player question, break it down into the minimal set of sub-questions needed to fully answer it. Return the sub-questions as a Python list of strings.

Player Question: {question}
"""
    decompose_response = client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": decompose_prompt}]
    )
    # Best effort: an unparsable reply falls back to the original question.
    sub_questions = _parse_sub_questions(
        decompose_response.choices[0].message.content, question
    )

    # Step 2: For each sub-question, get the top-k context chunks
    with open(embedded_chunks_file_path, "r", encoding="utf-8") as f:
        loaded_chunks = json.load(f)

    all_contexts = []
    for sub_q in sub_questions:
        # NOTE: get_embedding talks to the notebook-level client, not the `client` argument.
        embedding = get_embedding(sub_q)
        similarities = [
            (key, cosine_similarity(embedding, chunk["embedding"]))
            for key, chunk in loaded_chunks.items()
        ]
        best = sorted(similarities, key=lambda x: x[1], reverse=True)[:top_k]
        context = "\n\n".join([loaded_chunks[key]["text"] for key, _ in best])
        all_contexts.append(f"Sub-question: {sub_q}\nContext:\n{context}")

    # Step 3: Aggregate all context and ask for a final answer
    aggregated_context = "\n\n".join(all_contexts)
    final_prompt = f"""
You are a helpful assistant that answers player questions about a fantasy world. Use the provided context for each sub-question to answer the original question. Cite snippets from the context to support your answer. If you don't know, say "I don't know" and do not make up information.

Original Question: {question}

Context for sub-questions:
{aggregated_context}
"""
    final_response = client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": final_prompt}]
    )
    final_answer = final_response.choices[0].message.content
    print(final_answer)
    return final_answer

In [None]:
# Run the decompose-then-retrieve agent on the comparison question asked earlier.
# `data` and `output_folder` are part of the signature, but agent_decompose_and_rag never
# reads them: it loads the chunks itself from embedded_chunks_file_path.
with open(embeded_chunks_file_path, "r") as f:
    loaded_chunks = json.load(f)

agent_decompose_and_rag(
    question="If I want to be quicker, should I be a dwarf or an elf?",
    client=client,
    data=loaded_chunks,
    output_folder=output_folder,
    embedded_chunks_file_path=embeded_chunks_file_path
)