# Google Colab Link
[Google Colab](https://colab.research.google.com/drive/1XPnM_WpHPfSLTH4SVNDBP85QDlq_IbcH?usp=sharing)

# Clone Reference Repository

In [None]:
!git clone https://github.com/pixegami/rag-tutorial-v2
!git clone https://github.com/pixegami/langchain-rag-tutorial.git

fatal: destination path 'rag-tutorial-v2' already exists and is not an empty directory.
fatal: destination path 'langchain-rag-tutorial' already exists and is not an empty directory.


# Install Libraries

In [None]:
# Install necessary tools (the pciutils system package) before running the Ollama installer
!sudo apt-get install -y pciutils
# Download the Ollama install script and pipe it straight into sh
!curl -fsSL https://ollama.com/install.sh | sh
# clear_output is imported here but first used by the following cell,
# which calls it to wipe the `ollama pull` output
from IPython.display import clear_output

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
pciutils is already the newest version (1:3.7.0-6).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 CLI
############################################################################################# 100.0%
>>> Making ollama accessible in the PATH in /usr/local/bin
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> NVIDIA GPU installed.
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [None]:
# Pull the Mistral model with the Ollama CLI, then wipe this cell's output.
# NOTE(review): clear_output() also hides any error the pull prints, and the
# "Run Ollama Server" section that launches `ollama serve` only comes later in
# this notebook — confirm a server is reachable when this cell runs.
!ollama pull mistral
clear_output()

In [None]:
!pip install ollama



In [None]:
!pip install marker-pdf



In [None]:
!pip install -r /content/langchain-rag-tutorial/requirements.txt
!pip install -r /content/rag-tutorial-v2/requirements.txt
!pip install "unstructured[md]"

Collecting langchain==0.2.2 (from -r /content/langchain-rag-tutorial/requirements.txt (line 2))
  Using cached langchain-0.2.2-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community==0.2.3 (from -r /content/langchain-rag-tutorial/requirements.txt (line 3))
  Using cached langchain_community-0.2.3-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-openai==0.1.8 (from -r /content/langchain-rag-tutorial/requirements.txt (line 4))
  Using cached langchain_openai-0.1.8-py3-none-any.whl.metadata (2.5 kB)
Collecting unstructured==0.14.4 (from -r /content/langchain-rag-tutorial/requirements.txt (line 5))
  Using cached unstructured-0.14.4-py3-none-any.whl.metadata (28 kB)
Collecting chromadb==0.5.0 (from -r /content/langchain-rag-tutorial/requirements.txt (line 9))
  Using cached chromadb-0.5.0-py3-none-any.whl.metadata (7.3 kB)
Collecting openai==1.31.1 (from -r /content/langchain-rag-tutorial/requirements.txt (line 10))
  Using cached openai-1.31.1-py3-none-any.whl.metadata (2

# Run Ollama Server

In [None]:
# Create a Python script to start the Ollama API server in a separate thread
import os
import threading
import subprocess
import requests
import json


def ollama():
    """Spawn `ollama serve` as a child process.

    OLLAMA_HOST and OLLAMA_ORIGINS are written into this process's environment
    first, so the spawned server inherits them. The Popen handle is not kept.
    """
    server_settings = {
        'OLLAMA_HOST': '0.0.0.0:11434',
        'OLLAMA_ORIGINS': '*',
    }
    os.environ.update(server_settings)

    serve_command = ["ollama", "serve"]
    subprocess.Popen(serve_command)


ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()

# Parse PDFs as Markdown Using Marker

## Import Libraries

In [None]:
import requests
import os

## Define Paths

In [None]:
CHROMA_PATH = "chroma"
DATA_PATH = "/content/rag-tutorial-v2/data"

In [None]:
# Define the URL and the path
url = "https://czechgames.com/files/rules/codenames-rules-en.pdf"
save_path = DATA_PATH + "/codenames-rules-en.pdf"

# Ensure the directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Download the file. The timeout keeps the cell from hanging indefinitely, and
# raise_for_status() aborts on an HTTP error instead of letting an error page
# be written to disk under a ".pdf" name.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Save the file
with open(save_path, 'wb') as file:
    file.write(response.content)

In [None]:
os.makedirs('/content/rag-tutorial-v2/data/md', exist_ok=True)

## Run Parsing

In [None]:
!marker /content/rag-tutorial-v2/data /content/rag-tutorial-v2/data/md --workers 4

2024-08-26 01:50:50.492411: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-26 01:50:50.510098: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-26 01:50:50.531555: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-26 01:50:50.538189: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-26 01:50:50.554049: I tensorflow/core/platform/cpu_feature_guar

In [None]:
import shutil

# Target folder: the top-level Markdown directory that later cells read from.
destination_folder = "/content/rag-tutorial-v2/data/md"

# Each converted document sits in its own sub-folder of the md directory;
# these are the individual .md files to lift up one level.
files_to_copy = [
    "/content/rag-tutorial-v2/data/md/codenames-rules-en/codenames-rules-en.md",
    "/content/rag-tutorial-v2/data/md/monopoly/monopoly.md",
    "/content/rag-tutorial-v2/data/md/ticket_to_ride/ticket_to_ride.md"
]

for file in files_to_copy:
    shutil.copy(file, destination_folder)

# Loading Markdown Documents
- https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/

## Define Paths

In [None]:
import requests
import os

In [None]:
# NOTE: this reassigns DATA_PATH, which an earlier cell set to
# "/content/rag-tutorial-v2/data". From here on it points at the Markdown
# folder, so re-running the earlier PDF download cell after this one would
# build its save path under md/ instead.
DATA_PATH = "/content/rag-tutorial-v2/data/md"

## Define and Call Function

In [None]:
from langchain.document_loaders import UnstructuredMarkdownLoader


def load_documents():
    """
    Purpose/Usage: Loads every Markdown (.md) file found directly inside DATA_PATH.
    Inputs: None (the directory is taken from the module-level DATA_PATH).
    Outputs/Returns: A list of Document objects, ordered by file name.
    """
    documents = []
    # os.listdir() returns entries in arbitrary order; sorting makes the document
    # order (and therefore documents[0]) the same on every run.
    for filename in sorted(os.listdir(DATA_PATH)):
        if filename.endswith('.md'):
            file_path = os.path.join(DATA_PATH, filename)
            loader = UnstructuredMarkdownLoader(file_path)
            documents.extend(loader.load())
    return documents

Each document contains the text content of one PDF, along with some attached metadata.

In [None]:
# Print an example document
documents = load_documents()
print(documents[0])

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


page_content='MONOPOLY

Property Trading Game from Parker Brothers" 
AGES 8+

2 to 8 Players Contents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance and Community Chest cards, Title Deed cards, play money and a Banker's tray.

Now there's a faster way to play MONOPOLY. Choose to play by the classic rules for buying, renting and selling properties or use the Speed Die to get into the action faster. If you've never played the classic MONOPOLY game, refer to the Classic Rules beginning on the next page.

If you already know how to play and want to use the Speed Die, just read the section below for the additional Speed Die rules.

Speed Die Rules

Learnins how to Play with the S~eed Die IS as

/ fast as playing with i't.

When starting the game, hand out an extra $1,000 to each player 
(two $5005 should work). The game moves fast and you'll need the extra cash to buy and build.

Do not use the Speed Die until you've landed on or passed over GO for the first time. Once you collect

# Split the Documents

## Define Function

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document


def split_documents(documents: list[Document]):
    """
    Purpose/Usage:
    Cuts the given documents into overlapping chunks with a
    RecursiveCharacterTextSplitter (800-character chunks, 80-character overlap,
    length measured with len, separators treated as plain text).

    Inputs:
    - documents (list[Document]): The documents to split.

    Outputs/Returns:
    - The result of the splitter's split_documents() call on those documents.
    """
    splitter_options = dict(
        chunk_size=800,
        chunk_overlap=80,
        length_function=len,
        is_separator_regex=False,
    )
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)

In [None]:
documents = load_documents()
chunks = split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
print(chunks[0])

Split 3 documents into 74 chunks.
page_content='MONOPOLY

Property Trading Game from Parker Brothers" 
AGES 8+

2 to 8 Players Contents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance and Community Chest cards, Title Deed cards, play money and a Banker's tray.

Now there's a faster way to play MONOPOLY. Choose to play by the classic rules for buying, renting and selling properties or use the Speed Die to get into the action faster. If you've never played the classic MONOPOLY game, refer to the Classic Rules beginning on the next page.

If you already know how to play and want to use the Speed Die, just read the section below for the additional Speed Die rules.

Speed Die Rules

Learnins how to Play with the S~eed Die IS as

/ fast as playing with i't.' metadata={'source': '/content/rag-tutorial-v2/data/md/monopoly.md'}


# Embedding Function
- https://python.langchain.com/v0.2/docs/integrations/text_embedding/


- Create an embedding for each chunk
- The embedding function is needed in 2 places:
  1. When we create the database
  2. When we query the database
- Both places must use the same embedding function

## Import Libraries

In [None]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings import OpenAIEmbeddings
import os

## Define Functions

In [None]:
def get_embedding_function():
    """
    Purpose/Usage:
    Single place where the notebook constructs its embedding object, so that
    every caller obtains the same kind of embeddings.

    Inputs:
    - None

    Outputs/Returns:
    - A freshly constructed OpenAIEmbeddings instance (default settings).
    """
    return OpenAIEmbeddings()

# Populate Database

## Import Libraries

In [None]:
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.vectorstores.chroma import Chroma

## Define Functions

In [None]:
def process_documents(reset=False):
    """
    Purpose/Usage:
    Runs the whole ingestion pipeline: optionally wipes the database, then loads
    the documents, splits them into chunks and stores the chunks in Chroma.

    Inputs:
    - reset (bool): When True, clear_database() is called before anything is loaded.

    Outputs/Returns:
    - None
    """
    if reset:
        print("✨ Clearing Database")
        clear_database()

    # load -> split -> store, evaluated innermost call first
    add_to_chroma(split_documents(load_documents()))


def split_documents(documents: list[Document], chunk_size: int = 800,
                    chunk_overlap: int = 80):
    """
    Purpose/Usage:
    Splits documents into smaller, overlapping chunks for easier processing.

    Inputs:
    - documents (list[Document]): A list of Document objects to be split.
    - chunk_size (int): Maximum chunk length, measured with len(). Defaults to 800,
      the value this notebook has always used.
    - chunk_overlap (int): Number of characters shared by consecutive chunks.
      Defaults to 80.

    Outputs/Returns:
    - A list of Document chunks.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)


def add_to_chroma(chunks: list[Document]):
    """
    Purpose/Usage:
    Adds document chunks to the Chroma vector store at CHROMA_PATH, skipping any
    chunk whose id is already stored.

    Inputs:
    - chunks (list[Document]): A list of Document chunks to be added to the vector store.
      Each chunk gets an "id" entry written into its metadata by calculate_chunk_ids().

    Outputs/Returns:
    - None
    """
    db = Chroma(
        persist_directory=CHROMA_PATH, embedding_function=get_embedding_function()
    )

    chunks_with_ids = calculate_chunk_ids(chunks)

    # Fetch only the ids of what is already stored (IDs are always included by default)
    existing_items = db.get(include=[])
    existing_ids = set(existing_items["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Only add chunks whose id doesn't exist in the database yet
    new_chunks = [
        chunk for chunk in chunks_with_ids
        if chunk.metadata["id"] not in existing_ids
    ]

    if new_chunks:
        print(f"👉 Adding new documents: {len(new_chunks)}")
        new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
        db.add_documents(new_chunks, ids=new_chunk_ids)
        # No db.persist() here: with Chroma >= 0.4 documents are persisted
        # automatically and LangChain marks the manual call as deprecated.
    else:
        print("✅ No new documents to add")


def calculate_chunk_ids(chunks):
    """
    Purpose/Usage:
    Assigns each chunk an id of the form "<source>:<page>:<index>", where index
    counts the chunks seen so far for that source/page pair.

    Inputs:
    - chunks (list[Document]): A list of Document chunks. Each must expose a
      `metadata` dict; missing "source" or "page" keys are rendered as "None".

    Outputs/Returns:
    - The same list object, with every chunk's metadata["id"] set (modified in place).
    """
    # Next free index per "source:page" key. Counting per key, instead of
    # comparing with the previous chunk only, keeps ids unique even when
    # chunks of the same page are not adjacent in the list.
    next_index_by_page = {}

    for chunk in chunks:
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        # Concatenate source and page to make the page-level part of the id
        current_page_id = f"{source}:{page}"

        current_chunk_index = next_index_by_page.get(current_page_id, 0)
        next_index_by_page[current_page_id] = current_chunk_index + 1

        # Add id to the chunk metadata
        chunk.metadata["id"] = f"{current_page_id}:{current_chunk_index}"

    return chunks


def clear_database():
    """
    Purpose/Usage:
    Removes the directory at CHROMA_PATH, and everything in it, if it exists.

    Inputs:
    - None

    Outputs/Returns:
    - None
    """
    # Nothing to delete when the directory is not there
    if not os.path.exists(CHROMA_PATH):
        return
    shutil.rmtree(CHROMA_PATH)

In [None]:
process_documents(reset=True)

✨ Clearing Database


  warn_deprecated(


Number of existing documents in DB: 0
👉 Adding new documents: 74


  warn_deprecated(


In [None]:
!zip -r /content/chroma.zip {CHROMA_PATH} -x "*content*"

  adding: chroma/ (stored 0%)
  adding: chroma/chroma.sqlite3 (deflated 50%)
  adding: chroma/29f2785b-046b-4b17-8b0d-b13eeda72037/ (stored 0%)
  adding: chroma/29f2785b-046b-4b17-8b0d-b13eeda72037/data_level0.bin (deflated 100%)
  adding: chroma/29f2785b-046b-4b17-8b0d-b13eeda72037/link_lists.bin (stored 0%)
  adding: chroma/29f2785b-046b-4b17-8b0d-b13eeda72037/header.bin (deflated 61%)
  adding: chroma/29f2785b-046b-4b17-8b0d-b13eeda72037/length.bin (deflated 48%)


In [None]:
from google.colab import files
files.download('/content/chroma.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Query ChromaDB Vector Database

## Import Libraries

In [None]:
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from IPython.display import display, Markdown

## Define Prompt Template

In [None]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

In [None]:
DEFAULT_MODEL = 'gpt-3.5-turbo'


def process_query_gpt_base(query_text: str, _model_name=DEFAULT_MODEL):
    """
    Purpose/Usage:
    This function processes a query by searching the Chroma database for relevant
    context and generating a response using a chat model.

    Inputs:
    - query_text (str): The text of the query to be processed.
    - _model_name (str): Model name passed to ChatOpenAI. Defaults to DEFAULT_MODEL.

    Outputs/Returns:
    - response_text (str): The answer text generated by the model. The answer and
      the ids of the retrieved chunks are also printed.
    """
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH,
                embedding_function=embedding_function)

    # Retrieve the 5 closest chunks for the query
    results = db.similarity_search_with_score(query_text, k=5)

    context_text = "\n\n---\n\n".join(
        [doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    model = ChatOpenAI(model_name=_model_name)
    response = model.invoke(prompt)
    # The chat model returns a message object whose answer is in `.content`;
    # formatting the object itself would print its full repr (metadata included).
    # Fall back to the raw value in case a plain string is returned.
    response_text = getattr(response, "content", response)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)
    return response_text

In [None]:
def process_query_gpt_str(query_text: str, _model_name=DEFAULT_MODEL):
    """
    Purpose/Usage:
    This function processes a query by searching the Chroma database for relevant
    context and generating a response using a chat model. Each retrieved chunk is
    passed to the model together with its id and score.

    Inputs:
    - query_text (str): The text of the query to be processed.
    - _model_name (str): Model name passed to ChatOpenAI. Defaults to DEFAULT_MODEL.

    Outputs/Returns:
    - formatted_response (str): A plain-text report containing the answer, the
      source chunk ids and the chunks used. The same text is also printed.
    """
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH,
                embedding_function=embedding_function)

    results = db.similarity_search_with_score(query_text, k=5)

    # Prepare the context text with ID and relevance score for each chunk
    context_text = ""
    sources = []
    for doc, score in results:
        chunk_id = doc.metadata.get("id", "Unknown ID")
        sources.append(chunk_id)
        context_text += f"Chunk ID: {chunk_id}\nRelevance Score: {score:.4f}\nContent:\n{doc.page_content}\n\n---\n\n"

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    model = ChatOpenAI(model_name=_model_name)
    response = model.invoke(prompt)
    # The chat model returns a message object whose answer is in `.content`;
    # formatting the object itself would print its full repr (metadata included).
    # Fall back to the raw value in case a plain string is returned.
    response_text = getattr(response, "content", response)

    formatted_response = (
        f"Response: {response_text}\n"
        f"Sources: {sources}\n"
        f"Chunks used:\n{context_text}"
    )
    print(formatted_response)
    return formatted_response

In [None]:
def process_query_gpt_md(query_text: str, _model_name=DEFAULT_MODEL):
    """
    Purpose/Usage:
    This function processes a query by searching the Chroma database for relevant
    context, generating a response using a chat model, and rendering the answer,
    sources, chunks and response metadata as Markdown.

    Inputs:
    - query_text (str): The text of the query to be processed.
    - _model_name (str): Model name passed to ChatOpenAI. Defaults to DEFAULT_MODEL.

    Outputs/Returns:
    - None (displays the response as Markdown)
    """
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH,
                embedding_function=embedding_function)

    results = db.similarity_search_with_score(query_text, k=5)

    # Prepare the context text with ID, relevance score, and content formatted in Markdown
    # NOTE(review): the score from similarity_search_with_score is presumably a
    # distance (lower = closer), so the "Relevance Score" label may mislead — confirm.
    context_text = ""
    sources = []
    for i, (doc, score) in enumerate(results):
        chunk_id = doc.metadata.get("id", "Unknown ID")
        sources.append(chunk_id)
        context_text += f"### Chunk {i+1} - ID: {chunk_id}\n"
        context_text += f"**Relevance Score:** {score:.4f}\n\n"
        context_text += f"**Content:**\n```\n{doc.page_content}\n```\n\n---\n\n"

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    model = ChatOpenAI(model_name=_model_name)
    response = model.invoke(prompt)

    # Extract response metadata defensively: any of these may be missing or None,
    # and a missing field should not prevent the answer from being shown.
    missing = "N/A"
    response_metadata = getattr(response, "response_metadata", None) or {}
    token_usage = response_metadata.get('token_usage') or {}
    usage_metadata = getattr(response, "usage_metadata", None) or {}

    # Format the response as Markdown
    markdown_response = (
        f"## Response\n"
        f"{response.content}\n\n"
        f"## Sources\n"
        + "\n".join(f"- {source}" for source in sources if source) + "\n\n"
        f"## Chunks Used\n\n"
        f"{context_text}"
        f"## Response Metadata\n"
        f"- **Token Usage:**\n"
        f"  - Completion Tokens: {token_usage.get('completion_tokens', missing)}\n"
        f"  - Prompt Tokens: {token_usage.get('prompt_tokens', missing)}\n"
        f"  - Total Tokens: {token_usage.get('total_tokens', missing)}\n"
        f"- **Model Name:** {response_metadata.get('model_name', missing)}\n"
        f"- **System Fingerprint:** {response_metadata.get('system_fingerprint', 'None')}\n"
        f"- **Finish Reason:** {response_metadata.get('finish_reason', missing)}\n"
        f"- **Logprobs:** {response_metadata.get('logprobs', 'None')}\n"
        f"## ID\n"
        f"- **Run ID:** {response.id}\n"
        f"## Usage Metadata\n"
        f"- **Input Tokens:** {usage_metadata.get('input_tokens', missing)}\n"
        f"- **Output Tokens:** {usage_metadata.get('output_tokens', missing)}\n"
        f"- **Total Tokens:** {usage_metadata.get('total_tokens', missing)}\n"
    )

    # Display the response as Markdown
    display(Markdown(markdown_response))

## Example Usage

In [None]:
# Ask a Codenames rules question. process_query_gpt_md() shows its report with
# display() and has no return statement, so `response` is None afterwards.
query_text = "How many clues can I give in Codenames?"
response = process_query_gpt_md(query_text, _model_name="gpt-4o-mini")

## Response
In Codenames, you can give only one clue at a time, but that clue can relate to multiple words. You are allowed to give a clue for only one word (e.g., "cashew: 1"), but you can also attempt to link it to two or more words (e.g., "tree: 2"). However, your clue must always be a single word.

## Sources
- /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:4
- /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:3
- /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:7
- /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:9
- /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:12

## Chunks Used

### Chunk 1 - ID: /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:4
**Relevance Score:** 0.2275

**Content:**
```
Giving A Clue

If you are the spymaster, you are trying to think of a one-word clue that relates to some of the words your team is trying to guess. When you think you have a good clue, you say it. You also say one number, which tells your teammates how many codenames are related to your clue. Example: Two of your words are NUT and BARK. Both of these grow on trees, so you say tree: 2.

You are allowed to give a clue for only one word (cashew: 1) but it's fun to try for two or more. Getting four words with one clue is a big accomplishment.

One Word

Your clue must be only one word. You are not allowed to give extra hints. For example, don't say, "This may be a bit of a stretch…" You are playing Codenames. It's always a bit of a stretch.
```

---

### Chunk 2 - ID: /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:3
**Relevance Score:** 0.2573

**Content:**
```
Spymasters take turns giving one-word clues. A clue may relate to multiple words on the table. The field operatives try to guess which words their spymaster meant. When a field operative touches a word, the spymaster reveals its secret identity. If the field operatives guess correctly, they may continue guessing, until they run out of ideas for the given clue or until they hit a wrong person. Then it is the other team's turn to give a clue and guess. The first team to contact all their agents wins the game.

GAME PLAY

4
Teams take turns. The starting team is indicated by the 4 lights on the edges of the key card.

Giving A Clue
```

---

### Chunk 3 - ID: /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:7
**Relevance Score:** 0.2806

**Content:**
```
You can stop guessing at any time, but usually you want to guess as many words as the spymaster said. Sometimes you might even want to guess one more:
Example: Red Team's first clue was tree: 2. The red operative wanted to guess ORANGE and NUT. She guessed ORANGE first. That was an innocent bystander, so she did not get a chance to guess NUT. Blue Team took a turn and correctly guessed two words. Now it is Red Team's turn again.

The red spymaster says river: 3. The red operative is pretty sure the AMAZON is a river, so she touches that card. The spymaster covers it with a red agent card, so she gets to go again. A river has a BED, so she touches that codename. It's also red, so she can go again.
```

---

### Chunk 4 - ID: /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:9
**Relevance Score:** 0.2863

**Content:**
```
Game Flow

Spymasters take turns giving clues. After a spymaster gives a clue, his or her team starts guessing. Their turn ends when they guess wrong, when they decide to stop, or when they have made the maximum number of guesses for that clue. Then it is the other team's turn.

Ending The Game

The game ends when one team has all their words covered. That team wins.

It is possible to win on the other team's turn if they guess your last word. The game can end early if a field operative makes contact with the assassin. That operative's team loses.

Setup For The Next Game

Do other people want a chance to be spymasters? Setup for the second game is easy. Remove the cards covering the codenames and put them back in their stacks. Now just flip over the 25 codenames, and you're ready to go!
```

---

### Chunk 5 - ID: /content/rag-tutorial-v2/data/md/codenames-rules-en.md:None:12
**Relevance Score:** 0.2924

**Content:**
```
If you prefer to play with strict time limits, you can download our timer app at codenamesgame.com.

Valid Clues

We playtested various rules. Some groups like the rules one way. Some like the rules another way. You should experiment to find out what your group likes.

Firm Rules

Some clues are invalid because they violate the spirit of the game.

Your clue must be about the meaning of the words. You can't use your clue to talk about the letters in a word or its position on the table.
```

---

## Response Metadata
- **Token Usage:**
  - Completion Tokens: 80
  - Prompt Tokens: 1079
  - Total Tokens: 1159
- **Model Name:** gpt-4o-mini
- **System Fingerprint:** fp_f3db212e1c
- **Finish Reason:** stop
- **Logprobs:** None
## ID
- **Run ID:** run-31d6b0b2-c576-442b-8373-ee65997b348b-0
## Usage Metadata
- **Input Tokens:** 1079
- **Output Tokens:** 80
- **Total Tokens:** 1159


In [None]:
# Ask a Monopoly rules question. The report is rendered by the call itself;
# `response` is None because process_query_gpt_md() has no return statement.
query_text = "How do I get out of jail in Monopoly?"
response = process_query_gpt_md(query_text, _model_name="gpt-4o-mini")

## Response
You can get out of jail in Monopoly by:

1. Throwing doubles on any of your next three turns. If you succeed, you move forward the number of spaces shown by your doubles throw but do not take another turn.
2. Using a "Get Out of Jail Free" card if you have it.
3. Purchasing a "Get Out of Jail Free" card from another player and playing it.
4. Paying a fine of $50 before you roll the dice on either of your next two turns.

If you do not throw doubles by your third turn, you must pay the $50 fine to get out and then move forward the number of spaces shown by your throw.

## Sources
- /content/rag-tutorial-v2/data/md/monopoly.md:None:13
- /content/rag-tutorial-v2/data/md/monopoly.md:None:11
- /content/rag-tutorial-v2/data/md/monopoly.md:None:12
- /content/rag-tutorial-v2/data/md/monopoly.md:None:2
- /content/rag-tutorial-v2/data/md/monopoly.md:None:3

## Chunks Used

### Chunk 1 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:13
**Relevance Score:** 0.2375

**Content:**
```
You get out of Jail by.. .(I) throwing doubles on any of your next three turns; if you succeed in doing this you immediately move forward the number of spaces shown by your doubles throw; even though you had thrown doubles, you do not take another turn; (2) using the "Get Out of Jail Free" card if you have it; (3) purchasing the "Get Out of Jail Free" card from another player and playing it; (4) paying a fine of $50 before you roll the dice on either of your next two turns.

If you do not throw doubles by your third turn, you must pay the $50 fine. You then get out of Jail and immediately move forward the number of spaces shown by your throw.

Even though you are in Jail, you may buy and sell property, buy and sell houses and hotels and collect rents.
```

---

### Chunk 2 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:11
**Relevance Score:** 0.2950

**Content:**
```
"CHANCE" AND "COMMUNITY CHEST": When you land on either of these spaces, take the top card from the deck indicated, follow the instructions and return the card facedown to the bottom of the deck.

The "Get Out of Jail Free" card is held until used and then returned to the bottom of the deck. If the player who draws it does not wish to use it, helshe may sell it, at any time, to another player at a price agreeable to both.

"INCOME TAX": If you land here you have two options: You may estimate your tax at $900 and pay the Bank, or you may pay 10% of your total worth to the Bank. Your total worth is all your cash on hand, printed prices of mortgaged and unmortgaged properties and cost price of all buildings you own.
```

---

### Chunk 3 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:12
**Relevance Score:** 0.2965

**Content:**
```
You must decide which option you will take before you add up your total worth.

"JAIL": You land in Jail when. ..(I) your token lands on the space marked "Go to Jail"; (2) you draw a card marked "Go to JailN; or 
(3) you throw doubles three times in succession.

When you are sent to Jail you cannot

collect your $200 salary in that move since, regardless of where your token is on the board, you must move it directly into Jail. Your turn ends when you are sent to Jail.

If you are not "sent" to Jail but in the ordinary course of play land on that space, you are "Just Visiting," you incur no penalty, and you move ahead in the usual manner on your next turn.
```

---

### Chunk 4 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:2
**Relevance Score:** 0.3061

**Content:**
```
Bus: This lets you "get off the bus early." Look at the two white

dice. You can move the value of one die, the other die, or the sum of both dice. So if you rolled a 1 and a 5, you can move 1 space, 5 spaces, or 6 spaces: \t's your choice.

Mr. Monopoly: First, move the sum of the two white dice

and resolve the space you land on (such as drawing a card, buying the property, paying rent, etc.). Then, one of two things will happen depending on whether or not there is still property in the bank.

YES, there is property in the bank -Advance to the NEXT 
property that the bank still holds and buy it if you wish. If you don't want to buy this property, move to the space anyway and put the property up for auction.
```

---

### Chunk 5 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:3
**Relevance Score:** 0.3241

**Content:**
```
NO, there are no more properties in the bank - Advance to the NOCT property on which you will owe another player money.

A few minor details: 
Only the white dice are used when determining if you rolled doubles.

Do not look at the Speed Die.

If you roll a three-of-a-kind (all of the dice show the same number), 
you can move anywhere you want on the board!

If you get sent to jail during your move (either by landing on the "Go to Jail" space or by rolling doubles three times in a row) then your turn is over and you do not get to use the Speed Die for that turn.

Use the white dice ONLY when rolling to get out of jail. L 
Use the sum of all three dice when determining how much to pay on a utility. Note: The Bus and Mr. Monopoly are valued at 0.

Classic Monopow Rules
```

---

## Response Metadata
- **Token Usage:**
  - Completion Tokens: 137
  - Prompt Tokens: 1149
  - Total Tokens: 1286
- **Model Name:** gpt-4o-mini
- **System Fingerprint:** fp_507c9469a1
- **Finish Reason:** stop
- **Logprobs:** None
## ID
- **Run ID:** run-9eb2280e-ed62-409c-b654-9f6876159f13-0
## Usage Metadata
- **Input Tokens:** 1149
- **Output Tokens:** 137
- **Total Tokens:** 1286


In [None]:
# Ask a Ticket to Ride rules question. The report is rendered by the call itself;
# `response` is None because process_query_gpt_md() has no return statement.
query_text = "How many points does the longest continuous train get in Ticket to Ride?"
response = process_query_gpt_md(query_text, _model_name="gpt-4o-mini")

## Response
The player with the Longest Continuous Path of routes receives a special bonus card and adds 10 points to their score.

## Sources
- /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:18
- /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:17
- /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:2
- /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:6
- /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:8

## Chunks Used

### Chunk 1 - ID: /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:18
**Relevance Score:** 0.2272

**Content:**
```
the same city several times, but a given plastic train may never be used twice in the same continuous path. In the case of a tie for the longest path, all tied players score the 10 point bonus. The player with the most points wins the game. If two or more players are tied for the most points, the player who has completed the most Destination Tickets wins. In the unlikely event that they are still tied, the player with the Longest Continuous Path card wins.
```

---

### Chunk 2 - ID: /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:17
**Relevance Score:** 0.2551

**Content:**
```
Calculating Scores Players should have already accounted for the points earned as they completed different routes. To make sure no mistakes were made, you may want to re-count the points for each player's routes. Players should then reveal all their Destination Tickets and add (or subtract) the value of their Destination Tickets still in hand, based on whether they successfully (or not) connected those cities together. The player who has the Longest Continuous Path of routes receives this special bonus card and adds 10 points to his score. When evaluating and comparing path lengths, only take into account continuous lines of plastic trains of the same color. A continuous path may include loops, and pass through the same city several times, but a given plastic train may never be used twice
```

---

### Chunk 3 - ID: /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:2
**Relevance Score:** 0.3140

**Content:**
```
Each succeeding year, they met to celebrate the anniversary and pay tribute to Fogg. And each year a new expedition (always more difficult) with a new wager (always more expensive) was proposed. Now at the dawn of the century it was time for a new impossible journey. The stakes: $1 Million in a winner-takes-all competition. The objective: to see which of them could travel by rail to the most cities in North America - in just 7 days. The journey would begin immediately…
Ticket to Ride is a cross-country train adventure. Players compete to connect different cities by laying claim to railway routes on a
map of North America.

Components
```

---

### Chunk 4 - ID: /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:6
**Relevance Score:** 0.3341

**Content:**
```
Object Of The Came

The object of the game is to score the highest number of total points. Points can be scored by:
· Claiming a Route between two adjacent cities on the map;
· Successfully completing a Continuous Path of routes between two cities listed on your Destination Ticket(s);
· Completing the Longest Continuous Path of routes.

Points are lost if you do not successfully complete the route given on the Destination Ticket(s) you kept.

The Game Turn
```

---

### Chunk 5 - ID: /content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:8
**Relevance Score:** 0.3475

**Content:**
```
Claim a Route - The player may claim a route on the board by playing a set of Train Car cards that match the color and length of the route and then placing one of his colored trains on each space of this route. He then records his score by moving his Scoring Marker the appropriate number of spaces (see Route Scoring Table) along the Scoring Track on the board.

Draw Destination Tickets - The player draws 3 Destination Tickets from the top of the deck. He must keep at least one of them, but he may keep two or all three if he chooses. Any returned cards are placed on the bottom of the deck.
```

---

## Response Metadata
- **Token Usage:**
  - Completion Tokens: 24
  - Prompt Tokens: 886
  - Total Tokens: 910
- **Model Name:** gpt-4o-mini
- **System Fingerprint:** fp_f3db212e1c
- **Finish Reason:** stop
- **Logprobs:** None
## ID
- **Run ID:** run-140a1317-98e7-49ea-a0ef-dfa42c9ccb92-0
## Usage Metadata
- **Input Tokens:** 886
- **Output Tokens:** 24
- **Total Tokens:** 910


In [None]:
# Monopoly starting-money question (with a brevity hint), sent through
# process_query_gpt_md with gpt-4o-mini.
query_text = "How much total money does a player start with in original Monopoly? Keep it short"
response = process_query_gpt_md(query_text, _model_name="gpt-4o-mini")

## Response
A player starts with $1,500 in original Monopoly.

## Sources
- /content/rag-tutorial-v2/data/md/monopoly.md:None:4
- /content/rag-tutorial-v2/data/md/monopoly.md:None:0
- /content/rag-tutorial-v2/data/md/monopoly.md:None:2
- /content/rag-tutorial-v2/data/md/monopoly.md:None:11
- /content/rag-tutorial-v2/data/md/monopoly.md:None:6

## Chunks Used

### Chunk 1 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:4
**Relevance Score:** 0.3044

**Content:**
```
Classic Monopow Rules

OBJECT: The object of the game IS to become the

wealthiest player through buying, renting and selling property. PREPARATION: Place the board on a table and put the Chance and Community Chest cards facedown on their allotted spaces on the board. Each player chooses one token to represent himther while traveling around the board.

Each player is given $1,500 divided as follows: P each of $500s, 
$100~ and $50~; 6 $40~; 5 each of $105, $5~ and $Is.

All remaining money and other equipment go to the Bank. Stack the .. Bank's money on edge in the compartments in the plastic Banker's tray.

BANKER. Select as Banker a player who will also
```

---

### Chunk 2 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:0
**Relevance Score:** 0.3662

**Content:**
```
MONOPOLY

Property Trading Game from Parker Brothers" 
AGES 8+

2 to 8 Players Contents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance and Community Chest cards, Title Deed cards, play money and a Banker's tray.

Now there's a faster way to play MONOPOLY. Choose to play by the classic rules for buying, renting and selling properties or use the Speed Die to get into the action faster. If you've never played the classic MONOPOLY game, refer to the Classic Rules beginning on the next page.

If you already know how to play and want to use the Speed Die, just read the section below for the additional Speed Die rules.

Speed Die Rules

Learnins how to Play with the S~eed Die IS as

/ fast as playing with i't.
```

---

### Chunk 3 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:2
**Relevance Score:** 0.3675

**Content:**
```
Bus: This lets you "get off the bus early." Look at the two white

dice. You can move the value of one die, the other die, or the sum of both dice. So if you rolled a 1 and a 5, you can move 1 space, 5 spaces, or 6 spaces: \t's your choice.

Mr. Monopoly: First, move the sum of the two white dice

and resolve the space you land on (such as drawing a card, buying the property, paying rent, etc.). Then, one of two things will happen depending on whether or not there is still property in the bank.

YES, there is property in the bank -Advance to the NEXT 
property that the bank still holds and buy it if you wish. If you don't want to buy this property, move to the space anyway and put the property up for auction.
```

---

### Chunk 4 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:11
**Relevance Score:** 0.3771

**Content:**
```
"CHANCE" AND "COMMUNITY CHEST": When you land on either of these spaces, take the top card from the deck indicated, follow the instructions and return the card facedown to the bottom of the deck.

The "Get Out of Jail Free" card is held until used and then returned to the bottom of the deck. If the player who draws it does not wish to use it, helshe may sell it, at any time, to another player at a price agreeable to both.

"INCOME TAX": If you land here you have two options: You may estimate your tax at $900 and pay the Bank, or you may pay 10% of your total worth to the Bank. Your total worth is all your cash on hand, printed prices of mortgaged and unmortgaged properties and cost price of all buildings you own.
```

---

### Chunk 5 - ID: /content/rag-tutorial-v2/data/md/monopoly.md:None:6
**Relevance Score:** 0.3840

**Content:**
```
The Bank nwer "goes broke." If the Bank runs out of money, the Banker may issue as much more as needed by writing on any ordinary paper.

THE PLAY: Starting with the Banker, each player in turn throws the dice.

The player with the highest total starts the play: Place your token on the corner marked "GO," throw the dice and move your token in the direction of the arrow the number of spaces indicated by the dice. After you have completed your play, the turn passes to the left. The tokens remain on the spaces qccupied and proceed from that point on the player's next turn. Two or more tokens may rest on the same space at the same time.
```

---

## Response Metadata
- **Token Usage:**
  - Completion Tokens: 12
  - Prompt Tokens: 1110
  - Total Tokens: 1122
- **Model Name:** gpt-4o-mini
- **System Fingerprint:** fp_48196bc67a
- **Finish Reason:** stop
- **Logprobs:** None
## ID
- **Run ID:** run-7d396953-5255-4719-a6a2-5f68579d82fb-0
## Usage Metadata
- **Input Tokens:** 1110
- **Output Tokens:** 12
- **Total Tokens:** 1122


# Unit Testing

In [None]:
# Name of the Ollama model that query_and_validate instantiates as the evaluator.
DEFAULT_OLLAMA = "mistral"

## Define Prompt Template

In [None]:
# Evaluation prompt: query_and_validate fills `expected_response` and
# `actual_response` via str.format and sends the result to the evaluator model,
# which is asked to reply with 'true' or 'false'.
EVAL_PROMPT = """
Expected Response: {expected_response}
Actual Response: {actual_response}
---
(Answer with 'true' or 'false') Does the actual response match the expected response?
"""

## Define Functions

In [None]:
def evaluate_monopoly_rules():
    """
    Purpose/Usage:
    Checks that the answer to the Monopoly starting-money question is judged to match "1500".

    Inputs:
    - None

    Outputs/Returns:
    - None (an AssertionError is raised when query_and_validate returns a falsy result).
    """
    monopoly_question = (
        "How much total money does a player start with in original Monopoly? "
        "(Answer with the number only without $)"
    )
    answer_matches = query_and_validate(
        question=monopoly_question,
        expected_response="1500",
    )
    assert answer_matches


def evaluate_ticket_to_ride_rules():
    """
    Purpose/Usage:
    Checks that the answer to the Ticket to Ride longest-train question is judged to match "10 points".

    Inputs:
    - None

    Outputs/Returns:
    - None (an AssertionError is raised when query_and_validate returns a falsy result).
    """
    ticket_question = (
        "How many points does the longest continuous train get in Ticket to Ride? "
        "(Answer with the number only)"
    )
    answer_matches = query_and_validate(
        question=ticket_question,
        expected_response="10 points",
    )
    assert answer_matches


def query_and_validate(question: str, expected_response: str) -> bool:
    """
    Purpose/Usage:
    This function queries a language model with a question, then asks a local Ollama
    model to judge whether the answer matches an expected answer.

    Inputs:
    - question (str): The question to be asked.
    - expected_response (str): The expected answer to the question.

    Outputs/Returns:
    - bool: True if the first standalone 'true'/'false' word in the judge's reply is
      'true'; False if it is 'false'.

    Raises:
    - ValueError: If the judge's reply contains neither 'true' nor 'false' as a whole word.
    """
    import re  # local import: the notebook's import cell is not part of this block

    response = process_query_gpt_base(question, _model_name="gpt-4o-mini")
    # NOTE(review): process_query_gpt_base is defined elsewhere. Its result seems to be a
    # chat message whose string form carries response metadata (token usage, run id).
    # Judge only the answer text when a `.content` attribute exists; plain strings pass
    # through unchanged.
    response_text = getattr(response, "content", response)
    prompt = EVAL_PROMPT.format(
        expected_response=expected_response, actual_response=response_text
    )

    model = Ollama(model=DEFAULT_OLLAMA)
    evaluation_results_str = model.invoke(prompt)
    evaluation_results_str_cleaned = evaluation_results_str.strip().lower()

    print(prompt)

    # Take the first whole-word verdict. A plain substring test would read
    # "false, this is not true" (or "untrue") as a pass.
    verdict = re.search(r"\b(true|false)\b", evaluation_results_str_cleaned)
    if verdict is None:
        raise ValueError(
            "Invalid evaluation result. Cannot determine if 'true' or 'false': "
            f"{evaluation_results_str_cleaned!r}"
        )

    is_match = verdict.group(1) == "true"
    # Green for a match, red for a mismatch.
    colour = "\033[92m" if is_match else "\033[91m"
    print(colour + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
    return is_match

In [None]:
# Run the Monopoly check; an AssertionError here means query_and_validate did not report a match.
evaluate_monopoly_rules()

Response: content='1500' response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 875, 'total_tokens': 877}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_507c9469a1', 'finish_reason': 'stop', 'logprobs': None} id='run-26ce9a43-68d1-4d97-bd3f-9b3d10704337-0' usage_metadata={'input_tokens': 875, 'output_tokens': 2, 'total_tokens': 877}
Sources: ['/content/rag-tutorial-v2/data/md/monopoly.md:None:4', '/content/rag-tutorial-v2/data/md/monopoly.md:None:2', '/content/rag-tutorial-v2/data/md/monopoly.md:None:8', '/content/rag-tutorial-v2/data/md/monopoly.md:None:0', '/content/rag-tutorial-v2/data/md/monopoly.md:None:11']

Expected Response: 1500
Actual Response: content='1500' response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 875, 'total_tokens': 877}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_507c9469a1', 'finish_reason': 'stop', 'logprobs': None} id='run-26ce9a43-68d1-4d97-bd3f-9b3d10704337-0' usage_metadata={'input_tokens'

In [None]:
# Run the Ticket to Ride check; an AssertionError here means query_and_validate did not report a match.
evaluate_ticket_to_ride_rules()

Response: content='10' response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 656, 'total_tokens': 657}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_507c9469a1', 'finish_reason': 'stop', 'logprobs': None} id='run-a57f5a72-dbdb-45ee-a77e-e297e3961980-0' usage_metadata={'input_tokens': 656, 'output_tokens': 1, 'total_tokens': 657}
Sources: ['/content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:18', '/content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:17', '/content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:2', '/content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:6', '/content/rag-tutorial-v2/data/md/ticket_to_ride.md:None:8']

Expected Response: 10 points
Actual Response: content='10' response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 656, 'total_tokens': 657}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_507c9469a1', 'finish_reason': 'stop', 'logprobs': None} id='run-a57f5a72-dbdb-45ee-a77e-e297e3961980-0