## Ingestion

In [1]:
import chromadb
chroma_client = chromadb.Client()

In [2]:
tourism_collection = chroma_client.create_collection(
    name="tourism_collection")

In [3]:
tourism_collection.add(
    documents=[
        "Paestum, Greek Poseidonia, ancient city in southern Italy near the west coast, 22 miles (35 km) southeast of modern Salerno and 5 miles (8 km) south of the Sele (ancient Silarus) River. Paestum is noted for its splendidly preserved Greek temples.",
        "Poseidonia was probably founded about 600 BC by Greek colonists from Sybaris, along the Gulf of Taranto, and it had become a flourishing town by 540, judging from its temples. After many years’ resistance the city came under the domination of the Lucanians (an indigenous Italic people) sometime before 400 BC, after which its name was changed to Paestum. Alexander, the king of Epirus, defeated the Lucanians at Paestum about 332 BC, but the city remained Lucanian until 273, when it came under Roman rule and a Latin colony was founded there. The city supported Rome during the Second Punic War. The locality was still prosperous during the early years of the Roman Empire, but the gradual silting up of the mouth of the Silarus River eventually created a malarial swamp, and Paestum was finally deserted after being sacked by Muslim raiders in AD 871. The abandoned site’s remains were rediscovered in the 18th century.",
        "The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are 15–20 feet (5–6 m) thick, is about 3 miles (5 km) in circumference. In July 1969 a farmer uncovered an ancient Lucanian tomb that contained Greek frescoes painted in the early classical style. Paestum’s archaeological museum contains these and other treasures from the site."
    ],
    metadatas=[
        {"source": "https://www.britannica.com/place/Paestum"},
        {"source": "https://www.britannica.com/place/Paestum"},
        {"source": "https://www.britannica.com/place/Paestum"}
    ],
    ids=["paestum-br-01", "paestum-br-02", "paestum-br-03"]
)

## Q&A

In [4]:
results = tourism_collection.query(
    query_texts=["How many Doric temples are in Paestum"],
    n_results=1
)
print(results)

{'ids': [['paestum-br-03']], 'embeddings': None, 'documents': [['The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are 15–20 feet (5–6 m) thick, is about 3 miles (5 km) in circumference. In July 1969 a farmer uncovered an ancien

In [5]:
results = tourism_collection.query(
    query_texts=["How many Doric temples are in Paestum"],
    n_results=3
)
print(results)

{'ids': [['paestum-br-03', 'paestum-br-01', 'paestum-br-02']], 'embeddings': None, 'documents': [['The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are 15–20 feet (5–6 m) thick, is about 3 miles (5 km) in circumference. In July

## RAG from scratch

In [7]:
# ============================================================================
# IMPORTS
# ============================================================================
import os
from openai import OpenAI
from dotenv import load_dotenv

# ============================================================================
# ENV SETUP (โหลดและเช็กค่าจาก .env)
# ============================================================================
load_dotenv()

OPENROUTER_API_KEY  = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL")  # เช่น https://openrouter.ai/api/v1

if not OPENROUTER_API_KEY:
    raise RuntimeError("Missing OPENROUTER_API_KEY in .env")

if not OPENROUTER_BASE_URL:
    raise RuntimeError("Missing OPENROUTER_BASE_URL in .env")

# ============================================================================
# สร้าง Client ชี้ไปที่ OpenRouter
# ============================================================================
openai_client = OpenAI(
    api_key=OPENROUTER_API_KEY,
    base_url=OPENROUTER_BASE_URL,
)

In [8]:
def query_vector_database(question):
    results = tourism_collection.query(
    query_texts=[question],
    n_results=1)

    results_text = results['documents'][0][0]

    return results_text

In [9]:
results_text = query_vector_database("How many Doric temples are in Paestum")
print(results_text)

The ancient Greek part of Paestum consists of two sacred areas containing three Doric temples in a remarkable state of preservation. During the ensuing Roman period a typical forum and town layout grew up between the two ancient Greek sanctuaries. Of the three temples, the Temple of Athena (the so-called Temple of Ceres) and the Temple of Hera I (the so-called Basilica) date from the 6th century BC, while the Temple of Hera II (the so-called Temple of Neptune) was probably built about 460 BC and is the best preserved of the three. The Temple of Peace in the forum is a Corinthian-Doric building begun perhaps in the 2nd century BC. Traces of a Roman amphitheatre and other buildings, as well as intersecting main streets, have also been found. The circuit of the town walls, which are built of travertine blocks and are 15–20 feet (5–6 m) thick, is about 3 miles (5 km) in circumference. In July 1969 a farmer uncovered an ancient Lucanian tomb that contained Greek frescoes painted in the earl

## Naive prompt implementation

In [10]:
def prompt_template(question, text):
    return f'Read the following text and answer this question: {question}. \nText: {text}'

In [11]:
def execute_llm_prompt(prompt_input):
    prompt_response = openai_client.chat.completions.create(
        model='gpt-5-nano',
        messages=[
         {"role": "system", "content": "You are an assistant for question-answering tasks."},
         {"role": "user", "content": prompt_input}
        ])
    return prompt_response

## Trick question

In [12]:
trick_question = "How many columns have the three temples got in total?"
tq_result_text = query_vector_database(trick_question)
tq_prompt = prompt_template(trick_question , tq_result_text)
tq_prompt_response = execute_llm_prompt(tq_prompt)

In [13]:
print(tq_prompt_response)

ChatCompletion(id='gen-1766045300-H9RgN6ZnTEcYXIenkvke', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The text does not specify the number of columns for any of the temples, so the total column count is not provided.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning='**Analyzing temple columns**\n\nI need to answer the question about how many columns the three temples have in total. The text identifies "three Doric temples" in Paestum, but it doesn\'t specify the column count for each temple. I have to consider typical column counts for Doric temples to infer this. The question may be a trick since it states "three temples" but lacks column details. I\'ll have to make sure to clarify that the answer isn\'t directly in the text.**Estimating column counts**\n\nI\'m considering the typical structure of Paestum\'s temples. The Temple of Hera I, a Doric temple, likely has a l

## Safer prompt implementation

In [14]:
def prompt_template(question, text):
    return f'Use the following pieces of retrieved context to answer the question. Only use the retrieved context to answer the question. If you don\'t know the answer, or the answer is not contained in the retrieved context, just say that you don\'t know. Use three sentences maximum and keep the answer concise. \nQuestion: {question}\nContext: {text}. Remember: if you do not know, just say: I do not know. Do not make up an answer. For example do not say the three temples have got a total of three columns. \nAnswer:'

In [15]:
trick_question = "How many columns have the three temples got in total?"
tq_result_text = query_vector_database(trick_question)
tq_prompt = prompt_template(trick_question , tq_result_text)
tq_prompt_response = execute_llm_prompt(tq_prompt)
print(tq_prompt_response)

ChatCompletion(id='gen-1766045626-KM4mZUit5OXJ6lnlyukW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='I do not know.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None, reasoning_details=[{'id': 'rs_076d3b56e48a5a73016943b7bb601481939d37512814f95341', 'format': 'openai-responses-v1', 'index': 0, 'type': 'reasoning.encrypted', 'data': 'gAAAAABpQ7e9GYyLHGWeyuBD9qBbCptmuUFO-oWeLgaFO5l4RA_7XQ1c0DszzWBN71PTWzL9tCqbn8tuIZtfY4iGVwxt2jSwWWPJzsDoiqA6rKjbSdZIeICAwEAXSKdRznCb6tYDDMPtTQQlohnhPVflXvuR_n00xah-VCt-n25SbPMSmnGIOJ1nMnOcN4cnwgDrsvD3gPIyg5SBB_jIBKamcDaBtqGUsOSqhYL-qHReHwbL6E_wUudfx6qZZFgk5KSy2UZWgeFEZXJ6Lseku6B4lj6stGfDnL73aDXM6-yrJA-vkiM8NvowKKo0c780VmNZmNC0RIg6pUgGfr8weu_dDKeybBJjiMnNHXYD4VrEm6s2wcn2h6p0JQ4Tuub-0YI0dmfZ2t1hN7WfVd65dMHXflPUbt4_JUUFYc3PgTLmypkI4Kt9AqPFjay_c3N1bqYHT3NSjAziout7uLIrPrzv5hHLmgcU5dsR2LY5WC_DXloFs-c3_GKQnj7qi3SmYo0wb2SiNEeUvmJ5j3bAWIS6jGaHp9

## Building a chatbot

In [16]:
def my_chatbot(question):
    results_text = query_vector_database(question)
    prompt_input = prompt_template(question,
                                   results_text)
    prompt_output = execute_llm_prompt(
        prompt_input)

    return prompt_output

In [17]:
question = """Let me know how many temples there
are in Paestum, who constructed them, and what
architectural style they are"""
result = my_chatbot(question)
print(result)

ChatCompletion(id='gen-1766045791-HJsgX22wpU8VpdyTEnjQ', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Paestum has three temples in the ancient Greek part. They are Doric temples (Temple of Athena, Temple of Hera I, and Hera II). I do not know who constructed them based on the retrieved context.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning='**Summarizing temples in Paestum**\n\nI need to provide a concise answer to the question about Paestum\'s temples. There are three Doric temples in the ancient Greek part: the Temple of Athena (also known as Temple of Ceres), the Temple of Hera I (or Temple of Basilica), and the Temple of Hera II (Temple of Neptune). The first two date back to the 6th century BC, while Hera II dates to 460 BC and is the best preserved. The architectural style is Doric.**Clarifying construction details**\n\nThe context doesn\'t specify who construc