1. Check the version.build_hash value

Using elasticsearch==9.0.2 instead of elasticsearch==8.4.3 due to errors

In [2]:
# Elasticsearch server version (must be running)
!curl -s http://localhost:9200 | grep number

!pip list | grep elasticsearch
# Python client version (in current environment)


    "number" : "9.0.2",
elasticsearch             9.0.2


In [3]:
!curl -s localhost:9200 | jq '.version.build_hash'

[0;32m"0a58bc1dc7a4ae5412db66624aab968370bd44ce"[0m


Get the FAQ data

In [10]:
import requests
# import minsearch
import cohere
import os
import json
from elasticsearch import Elasticsearch
import tiktoken

In [4]:
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

# Print the type of the 'documents' variable to confirm it's a list
print(type(documents))

# Print the number of documents loaded
print(len(documents))

# Print the first document to inspect its structure
print(documents[0])

2. Index the data

In [18]:
q = "How do execute a command on a Kubernetes pod?"

"""
# Implementing RAG with MinSearch
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

results = index.search(
    query=q,
    #filter_dict={'course': 'machine-learning-zoomcamp'},
    boost_dict={'question': 4.0, 'text': 1.0},
    num_results=5
)

print(results)
"""

'\n# Implementing RAG with MinSearch\nindex = minsearch.Index(\n    text_fields=["question", "text", "section"],\n    keyword_fields=["course"]\n)\n\nindex.fit(documents)\n\nresults = index.search(\n    query=q,\n    #filter_dict={\'course\': \'machine-learning-zoomcamp\'},\n    boost_dict={\'question\': 4.0, \'text\': 1.0},\n    num_results=5\n)\n\nprint(results)\n'

In [17]:
# Connect to Elasticsearch
es = Elasticsearch("http://localhost:9200")

# Create the index if it doesn't exist
if not es.indices.exists(index="my-index"):
    es.indices.create(index="my-index")

# Index each document in 'documents'
for doc in documents:
    es.index(index="my-index", document=doc)


3. Check the score for the top ranking result

In [19]:
# Define the search query
search_query = {
    "multi_match": {
        "query": q,
        "fields": ["question^4", "text"],
        "type": "best_fields"
    }
}

# Execute the search using top-level 'query' parameter
response = es.search(
    index="my-index",
    query=search_query,
    size=1  # Get only the top result
)

# Extract the top result's score
hits = response["hits"]["hits"]
if hits:
    top_score = hits[0]["_score"]
    print(f"Top result score: {top_score}")
else:
    print("No results found.")


Top result score: 44.50556


4. Filtering

In [25]:
q2 = "How do copy a file to a Docker container?"

# Define the search query
search_query = {
    "multi_match": {
        "query": q2,
        "fields": ["question^4", "text"],
        "type": "best_fields"
    }
}

# Execute the search using top-level 'query' parameter
response = es.search(
    index="my-index",
    query=search_query,
    size=3  # Get the top 3 results
)

# Print 3rd result
print(response["hits"]["hits"][2]["_source"]["question"])

How do I copy files from a different folder into docker container’s working directory?


5. Building a prompt

In [37]:
context = ""

for doc in response["hits"]["hits"]:
    # context = context + f"section: {doc['_source']['section']}\n question: {doc['_source']['question']}\n answer: {doc['_source']['text']}\n\n"
    context =  context + f"Q: {doc['_source']['question']} A: {doc['_source']['text']}".strip()

prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

prompt = prompt_template.format(question=q2, context=context).strip()

print("Prompt: ", prompt)
print("\nLength of prompt: ", len(prompt))

Prompt:  You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do copy a file to a Docker container?

CONTEXT:
Q: How do I debug a docker container? A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)Q: How do I copy files from my local machine to docker container? A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/file_or_directory container_id:/path/in/

6. Tokens

In [40]:
encoding = tiktoken.encoding_for_model("gpt-4o")

# Count the number of tokens in the prompt
prompt = prompt_template.format(question=q2, context=context).strip()
print(len(encoding.encode(prompt)))


319


Bonus

Use cohere to generate answer 

In [42]:
def initiate_cohere():

    api_key = os.getenv("COHERE_API_KEY")

    if api_key is None:
        raise EnvironmentError("COHERE_API_KEY environment variable not set. Please set it before running this notebook.")

    co = cohere.ClientV2()

    return co

def pretty_print_cohere_response(response):
    """
    Pretty-print a Cohere API response object using its .dict() method.
    
    Args:
        response: A Cohere API response object (e.g., ChatResponse).
    """
    response_dict = response.dict()
    print(json.dumps(response_dict, indent=2))

co = initiate_cohere()

In [44]:
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": q2}],
    documents=[context]
)

pretty_print_cohere_response(response)

{
  "id": "cae87f63-c3bb-4622-95c8-f28c6a59ee33",
  "finish_reason": "COMPLETE",
  "message": {
    "role": "assistant",
    "content": [
      {
        "type": "text",
        "text": "You can copy files from your local machine into a Docker container using the docker cp command. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container"
      }
    ],
    "citations": [
      {
        "start": 77,
        "end": 95,
        "text": "docker cp command.",
        "sources": [
          {
            "type": "document",
            "id": "doc:0",
            "document": {
              "content": "Q: How do I debug a docker container? A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id>

In [47]:
print("Question: ", q2)
print("Answer: ", response.message.content[0].text)

Question:  How do copy a file to a Docker container?
Answer:  You can copy files from your local machine into a Docker container using the docker cp command. The basic syntax is as follows:
docker cp /path/to/local/file_or_directory container_id:/path/in/container
