In [1]:
import requests

# Download the course FAQ documents from the raw GitHub URL
docs_url = 'https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/01-intro/documents.json'
headers = {'User-Agent': 'Mozilla/5.0'}
try:
    docs_response = requests.get(docs_url, headers=headers, timeout=10)
    # Turn HTTP 4xx/5xx responses into exceptions instead of parsing an error page
    docs_response.raise_for_status()
    documents_raw = docs_response.json()
except requests.exceptions.RequestException as e:
    print(f'Error downloading file: {e}')
    # Re-raise so the notebook stops here: without documents_raw the following
    # cells would only fail later with an unrelated-looking NameError.
    raise
else:
    print('Download successful!')

Download successful!


In [2]:
# Flatten the per-course structure into a single list of FAQ entries,
# tagging every entry with the name of the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']
    course_docs = course['documents']
    num_docs = len(course_docs)
    print(f"Course: {course_name}, Number of documents: {num_docs}")

    # Build new dicts instead of writing into the downloaded payload, so
    # documents_raw stays exactly as fetched.
    documents.extend({**doc, 'course': course_name} for doc in course_docs)

Course: data-engineering-zoomcamp, Number of documents: 435
Course: machine-learning-zoomcamp, Number of documents: 375
Course: mlops-zoomcamp, Number of documents: 138


In [3]:
from elasticsearch import Elasticsearch, helpers

# Connect to Elasticsearch (assumes running locally, security disabled)
es = Elasticsearch("http://localhost:9200")
try:
    info = es.info()
except Exception as e:
    print("Connection failed:", e)
    # Stop here: the index operations below cannot succeed without a cluster,
    # and continuing would only bury the real cause under a second traceback.
    raise
print("Connected! Cluster info:", info)

# Define the index mapping
mapping = {
    "mappings": {
        "properties": {
            "question": {"type": "text"},
            "text": {"type": "text"},
            "section": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

# Create the index (delete if exists) so re-running the cell starts clean
index_name = "faq"
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
es.indices.create(index=index_name, body=mapping)

# Index the documents with the bulk helper instead of one HTTP request per
# document. Missing fields default to an empty string, as before.
indexed_fields = ("question", "text", "section", "course")
bulk_actions = (
    {
        "_index": index_name,
        "_source": {field: doc.get(field, "") for field in indexed_fields},
    }
    for doc in documents
)
# helpers.bulk raises if any document fails to index (raise_on_error defaults to True)
helpers.bulk(es, bulk_actions)

# Refresh explicitly so the documents are searchable as soon as this cell
# finishes, rather than depending on the periodic background refresh.
es.indices.refresh(index=index_name)

print("Indexing complete!")

Connected! Cluster info: {'name': 'MacBook-Pro-2', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'YEQ4bsjDT8qOlij-7c_bCQ', 'version': {'number': '8.17.6', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'dbcbbbd0bc4924cfeb28929dc05d82d662c527b7', 'build_date': '2025-04-30T14:07:12.231372970Z', 'build_snapshot': False, 'lucene_version': '9.12.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}
Indexing complete!
Indexing complete!


In [5]:
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

# Search for the query; only the top 3 hits are shown, so request just those
query = {
    "size": 3,
    "query": {
        "multi_match": {
            "query": "How do execute a command on a Kubernetes pod?",
            # question^4 boosts matches in the question field 4x over the answer text
            "fields": ["question^4", "text"],
            "type": "best_fields"
        }
    }
}

response = es.search(index="faq", body=query)

for hit in response["hits"]["hits"][:3]:
    hit_source = hit["_source"]
    # Guard against a missing/None text so slicing cannot raise TypeError
    hit_text = hit_source.get("text") or ""
    # Append the ellipsis only when the text was actually cut off
    snippet = hit_text[:200] + ("..." if len(hit_text) > 200 else "")
    print(f"Score: {hit['_score']}")
    print(f"Question: {hit_source.get('question')}")
    print(f"Text: {snippet}")
    print('-'*40)


Score: 44.50556
Question: How do I debug a docker container?
Text: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a ...
----------------------------------------
Score: 35.433445
Question: Kubernetes-dashboard
Text: Deploy and Access the Kubernetes Dashboard
Luke...
----------------------------------------
Score: 33.70974
Question: How do I copy files from a different folder into docker container’s working directory?
Text: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
In the Dockerfile, you can provide the folder containing the files that you want to...
----------------------------------------


In [7]:
# Search for the new question, restricted to one course; only the top 3 hits
# are shown, so request just those
query = {
    "size": 3,
    "query": {
        "bool": {
            # "must" contributes to the relevance score
            "must": [
                {
                    "multi_match": {
                        "query": "How do copy a file to a Docker container?",
                        "fields": ["question^4", "text"],
                        "type": "best_fields"
                    }
                }
            ],
            # "filter" narrows the results to one course without affecting the score
            "filter": [
                {
                    "term": {
                        "course": "machine-learning-zoomcamp"
                    }
                }
            ]
        }
    }
}

response = es.search(index="faq", body=query)

for hit in response["hits"]["hits"][:3]:
    hit_source = hit["_source"]
    # Guard against a missing/None text so slicing cannot raise TypeError
    hit_text = hit_source.get("text") or ""
    # Append the ellipsis only when the text was actually cut off
    snippet = hit_text[:200] + ("..." if len(hit_text) > 200 else "")
    print(f"Score: {hit['_score']}")
    print(f"Course: {hit_source.get('course')}")
    print(f"Question: {hit_source.get('question')}")
    print(f"Text: {snippet}")
    print('-'*40)


Score: 73.38676
Course: machine-learning-zoomcamp
Question: How do I debug a docker container?
Text: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a ...
----------------------------------------
Score: 66.688705
Course: machine-learning-zoomcamp
Question: How do I copy files from my local machine to docker container?
Text: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container...
----------------------------------------
Score: 59.812744
Course: machine-learning-zoomcamp
Question: How do I copy files from a different folder into docker container’s working directory?
Text: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
In the D

In [12]:
# Turn the top 3 hits of the most recent search (`response`) into Q/A context blocks
top_hits = response["hits"]["hits"][:3]
context_entries = [
    f"Q: {src.get('question', '')}\nA: {src.get('text', '')}"
    for src in (found['_source'] for found in top_hits)
]
# Entries are separated by a blank line; surrounding whitespace is trimmed
context = "\n\n".join(context_entries).strip()

# Assemble the final prompt from the template, the question and the context
# NOTE(review): this question is not the one used for the search that produced
# `response` — confirm the mismatch is intended.
question = "How do I execute a command in a running docker container?"
prompt_template = '''
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
'''.strip()

prompt = prompt_template.format(question=question, context=context)
print(f"Prompt length: {len(prompt)}")
print("\n---\n")

# Show at most the first 2500 characters, marking truncation with an ellipsis
if len(prompt) > 2500:
    print(prompt[:2500] + "...")
else:
    print(prompt)

Prompt length: 1462

---

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do I execute a command in a running docker container?

CONTEXT:
Q: How do I debug a docker container?
A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)

Q: How do I copy files from my local machine to docker container?
A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/file_

In [13]:
# Count how many tokens the prompt occupies under the gpt-4o encoding (tiktoken)
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-4o")
prompt_token_ids = encoding.encode(prompt)
num_tokens = len(prompt_token_ids)
print(f"Number of tokens in the prompt: {num_tokens}")

Number of tokens in the prompt: 322
