# Homework

## Libraries

In [3]:
import requests 
import json
from tqdm.auto import tqdm
from elasticsearch import Elasticsearch

## Data

In [103]:
# Download the FAQ documents and flatten them into a single list, tagging
# every document with the name of the course it belongs to.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Build new dicts rather than mutating `documents_raw`, so re-running the
# cell always starts from the untouched download.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

## Questions

#### Question 1. Running Elastic

In [102]:
# Query the local Elasticsearch HTTP endpoint; the JSON reply includes the build info.
!curl localhost:9200

{
  "name" : "d51b9ad5a2af",
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "LHJ-n69KSQ-c5RqPmaKC0w",
  "version" : {
    "number" : "9.0.2",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "0a58bc1dc7a4ae5412db66624aab968370bd44ce",
    "build_date" : "2025-05-28T10:06:37.834829258Z",
    "build_snapshot" : false,
    "lucene_version" : "10.1.0",
    "minimum_wire_compatibility_version" : "8.18.0",
    "minimum_index_compatibility_version" : "8.0.0"
  },
  "tagline" : "You Know, for Search"
}


#### Question 2. Indexing the data

In [104]:
# Client for the local Elasticsearch instance; used by every indexing and search cell below.
es_client = Elasticsearch('http://localhost:9200')

In [105]:
# Index definition: a single shard without replicas, free-text fields for
# the searchable content, and `course` as a keyword so it can be used in
# exact-match `term` filters.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run so this cell can be
# re-executed: `indices.create` raises if the index already exists.
# `ignore_unavailable=True` makes the delete a no-op on the first run.
es_client.indices.delete(index=index_name, ignore_unavailable=True)

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [106]:
# Index every document one by one; tqdm shows progress for the slow loop.
for doc in tqdm(documents, desc="Indexing"):
    es_client.index(index=index_name, document=doc)

# Make the freshly indexed documents visible to search right away, so the
# search cells below see the full index even under "Run All".
es_client.indices.refresh(index=index_name)

#### Question 3. Searching

In [107]:
# Search text for Question 3; the scores shown below depend on this exact string.
query = "How do execute a command on a Kubernetes pod?"

In [108]:
def elastic_search(query):
    """Return the relevance scores of the top 5 hits for `query`.

    Runs a `best_fields` multi-match over `question` (boosted 4x) and `text`
    against the index named by the global `index_name`, using the global
    `es_client`.

    NOTE: a later cell in this notebook defines another function with the
    same name, so which version is active depends on execution order.
    """
    multi_match = {
        "query": query,
        "fields": ["question^4", "text"],
        "type": "best_fields",
    }
    search_query = {
        "size": 5,
        "query": {"bool": {"must": {"multi_match": multi_match}}},
    }

    hits = es_client.search(index=index_name, body=search_query)['hits']['hits']

    # Only the scores are needed here, not the documents themselves.
    return [hit['_score'] for hit in hits]

In [109]:
# Display the relevance scores returned for the current query.
elastic_search(query)

[44.50556, 35.433445, 33.70974, 33.2635, 32.589073]

#### Question 4. Filtering

In [110]:
# Search text used from Question 4 onwards; echoed as the cell output.
query = "How do copy a file to a Docker container?"
query

'How do copy a file to a Docker container?'

In [111]:
def elastic_search(query, course="machine-learning-zoomcamp", size=3):
    """Search the FAQ index within one course and return the matching documents.

    Runs a `best_fields` multi-match over `question` (boosted 3x), `text`
    and `section`, restricted by an exact-match `term` filter on `course`.
    Uses the global `es_client` and `index_name`.

    Args:
        query: Free-text search string.
        course: Value the `course` field must equal. Defaults to
            "machine-learning-zoomcamp".
        size: Maximum number of hits to return. Defaults to 3.

    Returns:
        A list with the `_source` dict of each hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # `filter` restricts the results without affecting scoring.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [112]:
# Run the course-filtered search; the result is reused when building the prompt below.
search_results = elastic_search(query)

In [113]:
# Show the third returned document (index 2).
search_results[2]

{'text': 'You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to copy over. The basic syntax is as follows:\nCOPY ["src/predict.py", "models/xgb_model.bin", "./"]\t\t\t\t\t\t\t\t\t\t\tGopakumar Gopinathan',
 'section': '5. Deploying Machine Learning Models',
 'question': 'How do I copy files from a different folder into docker container’s working directory?',
 'course': 'machine-learning-zoomcamp'}

#### Question 5. Building a prompt

In [114]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from a question and its retrieved FAQ entries.

    Each entry of `search_results` is rendered as a "Q: ...\\nA: ..." pair
    (with placeholder text when `question` or `text` is missing), the pairs
    are separated by a blank line, and the result is inserted together with
    `query` into the instruction template.

    Args:
        query: The user's question.
        search_results: Iterable of dict-like documents with optional
            `question` and `text` keys.

    Returns:
        The fully formatted prompt string.
    """
    # One "Q/A" entry per retrieved document, separated by blank lines.
    context = "\n\n".join(
        "Q: {question}\nA: {text}".format(
            question=hit.get("question", "No question provided"),
            text=hit.get("text", "No answer provided"),
        )
        for hit in search_results
    )

    # Instruction template; `.strip()` drops the newlines around the literal.
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: {context}
""".strip()

    return prompt_template.format(question=query, context=context)


In [115]:
# Build the prompt from the current query and search results, then report
# its length in characters (len() of a str), not tokens.
final_prompt = build_prompt(query, search_results)
prompt_length = len(final_prompt)
print(prompt_length)

1446


#### Question 6. Tokens

In [None]:
pip install tiktoken

In [72]:
import tiktoken

In [83]:
# Look up the tokenizer tiktoken associates with the "gpt-4o" model name.
encoding = tiktoken.encoding_for_model("gpt-4o")
print(encoding)

<Encoding 'o200k_base'>


In [82]:
# Tokenize the prompt built above and report how many tokens it contains.
tokens = encoding.encode(final_prompt)
print(len(tokens))

320


#### Bonus question 1. Generating the answer

Load relevant libraries and set up the environment.

In [84]:
import os
from dotenv import load_dotenv
from mistralai import Mistral

In [90]:
# Load environment variables from .env file
load_dotenv()

api_key = os.getenv("MISTRAL_API_KEY")
model = "mistral-large-latest"
client = Mistral(api_key=api_key)

In [91]:
def llm(prompt):
    """Send `prompt` as a single user message and return the reply text.

    Uses the global `client` and `model`; returns the message content of
    the first choice in the completion response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.complete(model=model, messages=[user_message])

    first_choice = completion.choices[0]
    return first_choice.message.content

In [88]:
query = "How do copy a file to a Docker container?"

def rag(query):
    """Answer `query` by chaining search, prompt construction and the LLM call.

    Retrieves documents with `elastic_search`, formats them with
    `build_prompt`, and returns whatever `llm` produces for that prompt.
    """
    context_docs = elastic_search(query)
    return llm(build_prompt(query, context_docs))

In [118]:
# Run the full retrieve-then-generate pipeline for the current query and show the answer.
response = rag(query)
print(response)

To copy a file to a Docker container, you can use the `docker cp` command. The basic syntax is as follows:

```
docker cp /path/to/local/file_or_directory container_id:/path/in/container
```


#### Bonus question 2. Calculating the costs

In [120]:
# Count tokens in the prompt and in the generated answer.
# NOTE(review): `encoding` was loaded for "gpt-4o" while the answer came from
# the Mistral model configured above, so these counts are presumably only an
# approximation for that model — confirm if exact figures matter.
input_tokens = len(encoding.encode(final_prompt))
output_tokens = len(encoding.encode(response))

In [121]:
# Pricing per 1K tokens
input_price_per_1k = 0.005
output_price_per_1k = 0.015

# Calculate costs
input_cost = (input_tokens / 1000) * input_price_per_1k
output_cost = (output_tokens / 1000) * output_price_per_1k
total_cost = input_cost + output_cost

print(f"Input tokens: {input_tokens}")
print(f"Output tokens: {output_tokens}")
print(f"Total cost: ${total_cost:.4f}")

Input tokens: 320
Output tokens: 45
Total cost: $0.0023
