In [1]:
# Record the version of the `python` executable found by the shell, for reproducibility.
!python --version

Python 3.11.5


#### Q1 - Getting the data

In [2]:
!pip show requests

Name: requests
Version: 2.32.3
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache-2.0
Location: /home/sidd4ml/.pyenv/versions/3.11.5/lib/python3.11/site-packages
Requires: certifi, charset-normalizer, idna, urllib3
Required-by: google-api-core, google-cloud-storage, jupyterlab_server


In [3]:
import requests 

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'

# Fail fast: the timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of as a confusing JSON
# decoding error further down.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course grouping into one list of records, tagging each record
# with the name of the course it came from. Each record is copied, so
# documents_raw is left unmodified and re-running the cell gives the same result.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

#### Q2 - Indexing the data

In [4]:
# since 'pip install elasticsearch' installs elasticsearch 9.x which runs into compatibility issues with Elasticsearch 8.17.6
!pip install "elasticsearch>=8,<9"


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
# Confirm which version of the Python client library got installed.
import elasticsearch
print(f"elasticsearch : {elasticsearch.__version__}")

# Connect to the local node and report the version of the server behind it.
from elasticsearch import Elasticsearch
es = Elasticsearch("http://localhost:9200")
server_info = es.info()
print(f"Elasticsearch: {server_info['version']['number']}")

elasticsearch : (8, 18, 1)
Elasticsearch: 8.17.6


In [56]:
# Display the build hash reported by the connected server's info endpoint.
es.info()['version']['build_hash'] # verify the build hash

'dbcbbbd0bc4924cfeb28929dc05d82d662c527b7'

In [6]:
# Name of the index that holds the FAQ records.
index_name = "course-faq-index"

# Index definition: one shard, no replicas. The three free-text fields are mapped
# as "text"; "course" is mapped as "keyword".
index_settings = {
    "settings": dict(number_of_shards=1, number_of_replicas=0),
    "mappings": {
        "properties": {
            **{field: {"type": "text"} for field in ("text", "section", "question")},
            "course": {"type": "keyword"},
        }
    },
}

In [7]:
# Start from a clean index so this cell can be re-run: creating an index that
# already exists raises an error, and indexing into a leftover index would add
# duplicate documents and change the search scores.
# ignore_unavailable=True makes the delete a no-op when the index is not there yet.
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-faq-index'})

In [8]:
!pip install tqdm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [9]:
from tqdm.auto import tqdm

In [10]:
# Send every FAQ record to the index, with a progress bar.
for doc in tqdm(documents):
    es.index(index=index_name, document=doc)

# Newly indexed documents only become searchable after a refresh. Forcing one here
# means the search cells that follow see all documents, even when run immediately.
# The trailing semicolon suppresses the response object in the cell output.
es.indices.refresh(index=index_name);

  0%|          | 0/948 [00:00<?, ?it/s]

#### Q3. Searching

In [11]:
# Question text searched for in Q3 (changing the wording changes the relevance scores).
query = "How do execute a command on a Kubernetes pod?"

In [12]:
# Match `query` against the question and answer text; "question^4" boosts the
# question field by a factor of four, and "best_fields" is the multi_match type.
question_and_text_match = {
    "multi_match": {
        "query": query,
        "fields": ["question^4", "text"],
        "type": "best_fields",
    }
}

# Q3 request body: the five top-scoring hits for the match clause above.
search_query = {
    "size": 5,
    "query": {"bool": {"must": question_and_text_match}},
}

In [13]:
# Run the Q3 request body against the FAQ index and keep the raw response.
response = es.search(index=index_name, body=search_query)

In [14]:
# Relevance score of each returned hit, in the order the hits were returned.
scores = [hit['_score'] for hit in response['hits']['hits']]

In [15]:
# Highest relevance score among the returned hits.
max(scores)

44.50556

#### Q4. Filtering

In [16]:
# Question text used for the filtered search in Q4 and for the prompt in Q5.
changed_query = "How do copy a file to a Docker container?"

In [17]:
# Match `changed_query` against the question (boosted 4x) and the answer text.
docker_question_match = {
    "multi_match": {
        "query": changed_query,
        "fields": ["question^4", "text"],
        "type": "best_fields",
    }
}

# Restrict results to records whose `course` field equals this exact value.
ml_course_filter = {"term": {"course": "machine-learning-zoomcamp"}}

# Q4 request body: the three top-scoring hits that also pass the course filter.
search_query_updated = {
    "size": 3,
    "query": {
        "bool": {
            "must": docker_question_match,
            "filter": ml_course_filter,
        }
    },
}

In [18]:
# Run the filtered Q4 request body against the FAQ index and keep the raw response.
response_modified = es.search(index=index_name, body=search_query_updated)

In [19]:
# Question text of every hit from the filtered search, in the order returned.
questions = [
    hit.get('_source').get('question')
    for hit in response_modified['hits']['hits']
]

In [20]:
# Index 2 is the third hit returned by the filtered search.
questions[2] #3rd question

'How do I copy files from a different folder into docker container’s working directory?'

#### Q5. Building a prompt

In [21]:
# Stored document (the `_source` of each hit) for every result of the filtered
# search; these records supply the context for the prompt.
records_for_Q5 = [hit.get('_source') for hit in response_modified['hits']['hits']]
records_for_Q5

[{'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
  'section': '5. Deploying Machine Learning Models',
  'question': 'How do I debug a docker container?',
  'course': 'machine-learning-zoomcamp'},
 {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani",
  'section': '5. Deploying Machine Learning Models',
  'question': 'How do I copy files from my local machine to docker container?',
 

In [41]:
# Instructions, then the question, then the retrieved context. strip() removes the
# newlines that the triple-quoted literal adds at the start and end.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# One "Q: ... / A: ..." entry per retrieved record, each followed by a blank line.
context = "".join(
    f"Q: {record['question']}\nA: {record['text']}\n\n"
    for record in records_for_Q5
)

# The final strip() drops the blank line left after the last context entry.
prompt = prompt_template.format(question=changed_query, context=context).strip()

In [42]:
# Number of characters (not tokens) in the assembled prompt.
len(prompt) # length of the resulting prompt

1446

In [58]:
# Show the assembled prompt exactly as built above, with its newlines rendered.
print(prompt) # the prompt

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do copy a file to a Docker container?

CONTEXT:
Q: How do I debug a docker container?
A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)

Q: How do I copy files from my local machine to docker container?
A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/file_or_directory container_id:/path/in/contain

#### Q6. Tokens

In [77]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.9.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [92]:
import tiktoken

In [94]:
# Load tiktoken's built-in "cl100k_base" encoding to count the prompt's tokens.
# NOTE(review): despite the variable name, this is not a custom encoding and not
# Llama 3's own tokenizer, so the count is specific to cl100k_base — confirm which
# encoding the assignment expects.
llama3_encoding = tiktoken.get_encoding("cl100k_base")

# Encode the prompt into a list of token ids
tokens = llama3_encoding.encode(prompt)

print(f"Token count: {len(tokens)}")

Token count: 323


### End of graded assignment