In [7]:
import json
import os
import requests

from tqdm.auto import tqdm

In [8]:
# Download the course FAQ dump (a JSON list of {'course': ..., 'documents': [...]} objects).
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten course -> documents into one list, tagging every entry with its course.
# Each entry is copied, so `documents_raw` stays exactly as downloaded.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [9]:
# Display the first flattened record to check which fields it carries.
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [1]:
!curl http://localhost:9200

{
  "name" : "99e78d94dd39",
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "1q_klH2BTn-MYN7_VpLqrQ",
  "version" : {
    "number" : "8.4.3",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "42f05b9372a9a4a470db3b52817899b99a76ee73",
    "build_date" : "2022-10-04T07:17:24.662462378Z",
    "build_snapshot" : false,
    "lucene_version" : "9.3.0",
    "minimum_wire_compatibility_version" : "7.17.0",
    "minimum_index_compatibility_version" : "7.0.0"
  },
  "tagline" : "You Know, for Search"
}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   539  100   539    0     0  31084      0 --:--:-- --:--:-- --:--:-- 31705


In [2]:
from elasticsearch import Elasticsearch
from elasticsearch import NotFoundError

# Client for the Elasticsearch instance at localhost:9200.
# request_timeout=60 is presumably in seconds (per the client docs) — confirm.
es = Elasticsearch('http://localhost:9200',   request_timeout=60)

# Name of the index that holds the FAQ entries.
INDEX_NAME = "course-questions"

# Index definition: one shard, no replicas; the three free-text fields are
# mapped as "text", while `course` is a "keyword" field.
index_settings = {
    "settings": dict(number_of_shards=1, number_of_replicas=0),
    "mappings": {
        "properties": {
            **{field: {"type": "text"} for field in ("text", "section", "question")},
            "course": {"type": "keyword"},
        }
    },
}

In [3]:
# List the indices in the cluster
!curl -X GET "localhost:9200/_cat/indices?v"

health status index uuid pri rep docs.count docs.deleted store.size pri.store.size


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100    83  100    83    0     0   2463      0 --:--:-- --:--:-- --:--:--  2515


In [4]:
# (Re)create the index so it always carries the explicit mappings from
# `index_settings`. Previously an existing index was deleted but never
# recreated; the next es.index() call would then auto-create it with dynamic
# mappings, and `course` would no longer be a keyword field.
try:
    es.indices.get(index=INDEX_NAME)
except NotFoundError:
    # Nothing to clean up: the index does not exist yet.
    pass
else:
    print(f"{INDEX_NAME} already exists")
    # options() carries transport-level parameters such as ignore_status.
    es.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)

response = es.indices.create(
    index=INDEX_NAME,
    settings=index_settings['settings'],
    mappings=index_settings['mappings'],
)
print(response)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'}


In [5]:
# Show information about the cluster nodes
!curl -X GET "localhost:9200/_cat/nodes?v"

ip         heap.percent ram.percent cpu load_1m load_5m load_15m node.role   master name
172.17.0.2            7          89   1    0.01    0.23     0.18 cdfhilmrstw *      99e78d94dd39


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   186  100   186    0     0   5440      0 --:--:-- --:--:-- --:--:--  5470


In [10]:
# Index the documents: one es.index() request per FAQ entry, with a progress bar.
progress = tqdm(documents)
for doc in progress:
    es.index(document=doc, index=INDEX_NAME)

print("Indexing complete.")

  0%|          | 0/948 [00:00<?, ?it/s]

Indexing complete.


In [19]:
from pprint import pprint

# Define the search query
query = "How do I execute a command in a running docker container?"

# multi_match over the question and the answer text; "question^4" boosts
# matches in the question field.
search_query = {
    "size": 5,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields",
                },
            },
        },
    },
}

# Run the search; the dict's keys ("size", "query") map directly onto
# es.search() keyword arguments.
response = es.search(index=INDEX_NAME, **search_query)

# Print the top result
top_result = response['hits']['hits'][0]
pprint(top_result)
print(f"Top result score: {top_result['_score']}")

{'_id': '7jXpS5ABCPulKgLmfFkd',
 '_index': 'course-questions',
 '_score': 84.050095,
 '_source': {'course': 'machine-learning-zoomcamp',
             'question': 'How do I debug a docker container?',
             'section': '5. Deploying Machine Learning Models',
             'text': 'Launch the container image in interactive mode and '
                     'overriding the entrypoint, so that it starts a bash '
                     'command.\n'
                     'docker run -it --entrypoint bash <image>\n'
                     'If the container is already running, execute a command '
                     'in the specific container:\n'
                     'docker ps (find the container-id)\n'
                     'docker exec -it <container-id> bash\n'
                     '(Marcos MJD)'}}
Top result score: 84.050095


Тепер обмежимося питаннями про `machine-learning-zoomcamp` і повернемо 3 результати.
Яке третє питання повертає пошукова система?

In [21]:
query = "How do I execute a command in a running docker container?"

# Same multi_match query as before, but restricted by a term filter to
# documents whose `course` is exactly "machine-learning-zoomcamp".
search_query = {
    "size": 3,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields",
                },
            },
            "filter": {
                "term": {"course": "machine-learning-zoomcamp"},
            },
        },
    },
}

# Run the search; "size" and "query" are passed as keyword arguments.
response = es.search(index=INDEX_NAME, **search_query)

In [13]:
# Hand-pasted literal shaped like the 'hits' section of a search response
# (three hits, all with course 'machine-learning-zoomcamp').
# NOTE(review): presumably a saved copy of the filtered query's result — confirm.
res = {}
res['hits']={'total': {'value': 345, 'relation': 'eq'},
 'max_score': 84.050095,
 'hits': [{'_index': 'course-questions',
   '_id': '7jXpS5ABCPulKgLmfFkd',
   '_score': 84.050095,
   '_source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
    'section': '5. Deploying Machine Learning Models',
    'question': 'How do I debug a docker container?',
    'course': 'machine-learning-zoomcamp'}},
  {'_index': 'course-questions',
   '_id': 'DTXpS5ABCPulKgLmfVpf',
   '_score': 51.04628,
   '_source': {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani",
    'section': '5. Deploying Machine Learning Models',
    'question': 'How do I copy files from my local machine to docker container?',
    'course': 'machine-learning-zoomcamp'}},
  {'_index': 'course-questions',
   '_id': 'DjXpS5ABCPulKgLmfVpn',
   '_score': 49.938507,
   '_source': {'text': 'You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to copy over. The basic syntax is as follows:\nCOPY ["src/predict.py", "models/xgb_model.bin", "./"]\t\t\t\t\t\t\t\t\t\t\tGopakumar Gopinathan',
    'section': '5. Deploying Machine Learning Models',
    'question': 'How do I copy files from a different folder into docker container’s working directory?',
    'course': 'machine-learning-zoomcamp'}}]}

In [16]:
from pprint import pprint
# Pretty-print the hard-coded hits so the nested structure is readable.
pprint(res['hits'])

{'hits': [{'_id': '7jXpS5ABCPulKgLmfFkd',
           '_index': 'course-questions',
           '_score': 84.050095,
           '_source': {'course': 'machine-learning-zoomcamp',
                       'question': 'How do I debug a docker container?',
                       'section': '5. Deploying Machine Learning Models',
                       'text': 'Launch the container image in interactive mode '
                               'and overriding the entrypoint, so that it '
                               'starts a bash command.\n'
                               'docker run -it --entrypoint bash <image>\n'
                               'If the container is already running, execute a '
                               'command in the specific container:\n'
                               'docker ps (find the container-id)\n'
                               'docker exec -it <container-id> bash\n'
                               '(Marcos MJD)'}},
          {'_id': 'DTXpS5ABCPulKgLmfVpf',
   

Q3: 'How do I copy files from a different folder into docker container’s working directory?'

In [24]:
# Keep only the stored document (`_source`) of every hit in the last search response.
result_docs = [hit['_source'] for hit in response['hits']['hits']]

# Print each document followed by blank lines so the entries are easy to tell apart.
for doc in result_docs:
    print(doc, '\n\n')

{'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)', 'section': '5. Deploying Machine Learning Models', 'question': 'How do I debug a docker container?', 'course': 'machine-learning-zoomcamp'} 


{'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani", 'section': '5. Deploying Machine Learning Models', 'question': 'How do I copy files from my local machine to docker container?', 'course': '

In [27]:
# Render every retrieved document as a "Q: ... / A: ..." pair and glue the
# pairs together; each pair ends with a blank line.
context = "".join(
    f"Q: {doc['question']}\nA: {doc['text']}\n\n" for doc in result_docs
)
context

'Q: How do I debug a docker container?\nA: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)\n\nQ: How do I copy files from my local machine to docker container?\nA: You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani\n\nQ: How do I copy files from a different folder into docker container’s working directory?\nA: You can copy files from your local machine into a Docker container using the docker cp comman

In [30]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}`` in the template.
        search_results: Iterable of dicts, each providing ``'question'`` and
            ``'text'`` keys.

    Returns:
        The filled-in template with leading/trailing whitespace stripped.
        Every entry is rendered as ``Q: <question>\\nA: <text>`` and entries
        are separated by a blank line.
    """
    template = """
Ви асистент викладача курсу. Дайте відповідь на ЗАПИТАННЯ на основі КОНТЕКСТУ з бази даних поширених запитань.
Відповідаючи на ЗАПИТАННЯ, використовуйте лише факти з КОНТЕКСТУ.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    # One "Q/A" chunk per search result, joined in a single pass.
    qa_chunks = [
        f"Q: {entry['question']}\nA: {entry['text']}\n\n"
        for entry in search_results
    ]
    filled = template.format(question=query, context="".join(qa_chunks))

    # The final strip drops the blank line left after the last entry.
    return filled.strip()

In [31]:
# Build the prompt for the current query from the retrieved documents.
prompt = build_prompt(query, result_docs)

# Report the prompt size in characters.
print("Length of the prompt: ", len(prompt))

Length of the prompt:  1475


In [4]:
# NOTE(review): at module level `prompt_template` is only assigned in the cell
# that follows this one (inside build_prompt it is a local variable), so on a
# fresh kernel with Run All this line raises NameError — run the template cell first.
len(prompt_template)

206

In [4]:
# Layout of a single FAQ entry inside the prompt's CONTEXT section.
context_template = "Q: {question}\nA: {text}"

# Full prompt layout: instructions, then the QUESTION, then the CONTEXT block.
prompt_template = (
    "You're a course teaching assistant. "
    "Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
    "Use only the facts from the CONTEXT when answering the QUESTION.\n"
    "\n"
    "QUESTION: {question}\n"
    "\n"
    "CONTEXT:\n"
    "{context}"
)

In [2]:
# Hard-coded context string: three "Q: ...\nA: ...\n\n" entries.
# NOTE(review): presumably pasted from an earlier cell's output so this part can run without Elasticsearch — confirm.
context = 'Q: How do I debug a docker container?\nA: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)\n\nQ: How do I copy files from my local machine to docker container?\nA: You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani\n\nQ: How do I copy files from a different folder into docker container’s working directory?\nA: You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to copy over. The basic syntax is as follows:\nCOPY ["src/predict.py", "models/xgb_model.bin", "./"]\t\t\t\t\t\t\t\t\t\t\tGopakumar Gopinathan\n\n'

In [5]:

# Fill the module-level template with the question and the hard-coded context.
query="How do I execute a command in a running docker container?"
prompt = prompt_template.format(question=query, context=context).strip()

In [6]:
import tiktoken

# NOTE(review): this is the encoding for "gpt-4o", while the completion cell
# uses model 'llama3-8b-8192'; the count is presumably only an approximation
# for that model — confirm.
encoding = tiktoken.encoding_for_model("gpt-4o")
tokens = encoding.encode(prompt)

# Report the prompt size in characters, then in tokens.
print("Length of the prompt: ", len(prompt))
print('Num tokens: ', len(tokens))

Length of the prompt:  1462
Num tokens:  322


In [7]:
from dotenv import load_dotenv
from groq import Groq

# Load environment variables from the .env file (a single call is enough;
# the import and the call were previously duplicated).
load_dotenv()

# Groq() is given no arguments; presumably it reads its API key from the
# environment loaded above — confirm.
client = Groq()

# Send the prompt as a single user message.
response = client.chat.completions.create(
    model='llama3-8b-8192',
    messages=[{"role": "user", "content": prompt}],
)

# Text of the first returned choice.
output = response.choices[0].message.content
print(output)

Based on the context, to execute a command in a running Docker container, you can use the `docker exec` command with the container ID, similar to the example provided:

`docker exec -it <container-id> bash`

Replace `<container-id>` with the actual ID of the container you want to execute the command in.


In [8]:
# Count the tokens of the model's reply with the same `encoding` object.
tokens=encoding.encode(output)
print('Num tokens: ', len(tokens))

Num tokens:  66


In [13]:
# Show the full prompt text that is passed to the model as the user message.
print(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do I execute a command in a running docker container?

CONTEXT:
Q: How do I debug a docker container?
A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)

Q: How do I copy files from my local machine to docker container?
A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/file_or_directory container_id:

In [5]:
import pandas as pd

# Tabular view of the FAQ entries; `columns` names the four fields to use and their order.
df = pd.DataFrame(documents, columns=['course', 'section', 'question', 'text'])
# Preview the first rows only.
df.head()

Unnamed: 0,course,section,question,text
0,data-engineering-zoomcamp,General course-related questions,Course - When will the course start?,The purpose of this document is to capture fre...
1,data-engineering-zoomcamp,General course-related questions,Course - What are the prerequisites for this c...,GitHub - DataTalksClub data-engineering-zoomca...
2,data-engineering-zoomcamp,General course-related questions,Course - Can I still join the course after the...,"Yes, even if you don't register, you're still ..."
3,data-engineering-zoomcamp,General course-related questions,Course - I have registered for the Data Engine...,You don't need it. You're accepted. You can al...
4,data-engineering-zoomcamp,General course-related questions,Course - What can I do before the course starts?,You can start by installing and setting up all...
