# OpenAI connection (not free)

In [1]:
import openai

In [2]:
from openai import OpenAI

In [3]:
# Create the OpenAI client. No API key is passed explicitly here —
# presumably it is picked up from the environment (OPENAI_API_KEY); TODO confirm.
client=OpenAI()

In [7]:
# Send one user message to gpt-3.5-turbo through the chat completions API;
# the returned object is the cell's displayed value.
client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "is it too late to joint the course ?",
        }
    ],
    model='gpt-3.5-turbo',
)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

# Groq connection (free)

In [13]:
import yaml

# Load the API keys from a local YAML file so that no secret is hard-coded in
# the notebook. The encoding is given explicitly because open() otherwise
# falls back to a platform-dependent default.
with open('api_keys.yml', 'r', encoding='utf-8') as file:
    # Parse the YAML content into a Python object
    data = yaml.safe_load(file)

# Get the API key (Groq example here); a missing entry raises KeyError
groq_api_key = data['GROQ_API_KEY']

In [14]:
# import libraries
from groq import Groq
import os

# Build the Groq client from the API key loaded earlier
client = Groq(api_key=groq_api_key)

# Send a single-turn chat request to the llama3-8b-8192 model
response = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": "Is it too late to join the course?"}],
)

# Show only the text of the first returned choice
print(response.choices[0].message.content)

I'm happy to help you with your question! However, I need more information to provide a accurate answer. Can you please tell me which course you are referring to and is there any specific deadline or window to enroll in that course?


# Homework week 1

In [20]:
from elasticsearch import Elasticsearch
import requests 
from tqdm import tqdm

In [49]:
# Download the course FAQ documents (one entry per course, each holding its own list of documents)

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten into one list, tagging every document with the course it belongs to
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [76]:
documents[1]

{'text': 'GitHub - DataTalksClub data-engineering-zoomcamp#prerequisites',
 'section': 'General course-related questions',
 'question': 'Course - What are the prerequisites for this course?',
 'course': 'data-engineering-zoomcamp'}

In [37]:
# Create the Elasticsearch client (the Elasticsearch server must already be running and reachable at localhost:9200, e.g. started from the VS Code terminal)
es_client = Elasticsearch("http://localhost:9200")

In [38]:
# Settings and mappings for the index (the index itself is created in a later cell).
# One shard, no replicas; "text", "section" and "question" are mapped as "text",
# while "course" is mapped as "keyword".
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            **{field: {"type": "text"} for field in ("text", "section", "question")},
            "course": {"type": "keyword"},
        }
    },
}

In [77]:
index_name = "course_questions"

In [74]:
# If the index needs to be rebuilt, delete it first (this removes only the course_questions index, not all indices):
!curl -X DELETE "http://localhost:9200/course_questions"


{"acknowledged":true}

In [78]:
# Create the index named by index_name using the settings/mappings in index_settings.
# NOTE(review): presumably this fails when the index already exists — confirm;
# the curl DELETE cell can be used to reset it before re-running.
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course_questions'})

In [79]:
# Index the documents one at a time (one es_client.index call per document);
# tqdm wraps the iteration to show a progress bar.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

100%|███████████████████████████████████████████████████████████████████████████████████████| 948/948 [00:18<00:00, 51.67it/s]


In [43]:
query = "How do I execute a command in a running docker container?"

In [44]:
def elastic_search(query, course, size=5):
    """Search the FAQ index for documents matching ``query`` within one course.

    Runs a ``multi_match`` (``best_fields``) query over the ``question`` and
    ``text`` fields, with ``question`` boosted by 4, and keeps only documents
    whose ``course`` field equals ``course`` (``term`` filter). The search is
    sent through the notebook-level ``es_client`` against ``index_name``.

    Args:
        query: Free-text search string.
        course: Course identifier used in the ``term`` filter.
        size: Maximum number of hits to request. Defaults to 5, which was the
            previously fixed value, so existing calls behave as before.

    Returns:
        A tuple ``(response, result_docs, result_scores)``: the raw search
        response, the ``_source`` of every hit, and the ``_score`` of every
        hit. Both lists follow the order of the hits in the response.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^4", "text"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    # Split the hits into parallel lists of documents and scores
    hits = response['hits']['hits']
    result_docs = [hit['_source'] for hit in hits]
    result_scores = [hit['_score'] for hit in hits]

    return response, result_docs, result_scores

In [80]:
response_query, top_doc, top_score = elastic_search(query, course = "data-engineering-zoomcamp")

In [81]:
response_query

ObjectApiResponse({'took': 39, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 390, 'relation': 'eq'}, 'max_score': 75.54128, 'hits': [{'_index': 'course_questions', '_id': 'UVpUQJABHzC2WF4MYOBV', '_score': 75.54128, '_source': {'text': 'In case running pgcli  locally causes issues or you do not want to install it locally you can use it running in a Docker container instead.\nBelow the usage with values used in the videos of the course for:\nnetwork name (docker network)\npostgres related variables for pgcli\nHostname\nUsername\nPort\nDatabase name\n$ docker run -it --rm --network pg-network ai2ys/dockerized-pgcli:4.0.1\n175dd47cda07:/# pgcli -h pg-database -U root -p 5432 -d ny_taxi\nPassword for root:\nServer: PostgreSQL 16.1 (Debian 16.1-1.pgdg120+1)\nVersion: 4.0.1\nHome: http://pgcli.com\nroot@pg-database:ny_taxi> \\dt\n+--------+------------------+-------+-------+\n| Schema | Name             | Type  | Owner |\n

In [82]:
top_score

[75.54128, 43.922554, 38.684105, 38.33403, 35.94081]

In [83]:
top_doc

[{'text': 'In case running pgcli  locally causes issues or you do not want to install it locally you can use it running in a Docker container instead.\nBelow the usage with values used in the videos of the course for:\nnetwork name (docker network)\npostgres related variables for pgcli\nHostname\nUsername\nPort\nDatabase name\n$ docker run -it --rm --network pg-network ai2ys/dockerized-pgcli:4.0.1\n175dd47cda07:/# pgcli -h pg-database -U root -p 5432 -d ny_taxi\nPassword for root:\nServer: PostgreSQL 16.1 (Debian 16.1-1.pgdg120+1)\nVersion: 4.0.1\nHome: http://pgcli.com\nroot@pg-database:ny_taxi> \\dt\n+--------+------------------+-------+-------+\n| Schema | Name             | Type  | Owner |\n|--------+------------------+-------+-------|\n| public | yellow_taxi_data | table | root  |\n+--------+------------------+-------+-------+\nSELECT 1\nTime: 0.009s\nroot@pg-database:ny_taxi>',
  'section': 'Module 1: Docker and Terraform',
  'question': 'PGCLI - running in a Docker container',
 

In [84]:
response_query, top_doc, top_score = elastic_search(query, course = "machine-learning-zoomcamp")

In [87]:
response_query

ObjectApiResponse({'took': 19, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 345, 'relation': 'eq'}, 'max_score': 84.050095, 'hits': [{'_index': 'course_questions', '_id': 'Q1pUQJABHzC2WF4MheLh', '_score': 84.050095, '_source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)', 'section': '5. Deploying Machine Learning Models', 'question': 'How do I debug a docker container?', 'course': 'machine-learning-zoomcamp'}}, {'_index': 'course_questions', '_id': 'YlpUQJABHzC2WF4MiOIy', '_score': 51.04628, '_source': {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo 

In [85]:
# Get the third hit question
response_query['hits']['hits'][2]['_source']['question']

'How do I copy files from a different folder into docker container’s working directory?'

In [86]:
top_doc

[{'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
  'section': '5. Deploying Machine Learning Models',
  'question': 'How do I debug a docker container?',
  'course': 'machine-learning-zoomcamp'},
 {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani",
  'section': '5. Deploying Machine Learning Models',
  'question': 'How do I copy files from my local machine to docker container?',
 

In [None]:
"How do I execute a command in a running docker container?"

In [102]:
# Hand-written prompt: the question and the three context entries are pasted in literally.
# No str.format() call is applied in this cell, so the braces around the question and
# around the CONTEXT block are literal characters in the resulting string.
# NOTE(review): the QUESTION below ('How do I debug a docker container?') differs from the
# `query` variable used for the search ("How do I execute a command in a running docker
# container?") — confirm which one is intended.
# NOTE(review): this is not a raw string, so the \n and \t escapes inside the entries
# become real newlines and tabs in the final prompt.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {'How do I debug a docker container?'}
CONTEXT: {
{
    'question': "How do I copy files from my local machine to docker container?",
    'solution': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani"
}
\n\n
{
    'question': "How do I copy files from a different folder into docker container’s working directory?",
    'solution': "You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to copy over. The basic syntax is as follows:\nCOPY ["src/predict.py", "models/xgb_model.bin", "./"]\t\t\t\t\t\t\t\t\t\t\tGopakumar Gopinathan",
}
\n\n
{
    'question': "The input device is not a TTY when running docker in interactive mode (Running Docker on Windows in GitBash)",
    'solution': "$ docker exec -it 1e5a1b663052 bash\nthe input device is not a TTY.  If you are using mintty, try prefixing the command with 'winpty'\nFix:\nwinpty docker exec -it 1e5a1b663052 bash\nA TTY is a terminal interface that supports escape sequences, moving the cursor around, etc.\nWinpty is a Windows software package providing an interface similar to a Unix pty-master for communicating with Windows console programs.\nMore info on terminal, shell, console applications hi and so on:\nhttps://conemu.github.io/en/TerminalVsShell.html\n(Marcos MJD)",
}

""".strip()

In [103]:
prompt_template

'You\'re a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\nUse only the facts from the CONTEXT when answering the QUESTION.\n\nQUESTION: {\'How do I debug a docker container?\'}\nCONTEXT: {\n{\n    \'question\': "How do I copy files from my local machine to docker container?",\n    \'solution\': "You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani"\n}\n\n\n\n{\n    \'question\': "How do I copy files from a different folder into docker container’s working directory?",\n    \'solution\': "You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can

In [104]:
len(prompt_template)

1848

In [105]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting regex>=2022.1.18 (from tiktoken)
  Downloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m0m eta [36m0:00:01[0m
[?25hDownloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m775.1/775.1 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m
[?25hInstalling collected packages: regex, tiktoken
Successfully installed regex-202

In [107]:
import tiktoken

In [108]:
encoding = tiktoken.encoding_for_model("gpt-4o")

In [113]:
len(encoding.encode(prompt_template))

428