In [2]:
import openai
import elasticsearch
import os 

In [2]:
import json

# Read the FAQ export explicitly as UTF-8. Without `encoding=` Python falls
# back to a platform-dependent default, which can fail or garble the
# non-ASCII characters (e.g. typographic quotes) present in the FAQ text.
with open('./documents.json', 'rt', encoding='utf-8') as f_in:
    documents_file = json.load(f_in)

# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the name of the course it came from.
documents = []

for course in documents_file:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [3]:
from elasticsearch import Elasticsearch

# Address of the Elasticsearch node used throughout this notebook.
ES_URL = "http://localhost:9200"

# Shared client; `info()` doubles as a connectivity check and is displayed
# as the cell output.
es = Elasticsearch(ES_URL)
es.info()

ObjectApiResponse({'name': 'baf63c68d069', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'dKjnvS9IRnyFpokUM8b-rQ', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [4]:
# Index definition: a single shard without replicas, free-text fields for the
# searchable content and a `keyword` field for `course` so it can be used in
# exact-match `term` filters.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Make the cell re-runnable: creating an index that already exists raises an
# error, and because documents are indexed without explicit ids, re-indexing
# into a surviving index would store every document twice. Dropping the stale
# index first (a no-op when it does not exist) avoids both problems.
es.indices.delete(index=index_name, ignore_unavailable=True)
response = es.indices.create(index=index_name, body=index_settings)

response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [5]:
from tqdm.auto import tqdm

# Send the FAQ entries to Elasticsearch one request at a time, with a
# progress bar over the whole collection.
progress = tqdm(documents)
for faq_entry in progress:
    es.index(index=index_name, document=faq_entry)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 948/948 [00:33<00:00, 28.67it/s]


In [6]:
user_question = "How do I join the course after it has started?"

# Full-text part of the query: match the question against several fields,
# weighting hits in the `question` field three times higher.
question_match = {
    "multi_match": {
        "query": user_question,
        "fields": ["question^3", "text", "section"],
        "type": "best_fields",
    }
}

# Exact-match restriction to a single course.
course_filter = {"term": {"course": "data-engineering-zoomcamp"}}

search_query = {
    "size": 5,
    "query": {"bool": {"must": question_match, "filter": course_filter}},
}

In [7]:
response = es.search(index=index_name, body=search_query)

# Print a compact preview of each hit: section, question and the first
# 60 characters of the answer text.
for doc in (hit['_source'] for hit in response['hits']['hits']):
    answer_preview = doc['text'][:60]
    print(f"Section: {doc['section']}")
    print(f"Question: {doc['question']}")
    print(f"Answer: {answer_preview}...\n")

Section: General course-related questions
Question: Course - Can I still join the course after the start date?
Answer: Yes, even if you don't register, you're still eligible to su...

Section: General course-related questions
Question: Course - Can I follow the course after it finishes?
Answer: Yes, we will keep all the materials after the course finishe...

Section: General course-related questions
Question: Course - What can I do before the course starts?
Answer: You can start by installing and setting up all the dependenc...

Section: General course-related questions
Question: How do I use Git / GitHub for this course?
Answer: After you create a GitHub account, you should clone the cour...

Section: Workshop 1 - dlthub
Question: How do I install the necessary dependencies to run the code?
Answer: Answer: To run the provided code, ensure that the 'dlt[duckd...



In [8]:
def retrieve_documents(query, index_name="course-questions", max_results=5,
                       course="data-engineering-zoomcamp", es_client=None):
    """Return the FAQ entries that best match ``query`` within one course.

    Args:
        query: Free-text user question; matched against the ``question``
            (boosted x3), ``text`` and ``section`` fields.
        index_name: Name of the Elasticsearch index to search.
        max_results: Maximum number of hits to return.
        course: Exact value of the ``course`` field to restrict results to.
            Defaults to the course that was previously hard-coded.
        es_client: Optional client object exposing
            ``search(index=..., body=...)``. When omitted, a temporary client
            for ``http://localhost:9200`` is opened for this call and closed
            afterwards instead of being leaked.

    Returns:
        A list with the ``_source`` dict of every hit, in the order returned
        by Elasticsearch.
    """
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    if es_client is not None:
        response = es_client.search(index=index_name, body=search_query)
    else:
        # No client supplied: open one just for this request and make sure its
        # connections are released when the request is done.
        with Elasticsearch("http://localhost:9200") as local_client:
            response = local_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [9]:
user_question = "Kind cannot load docker image"

# `response` is the plain list of FAQ entries returned by the helper.
response = retrieve_documents(user_question)

for doc in response:
    answer_preview = doc['text'][:60]
    print(f"Section: {doc['section']}")
    print(f"Question: {doc['question']}")
    print(f"Answer: {answer_preview}...\n")

Section: Module 2: Workflow Orchestration
Question: Push to docker image failure
Answer: denied: requested access to the resource is denied
This can ...

Section: Module 1: Docker and Terraform
Question: Docker - Cannot pip install on Docker container (Windows)
Answer: You may have this error:
Retrying (Retry(total=4, connect=No...

Section: Module 6: streaming with kafka
Question: Could not start docker image “control-center” from the docker-compose.yaml file.
Answer: Check Docker Compose File:
Ensure that your docker-compose.y...

Section: Module 1: Docker and Terraform
Question: Docker - Cannot connect to Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?
Answer: Make sure you're able to start the Docker daemon, and check ...

Section: Module 1: Docker and Terraform
Question: Docker - Cannot install docker on MacOS/Windows 11 VM running on top of Linux (due to Nested virtualization).
Answer: terraformRun this command before starting your VM:
On Intel ...



In [10]:
from openai import OpenAI

In [13]:
# The API key is taken from the environment, never hard-coded in the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [14]:
# Baseline: ask the model directly, without any FAQ context.
messages = [{"role": "user", "content": "The course already started. Can I still join?"}]
response = client.chat.completions.create(model="gpt-4o", messages=messages)
print(response.choices[0].message.content)

While the ability to join a course after it has already started often depends on the specific institution or organization's policies, here are a few steps you can take to check if it's possible:

1. **Contact the Instructor or Institution**: Reach out directly to the course instructor or the administration office. They can provide the most accurate and immediate information regarding late enrollment.

2. **Review the Course Policies**: Check the course syllabus or the institution’s website to see if there are any specified guidelines regarding late enrollment.

3. **Catch Up on Missed Material**: If you are permitted to join late, ask for the materials or resources covering what has been taught so far. This could include lecture notes, reading materials, assignments, and recordings of past classes if available.

4. **Assess Workload and Time Commitment**: Determine if you can realistically catch up on the missed content while keeping up with ongoing coursework.

5. **Consider Future St

In [15]:
# How a single FAQ entry is rendered inside the prompt context.
context_template = (
    "Section: {section}\n"
    "Question: {question}\n"
    "Answer: {text}"
)

context_docs = retrieve_documents(user_question)

# Render every retrieved entry and separate the entries with a blank line.
context_result = "\n\n".join(
    context_template.format(**doc) for doc in context_docs
)

context = context_result.strip()
print(context)

Section: Module 2: Workflow Orchestration
Question: Push to docker image failure
Answer: denied: requested access to the resource is denied
This can happen when you
Haven't logged in properly to Docker Desktop (use docker login -u "myusername")
Have used the wrong username when pushing to docker images. Use the same one as your username and as the one you build on
docker image build -t <myusername>/<imagename>:<tag>
docker image push <myusername>/<imagename>:<tag>

Section: Module 1: Docker and Terraform
Question: Docker - Cannot pip install on Docker container (Windows)
Answer: You may have this error:
Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.u
rllib3.connection.HTTPSConnection object at 0x7efe331cf790>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')':
/simple/pandas/
Possible solution might be:
$ winpty docker run -it --dns=8.8.8.8 --entrypoint=b

In [16]:
# Prompt for a single question/context pair. Fixes the "contan" typo in the
# instruction text that is sent to the model.
prompt = f"""
You're a course teaching assistant. Answer the user QUESTION based on CONTEXT - the documents retrieved from our FAQ database. 
Only use the facts from the CONTEXT. If the CONTEXT doesn't contain the answer, return "NONE"

QUESTION: {user_question}

CONTEXT:

{context}
""".strip()

In [17]:
# Rendering of one FAQ entry inside the prompt context.
context_template = (
    "Section: {section}\n"
    "Question: {question}\n"
    "Answer: {text}"
)

# Full prompt; `{user_question}` and `{context}` are filled in by
# `str.format` when the prompt is built.
prompt_template = (
    "You're a course teaching assistant.\n"
    "Answer the user QUESTION based on CONTEXT - the documents retrieved from our FAQ database.\n"
    "Don't use other information outside of the provided CONTEXT.  \n"
    "\n"
    "QUESTION: {user_question}\n"
    "\n"
    "CONTEXT:\n"
    "\n"
    "{context}"
)


def build_context(documents):
    """Render FAQ entries into one context string.

    Each entry is formatted with the module-level ``context_template``;
    entries are separated by a blank line and the result is stripped of
    leading and trailing whitespace. An empty input yields an empty string.
    """
    rendered_entries = (context_template.format(**entry) for entry in documents)
    return "\n\n".join(rendered_entries).strip()


def build_prompt(user_question, documents):
    """Fill ``prompt_template`` with the question and the rendered FAQ entries."""
    return prompt_template.format(
        user_question=user_question,
        context=build_context(documents),
    )

def ask_openai(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client``; ``model`` selects the chat model.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    return completion.choices[0].message.content

def qa_bot(user_question):
    """Answer ``user_question``: retrieve FAQ entries, build the prompt, ask the model."""
    faq_entries = retrieve_documents(user_question)
    return ask_openai(build_prompt(user_question, faq_entries))

In [18]:
# A question the FAQ is expected to cover.
postgres_question = "I can't connect to postgres port 5432, my password doesn't work"
qa_bot(postgres_question)

"If you are having trouble connecting to the Postgres port 5432 and your password doesn't work, it could be due to a couple of reasons:\n\n1. **Port Conflict:** If port 5432 is already taken by another Postgres instance on your machine, it could result in connection issues. You can resolve this by mapping your docker container to a different port, such as 5431. For example:\n   ```python\n   engine = create_engine('postgresql://root:root@localhost:5431/ny_taxi')\n   ```\n   Use the port you have mapped instead of 5432.\n\n2. **Local Postgres Service:** If you have a local Postgres service running on your machine, it could interfere with your Docker container's connection. In this case, stopping the local Postgres service may resolve the issue. On Windows, you can stop the service by going to Services and stopping the Postgres service.\n\n3. **Incorrect Password or Role:** Ensure you have the correct username and password. If you are using 'root' but the role does not exist in your Post

In [5]:
# An off-topic question. The cell that defines `qa_bot` must have been run in
# the current kernel session before this one.
off_topic_question = "What can I have for lunch today in munich"
qa_bot(off_topic_question)

NameError: name 'qa_bot' is not defined