In [None]:
import json

from tools.elastic_search.elastic_search_client import ElasticSearchClient
from tools.elastic_search.index_settings import ElasticSearchIndexSettings

from tools.llms.openai.azure_openai import AzureOpenAIClient

# Preparing data

In [None]:
# Load the raw FAQ export. The encoding is pinned to UTF-8 so the result does
# not depend on the platform's default text encoding.
with open('./documents.json', 'rt', encoding='utf-8') as f_in:
    documents_file = json.load(f_in)

In [None]:
def prepare_data(documents_file):
    """Flatten the per-course FAQ structure into a single list of documents.

    Args:
        documents_file: Iterable of course entries. Each entry is a mapping
            with a ``'course'`` name and a ``'documents'`` iterable of
            mappings.

    Returns:
        A new list with one dict per document, in input order. Each dict is a
        shallow copy of the source document with its ``'course'`` key set to
        the name of the course it came from.

    Raises:
        KeyError: If a course entry lacks ``'course'`` or ``'documents'``.

    The input is not modified, so re-running the calling cell always starts
    from the same raw data.
    """
    documents = []

    for course in documents_file:
        course_name = course['course']
        # Copy each document instead of writing the course name into the
        # caller's dicts.
        documents.extend(
            {**doc, 'course': course_name} for doc in course['documents']
        )
    return documents

In [None]:
# Flatten the loaded course structure into the list of documents used below.
documents = prepare_data(documents_file=documents_file)

# Database Retrieval - ElasticSearch

Start Elasticsearch with the Docker command below:

```
docker run -it \
    --name elasticsearch \
    -p 9200:9200 \
    -p 9300:9300 \
    -e "discovery.type=single-node" \
    -e "xpack.security.enabled=false" \
    docker.elastic.co/elasticsearch/elasticsearch:8.4.3
```

If the `elasticsearch` container already exists, restart it instead (for example with `docker start elasticsearch`) before running the cells below.

In [None]:
# Client for the local Elasticsearch node published on port 9200 by the
# docker command above.
esc = ElasticSearchClient(
    host='localhost',
    port='9200',
)

In [None]:
# Field mappings for the FAQ index: the three free-text fields are mapped as
# "text"; "course" is mapped as "keyword".
index_name = "course-questions"
properties = {field: {"type": "text"} for field in ("text", "section", "question")}
properties["course"] = {"type": "keyword"}

es_index = ElasticSearchIndexSettings(name=index_name, properties=properties)
esc.create_index(es_index)

In [None]:
# Push the flattened FAQ documents into the index created above.
esc.index_documents(
    index_name=index_name,
    documents=documents,
)

In [None]:
user_question = "How do I join the course after it has started?"

# Full-text clause: match the question against three fields; the "^3" suffix
# is Elasticsearch's per-field boost syntax, so "question" is weighted 3x.
question_match = {
    "multi_match": {
        "query": user_question,
        "fields": ["question^3", "text", "section"],
        "type": "best_fields",
    }
}

# Exact-match clause: restrict hits to a single course.
course_filter = {"term": {"course": "data-engineering-zoomcamp"}}

search_query = {
    "size": 5,
    "query": {"bool": {"must": question_match, "filter": course_filter}},
}

response = esc.search(index_name=index_name, search_query=search_query)
results = esc.extract_info_from_search(response)

# Prompting - OpenAI

In [None]:
def build_context(response):
    """Render retrieved FAQ documents as one context string for the prompt.

    Args:
        response: Iterable of mappings, each providing ``'section'``,
            ``'question'`` and ``'text'`` keys.

    Returns:
        The documents formatted as "Section / Question / Answer" entries
        separated by a blank line, with leading and trailing whitespace
        removed. An empty iterable yields an empty string.

    Raises:
        KeyError: If a document lacks one of the three keys.
    """
    entries = (
        f"Section: {doc['section']}\nQuestion: {doc['question']}\nAnswer: {doc['text']}\n\n"
        for doc in response
    )
    # Return the context instead of printing it, so callers can place it in a prompt.
    return "".join(entries).strip()

In [None]:
# Chat client for the prompting cells below. It is constructed with no
# arguments, so any endpoint/credential settings presumably come from inside
# AzureOpenAIClient — TODO confirm.
oa_client = AzureOpenAIClient()

In [None]:
# System instructions: answer strictly from the retrieved FAQ context.
system_prompt = (
    "You're a course teaching assistant. Answer the user QUESTION based on CONTEXT - "
    "the documents retrieved from our FAQ database.\n"
    "Only use the facts from the CONTEXT. "
    "If the CONTEXT doesn't contain the answer, return \"NONE\"."
)

user_prompt_template = """
QUESTION: {user_question}

CONTEXT:

{context}
""".strip()

# NOTE(review): assumes `results` (from the search cell) is an iterable of
# dicts with 'section', 'question' and 'text' keys — confirm against
# ElasticSearchClient.extract_info_from_search.
context = "\n\n".join(
    f"Section: {doc['section']}\nQuestion: {doc['question']}\nAnswer: {doc['text']}"
    for doc in results
).strip()

# Fill the placeholders; otherwise the model would receive the literal
# "{user_question}" / "{context}" template text.
user_prompt = user_prompt_template.format(user_question=user_question, context=context)

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

In [None]:
# Send the system + user messages to the chat client; as the last expression
# in the cell, whatever chat() returns is shown as the cell output.
oa_client.chat(messages=messages)

# Specific RAG

In [None]:
from tools.rag.elastsearch_openai_rag import ElastSearchOpenAIRAG

: 

In [None]:
# Fresh Elasticsearch and chat clients for the packaged RAG workflow below.
esc = ElasticSearchClient(
    host='localhost',
    port='9200',
)
oa_client = AzureOpenAIClient()

: 

In [None]:
# Wire the two clients into the RAG helper; no embedding model is supplied.
es_oa_rag = ElastSearchOpenAIRAG(
    vector_db_client=esc,
    embedding_model=None,
    chat_model=oa_client,
)

: 

In [None]:
# Hand the flattened FAQ documents to the RAG helper for loading into its
# vector DB client.
es_oa_rag.load_docs_into_vector_db(documents)

In [None]:
# Run one user question through the RAG helper; as the last expression in the
# cell, its return value (if any) is shown as the cell output.
es_oa_rag.chat_workflow("I can't connect to postgres port 5432, my password doesn't work")