# Prepare the OpenAI client object

In [1]:
from openai import OpenAI

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [3]:
# Point the OpenAI SDK at a locally served, OpenAI-compatible endpoint instead
# of the hosted API. NOTE(review): port 11434 and the 'ollama' key suggest an
# Ollama server, where the key is only a placeholder — confirm.
client = OpenAI(
    api_key='ollama',
    base_url='http://localhost:11434/v1/',
)

In [4]:
client

<openai.OpenAI at 0x1127a7a70>

# Prepare documents for information retrieval

In [5]:
import json

In [6]:
# Read the raw FAQ data. The encoding is passed explicitly so that decoding the
# JSON file does not depend on the platform's default locale encoding.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [7]:
# Flatten the per-course groups in docs_raw into a single list of documents,
# tagging each one with the course it belongs to. Every document is copied
# rather than updated in place, so docs_raw stays exactly as it was loaded.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

# RAG with Elasticsearch

In [8]:
from elasticsearch import Elasticsearch

In [9]:
# Client for the Elasticsearch node expected at localhost:9200.
es_client=Elasticsearch('http://localhost:9200')

In [10]:
# Fetch basic cluster information; a successful response shows the node is reachable.
es_client.info()

ObjectApiResponse({'name': '835abff421df', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'xw6W0pMoRRiWvXbuISTmBw', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [11]:
# Index definition: a single shard with no replicas, plus the field mappings.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            # Analyzed full-text fields.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # Exact-value field, used for term filtering.
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run so this cell can be re-executed:
# creating an index that already exists raises an error, and documents kept
# from an earlier run would be duplicated when the indexing cell runs again.
# WARNING: this deletes all data currently stored under index_name.
es_client.indices.delete(index=index_name, ignore_unavailable=True)

# Create an index in Elasticsearch (roughly equivalent to a table in a SQL database).
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [12]:
for doc in documents:
    # Send each document to the index with its own request.
    es_client.index(index=index_name, document=doc)

In [13]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Search the index for the documents that best match `query`.

    Args:
        query: Free-text question to search for.
        course: Value the `course` field must equal; only documents of this
            course are returned. Defaults to the course that was previously
            hard-coded, so existing callers behave as before.
        size: Maximum number of hits to return (default 5).

    Returns:
        A list with the `_source` of each hit, in the order the search
        response lists them. Empty when nothing matches.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # Matches in `question` are boosted 3x relative to
                        # matches in `text` or `section`.
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Restrict the search to a single course's documents.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    search_results = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in search_results['hits']['hits']]

In [14]:
def build_prompt(query, search_results):
    """Render the LLM prompt for `query` from the retrieved documents.

    Args:
        query: The user's question; inserted after ``QUESTION:``.
        search_results: Iterable of dicts, each providing at least the keys
            ``section``, ``question`` and ``text``. Extra keys are ignored.

    Returns:
        The prompt string: the question, then one context entry per document
        separated by a blank line, then a trailing ``ANSWER:`` marker.

    Raises:
        KeyError: If a document lacks one of the keys used by the template.
    """
    # The templates are written inline, so every line after the first keeps
    # the indentation it has here; only the outer whitespace is stripped.
    prompt_template = """
    QUESTION: {question}

    CONTEXT:
    {context}
    
    ANSWER:
    """.strip()

    context_template = """
        section:{section}
        question: {question}
        answer: {text}
        """.strip()

    context_list = [context_template.format(**doc) for doc in search_results]

    # Separate entries with a real blank line. The separator used to be the
    # literal text '/n/n' (forward slashes), which ran the entries together.
    context = '\n\n'.join(context_list)

    prompt = prompt_template.format(question=query, context=context).strip()

    return prompt

# Load model locally

## phi3

In [15]:
def llm(prompt, model='phi3'):
    """Send `prompt` to the chat-completions endpoint and return the reply text.

    Args:
        prompt: Text sent as the content of a single ``user`` message.
        model: Name of the model to request. Defaults to ``'phi3'``, the value
            that was previously hard-coded, so existing calls are unchanged.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    # Only the first choice is read from the response.
    return response.choices[0].message.content

In [16]:
def elastic_rag(query):
    """Answer `query` by chaining retrieval, prompt construction and generation.

    Returns a 3-tuple ``(documents, prompt, answer)``: the documents returned
    by `elastic_search`, the prompt built from them by `build_prompt`, and the
    text `llm` produced for that prompt.
    """
    retrieved = elastic_search(query)
    rendered_prompt = build_prompt(query, retrieved)
    return retrieved, rendered_prompt, llm(rendered_prompt)

In [17]:
# Sample user question used to exercise the pipeline.
query= 'i just found out about this course, can i still join?'

In [18]:
# Run the full pipeline, keeping the retrieved documents and the prompt
# alongside the answer so each stage can be inspected.
search_results, prompt, answer = elastic_rag(query)

In [19]:
answer

" Yes, you can still join the course even after the start date has passed. However, there will be deadlines for submitting final projects, so it's recommended not to leave everything until the last minute. Once enrolled in a self-paced online course that is provided by Zoomcamp Q&A Bootcamp (ZoomCamp), you can expect similar experiences as outlined below:\n\n1. Before starting the class – As with any new learning experience, take some time to install necessary dependencies and prepare yourself before diving into the materials. Relevant prerequisites include having a Google Cloud account set up along with installing Python 3 (with Anaconda), Terraform, Git on your machine, among other requirements that will be found in their course syllabus or FAQ section if already available online for this specific Zoomcamp boot camp Q&A Boot Camp.\n\n2. Continuing the class after it ends – After completing a self-paced ZoomCamp course session, they keep all materials accessible to allow students more