In [27]:
from openai import OpenAI

# OpenAI-compatible client pointed at a server on localhost:11434 rather than
# the hosted OpenAI API.
# NOTE(review): the port and the placeholder key suggest a local Ollama server,
# which presumably ignores the key's value — confirm against your setup.
client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

In [28]:
from elasticsearch import Elasticsearch

In [29]:
# Elasticsearch client used by the index-creation, indexing and search cells below.
es_client = Elasticsearch('http://localhost:9200') 

In [30]:
# Index layout for the FAQ documents: the three free-text fields are analysed
# for full-text search, while "course" is a keyword so it can be used in an
# exact-match term filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

index_name = "course-questions"

# Creating an index that already exists raises an error, which would break
# re-running this cell (or Restart & Run All against a live cluster).
# Only create the index when it is missing.
if es_client.indices.exists(index=index_name):
    print(f"Index '{index_name}' already exists; skipping creation.")
else:
    print(es_client.indices.create(index=index_name, body=index_settings))

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [31]:
import requests 

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing JSON decoding failure on an error page.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the name of the course it belongs to.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        # Copy instead of mutating so documents_raw stays exactly as downloaded.
        documents.append({**doc, 'course': course_name})

In [32]:
from tqdm.auto import tqdm

In [33]:
# Index every FAQ entry with one request per document; tqdm shows progress.
# No explicit document id is passed, so re-running this cell indexes the same
# entries again.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/948 [00:00<?, ?it/s]

In [35]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Retrieve the FAQ entries that best match ``query`` for one course.

    Args:
        query: Free-text user question to match against the indexed documents.
        course: Exact value of the ``course`` field to restrict results to.
            Defaults to the course that was previously hard-coded.
        size: Maximum number of hits to return. Defaults to 5.

    Returns:
        A list with the ``_source`` dict of each hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # "^3" boosts matches in the question field threefold.
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # The filter narrows results to one course without affecting scoring.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [36]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and retrieved FAQ entries.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of dicts, each providing the keys
            'section', 'question' and 'text'.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.
    """
    # Written line by line so the trailing space after "CONTEXT:" is explicit.
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One text block per retrieved entry, each terminated by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()

def llm(prompt, model='phi3'):
    """Send ``prompt`` as a single user message and return the model's reply.

    Args:
        prompt: Full prompt text to send.
        model: Name of the model to query on the configured endpoint.
            Defaults to 'phi3', the value that was previously hard-coded.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

# Function to call LLM with the built prompt
# def llm(prompt):
#     try:
#         response = client.chat.completions.create(
#             model='phi3',
#             messages=[{"role": "user", "content": prompt}]
#         )
#         return response.choices[0].message.content
#     except Exception as e:
#         if 'requires more system memory' in str(e):
#             # Retry with a smaller context if memory error occurs
#             return llm_with_smaller_context(prompt)
#         else:
#             raise e

# # Function to retry LLM with a smaller context
# def llm_with_smaller_context(prompt):
#     # Extract context from the prompt
#     context_start = prompt.index("CONTEXT:") + len("CONTEXT:") + 1
#     context = prompt[context_start:]

#     # Split context into individual sections
#     sections = context.split('\n\n')
    
#     if len(sections) > 1:
#         # Use only the first half of the sections
#         smaller_context = '\n\n'.join(sections[:len(sections) // 2])
#         smaller_prompt = prompt[:context_start] + smaller_context
#         try:
#             response = client.chat.completions.create(
#                 model='phi3',
#                 messages=[{"role": "user", "content": smaller_prompt}]
#             )
#             return response.choices[0].message.content
#         except Exception as e:
#             if 'requires more system memory' in str(e):
#                 # Fallback response if the smaller context also fails
#                 return "The system is currently experiencing high memory usage. Please try again later."
#             else:
#                 raise e
#     else:
#         return "The system is currently experiencing high memory usage. Please try again later."


In [37]:
def rag(query):
    """Answer ``query`` end to end: retrieve, build the prompt, ask the LLM.

    Args:
        query: The user's question.

    Returns:
        Whatever ``llm`` returns for the prompt built from the retrieved entries.
    """
    retrieved = elastic_search(query)
    return llm(build_prompt(query, retrieved))

In [38]:
# Example question run through the full pipeline. The string is passed to
# retrieval and to the LLM exactly as written, so editing it can change the answer.
query = 'I just disovered the course. Can I still join it?'
rag(query)

'Based on the FAQ database, if you have discovered the Data Engineering Bootcamp course late or after it started already, yes, you can still join and participate as you are eligible to submit assignments even without prior registration. However, there will be deadlines for final projects submission, so plan your work accordingly.'