In [38]:
from openai import OpenAI
from elasticsearch import Elasticsearch

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

In [39]:
es_client = Elasticsearch('http://localhost:9200') 

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

index_name = "course-questions"

es_client.indices.create(index=index_name, body=index_settings)

  es_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [40]:
import requests 

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [41]:
from tqdm.auto import tqdm

for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

100%|██████████████████████████████████████████████████████████████| 948/948 [00:23<00:00, 39.95it/s]


In [42]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": "data-engineering-zoomcamp"
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)
    
    result_docs = []
    
    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])
    
    return result_docs

In [43]:
def build_prompt(query, search_results):
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def llm(prompt):
    response = client.chat.completions.create(
        model='phi3',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [44]:

def rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [None]:
query = 'I just disovered the course. Can I still join it?'
rag(query)

  response = es_client.search(index=index_name, body=search_query)


" The inference time for a PHI-III (Phi NN Accelerator) model, or any deep learning model, depends on various factors such as:\n\n1. Model complexity and size\n2. Implementation details in the deployment framework\n3. Input data characteristics (e.g., batch size, sequence length)\n4. System architecture and CPU specifications\n\nGiven a 16 GB RAM 4-core CPU without specifying an underlying deep learning library or network implementation, it's difficult to provide a precise inference time for the PHI-III model. However, let's discuss some general factors that might affect inference performance:\n\n1. Memory requirements: The memory bandwidth and available cache can influence how quickly data is loaded into RAM during each iteration of forward or backward propagation through the neural network layers. Although a 16 GB RAM may seem ample for deep learning models, larger models with large input batch sizes might require additional considerations to manage memory efficiently.\n2. CPU utiliz

## ELASTICSEARCH

In [32]:
from elasticsearch import Elasticsearch


In [35]:
es_client = Elasticsearch("http://localhost:9200")

In [36]:
es_client.info()

ObjectApiResponse({'name': 'ab877cb8fbfe', 'cluster_name': 'docker-cluster', 'cluster_uuid': '6lrC2wraQOWuUpkvz_TN7Q', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [37]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}


In [38]:
index_name = "course-questions"
es_client.indices.create(index=index_name, body = index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [39]:
from tqdm.auto import tqdm 

for doc in tqdm(documents):
    es_client.index(index=index_name,document=doc)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████████████████████████████████████| 948/948 [00:24<00:00, 39.16it/s]


In [46]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": "data-engineering-zoomcamp"
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)
    
    result_docs = []
    
    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])
    
    return result_docs

In [47]:
query = "I just discovered the course. Can I still join?"

In [48]:
def rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [49]:
rag(query)

"Yes, you can still join the course even if it has already started. You're eligible to submit homeworks regardless of registration. However, please note that there will be deadlines for turning in the final projects. It's advisable not to leave everything for the last minute. If you're just starting, you might want to begin by installing and setting up all the dependencies and requirements such as Google Cloud account, Google Cloud SDK, Python 3 (installed with Anaconda), Terraform, and Git."