# Monitoring
### In this module we will discuss monitoring, which differs from the evaluation module.
### In this module, we evaluate the entire RAG pipeline to ensure that the system works as we expect.
### In the evaluation module, we only tested that the retrieval part works well.

## In this module we mainly consider offline monitoring, i.e. monitoring done before production.

## Load documents with IDs

In [1]:
import os

import pandas as pd
import requests
from elasticsearch import Elasticsearch
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the source FAQ documents. Remember: this is the initial dataset that
# holds all the "answers"; 5 questions were generated per answer to build the
# ground-truth dataset that we evaluate against.
base_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main'
relative_url = '03-vector-search/eval/documents-with-ids.json'
docs_url = f'{base_url}/{relative_url}?raw=1'
docs_response = requests.get(docs_url)
# Fail fast on an HTTP error (404, rate limit, ...) instead of surfacing it
# later as a confusing JSON decode error.
docs_response.raise_for_status()
documents = docs_response.json()

In [3]:
# Peek at a single document to see which fields each record carries.
documents[10]

{'text': 'It depends on your background and previous experience with modules. It is expected to require about 5 - 15 hours per week. [source1] [source2]\nYou can also calculate it yourself using this data and then update this answer.',
 'section': 'General course-related questions',
 'question': 'Course - \u200b\u200bHow many hours per week am I expected to spend on this  course?',
 'course': 'data-engineering-zoomcamp',
 'id': 'ea739c65'}

## Load ground truth dataset

In [4]:
# Read the ground-truth CSV straight from the course repository and keep only
# the rows of the machine-learning-zoomcamp course, as a list of record dicts.
base_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main'
relative_url = '03-vector-search/eval/ground-truth-data.csv'
ground_truth_url = f'{base_url}/{relative_url}?raw=1'

df_ground_truth = pd.read_csv(ground_truth_url)
df_ground_truth = df_ground_truth[
    df_ground_truth['course'] == 'machine-learning-zoomcamp'
]
ground_truth = df_ground_truth.to_dict(orient='records')

In [5]:
# Inspect one ground-truth record.
ground_truth[10]

{'question': 'Are sessions recorded if I miss one?',
 'course': 'machine-learning-zoomcamp',
 'document': '5170565b'}

In [6]:
# Build an id -> document lookup over the source documents (not the ground
# truth), so a ground-truth record can be resolved to its original answer.
doc_idx = {entry['id']: entry for entry in documents}
# The 'text' field of a document is the answer.
doc_idx['5170565b']['text']

'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.'

## Index the data

Run this command in your terminal to start Elasticsearch in Docker, so that we can connect to it locally:

docker run -it \
  --rm \
  --name elasticsearch \
  -p 9200:9200 \
  -p 9300:9300 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  docker.elastic.co/elasticsearch/elasticsearch:8.4.3

In [7]:
# Initialize the embedding model used for both documents and queries.
# NOTE(review): its output size presumably matches the 384 `dims` declared in
# the index mapping — confirm if the model name is changed.
model_name ='multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

In [8]:
# Initialize the vector DB client; it points at the local Elasticsearch
# instance exposed on port 9200.
es_client= Elasticsearch('http://localhost:9200')

In [9]:
# Index definition: one shard and no replicas, plus the field mappings of the
# FAQ documents and of the vector stored alongside them.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            # Full-text fields.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # Exact-match fields (the course is used as a filter).
            "course": {"type": "keyword"},
            "id": {"type": "keyword"},
            # Embedding of question + text, indexed for cosine kNN search.
            "question_text_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine",
            },
        },
    },
}

In [10]:
# Create the index from the settings/mapping defined above.
# The index is dropped first so that re-running this cell starts from an empty
# index; `ignore_unavailable=True` is passed so the delete is tolerated when
# the index does not exist yet.
index_name = "course-questions"
es_client.indices.delete(index = index_name , ignore_unavailable=True) # delete index if it exists
es_client.indices.create(index = index_name , body= index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [11]:
# Create an embedding for every document and store the document in the index.
# The embedded text is the question and the answer joined by a space.
# NOTE: this adds a 'question_text_vector' key to each dict in `documents` in
# place; `doc_idx` holds the same dict objects, so its entries gain the key too.
for doc in tqdm(documents):
    question_text= doc['question'] +' '+doc['text']
    doc['question_text_vector']= model.encode(question_text)
    es_client.index(index= index_name, document=doc)

100%|████████████████████████████████████████| 948/948 [01:16<00:00, 12.31it/s]


## Retrieval

In [12]:
def elastic_search_knn(field, vector, course, k=5, num_candidates=10000):
    """Run a course-filtered kNN search against the Elasticsearch index.

    Args:
        field: name of the dense-vector field to search on.
        vector: query embedding compared against `field`.
        course: course identifier; only documents whose `course` term equals
            it are considered.
        k: number of nearest neighbours requested (default 5, the value that
            was previously hard-coded).
        num_candidates: value passed as the kNN `num_candidates` option
            (default 10000, the value that was previously hard-coded).

    Returns:
        A list with the `_source` dict of every hit, in the order
        Elasticsearch returned them. Each dict is limited to the fields
        text, section, question, course and id.
    """
    search_query = {
        "knn": {
            "field": field,
            "query_vector": vector,
            "k": k,
            "num_candidates": num_candidates,
            "filter": {
                "term": {
                    "course": course
                }
            },
        },
        # Leave the stored vector out of the response; only the readable
        # fields are needed downstream.
        "_source": ["text", "section", "question", "course", "id"],
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )

    return [hit['_source'] for hit in es_results['hits']['hits']]

In [13]:
def question_vector_knn(q, field='question_text_vector'):
    """Embed the question of `q` and retrieve matching documents of its course.

    Args:
        q: dict with a 'question' and a 'course' key.
        field: vector field to search on.

    Returns:
        Whatever `elastic_search_knn` returns for the embedded question.
    """
    question_text, course_name = q['question'], q['course']
    query_embedding = model.encode(question_text)
    return elastic_search_knn(
        field=field,
        vector=query_embedding,
        course=course_name,
    )

In [14]:
# Sanity check: retrieve documents for one known question of the
# machine-learning-zoomcamp course.
question_vector_knn(dict(
    question='Are sessions recorded if I miss one?',
    course='machine-learning-zoomcamp'
))

[{'question': 'What if I miss a session?',
  'course': 'machine-learning-zoomcamp',
  'section': 'General course-related questions',
  'text': 'Everything is recorded, so you won’t miss anything. You will be able to ask your questions for office hours in advance and we will cover them during the live stream. Also, you can always ask questions in Slack.',
  'id': '5170565b'},
 {'question': 'Is it going to be live? When?',
  'course': 'machine-learning-zoomcamp',
  'section': 'General course-related questions',
  'text': 'The course videos are pre-recorded, you can start watching the course right now.\nWe will also occasionally have office hours - live sessions where we will answer your questions. The office hours sessions are recorded too.\nYou can see the office hours as well as the pre-recorded course videos in the course playlist on YouTube.',
  'id': '39fda9f0'},
 {'question': 'The same accuracy on epochs',
  'course': 'machine-learning-zoomcamp',
  'section': '8. Neural Networks an

## RAG Flow {from module1}

In [15]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from a question and the retrieved documents.

    Args:
        query: the question text inserted after "QUESTION:".
        search_results: iterable of dicts with 'section', 'question' and
            'text' keys; each one becomes an entry of the CONTEXT part.

    Returns:
        The formatted prompt with surrounding whitespace stripped.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One entry per retrieved document, each followed by a blank line.
    entries = (
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )
    context = "".join(entries)

    return template.format(question=query, context=context).strip()

In [16]:
def llm(prompt, model='gpt-4o'):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: content of the user message.
        model: name of the chat model to call.

    Returns:
        The message content of the first choice in the response.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=model,
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [17]:
# previously: rag(query: str) -> str
def rag(query: dict, model='gpt-4o') -> str:
    """Answer a question with the full retrieve -> prompt -> LLM flow.

    Args:
        query: dict with at least a 'question' key; it is passed as-is to
            `question_vector_knn` for retrieval.
        model: name of the chat model forwarded to `llm`.

    Returns:
        The answer produced by `llm`.
    """
    # The retrieved documents become the CONTEXT part of the prompt.
    context_docs = question_vector_knn(query)
    return llm(build_prompt(query['question'], context_docs), model=model)

In [18]:
# Read the API key from the environment instead of hard-coding it, so the
# secret is never saved or committed together with the notebook.
# When OPENAI_API_KEY is not set this is None.
# NOTE(review): the OpenAI client is expected to raise at construction time in
# that case — confirm for the installed client version.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

In [19]:
# The ground-truth record used for the single-question walkthrough below.
ground_truth[10]

{'question': 'Are sessions recorded if I miss one?',
 'course': 'machine-learning-zoomcamp',
 'document': '5170565b'}

In [20]:
# Generate an answer for that record with the RAG flow (calls the LLM).
rag_answ=rag(ground_truth[10])

In [21]:
# Show the generated answer.
rag_answ

'Yes, sessions are recorded if you miss one. You can catch up on the recorded sessions and ask questions in advance for office hours or in Slack.'

In [22]:
# The reference answer: text of the document this ground-truth record points
# to (its 'document' id is '5170565b').
orginal_answ=doc_idx["5170565b"]["text"] # the correct answer

### Cosine Similarity

In [23]:
# Embed the generated and the reference answer and compare them.
# NOTE(review): a plain dot product equals cosine similarity only when both
# vectors have unit length — presumably the case for this model's embeddings;
# confirm before swapping in another model.
rag_vector=model.encode(rag_answ)
original_vector=model.encode(orginal_answ)
rag_vector.dot(original_vector)

0.63458365

In [26]:
# Results keyed by the position of the record in `ground_truth`. Kept in its
# own cell so that re-running the generation loop does not reset it.
answers={}

In [None]:
# Repeat the single-question walkthrough for the entire ground-truth dataset:
# 1. go through every ground-truth question,
# 2. generate an answer with the LLM,
# 3. store it next to the original answer of the source document, so that the
#    cosine similarity between the two can be computed afterwards.
# NOTE: this loop calls the OpenAI API once per record and therefore costs
# money. The course provides the precomputed output (results-gpt4o.csv, and
# another file for gpt-3.5) that can be loaded instead of running this cell.
for i, record in enumerate(ground_truth):
    # Skip records that already have an answer, so the loop can be resumed
    # after an error without recomputing (and paying for) them again.
    if i in answers:
        continue
    answer_llm = rag(record)
    # Look up the source document by id to get the reference answer text.
    doc_id = record['document']
    answer_original = doc_idx[doc_id]['text']
    answers[i] = {
        "answer_llm": answer_llm,
        "answer_original": answer_original,
        "document": doc_id,
        'question': record['question'],
        'course': record['course'],
    }