In [None]:
# Import libraries

import pandas as pd
import json
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch
from openai import OpenAI
from tqdm.autonotebook import tqdm
import random

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Read JSON document with generated ids
with open('..\data\data-with-ids.json', 'rt') as f_in:
    documents = json.load(f_in)

In [4]:
doc_idx = {d['id']: d for d in documents}

In [5]:
# Ground truth data

df_questions = pd.read_csv('..\data\ground-truth-retrieval.csv')
ground_truth = df_questions.to_dict(orient='records')

In [6]:
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

In [7]:
v = model.encode('symptoms of autism')
len(v)

384

In [8]:
es_client = Elasticsearch('http://localhost:9200') 

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "question": {"type": "text"},
            "answer": {"type": "text"},
            "source": {"type": "text"},
            "focus_area": {"type": "text"},
            "id": {"type": "keyword"},
            "question_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "answer_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "question_answer_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "ques_focusarea_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "ques_answer_focus_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
            "answer_focusarea_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "health-questions"

es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'health-questions'})

In [9]:
for doc in tqdm(documents):
    question = doc['question']
    answer = doc['answer']
    focus = doc['focus_area']
    
    qa = question + ' ' + answer    
    qf = question + ' ' + focus
    af = answer + ' ' + focus
    qaf = answer + ' ' + answer + ' ' + focus

    doc['question_vector'] = model.encode(question)
    doc['answer_vector'] = model.encode(answer)
    doc['question_answer_vector'] = model.encode(qa)
    doc['ques_focusarea_vector'] = model.encode(qf)
    doc['ques_answer_focus_vector'] = model.encode(qaf)
    doc['answer_focusarea_vector'] = model.encode(af)

  0%|          | 0/292 [00:00<?, ?it/s]

In [10]:
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/292 [00:00<?, ?it/s]

In [108]:
def elastic_search_knn(field, vector):
    knn = {
        "field": field,
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000}

    search_query = {
        "knn": knn,
        "_source": ["question", "answer", "source", "focus_area", "id"]
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [12]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

In [13]:
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [14]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [15]:
def question_vector_knn(q):
    question = q['question']

    v_q = model.encode(question)

    return elastic_search_knn('question_vector', v_q)

evaluate(ground_truth, question_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.963013698630137, 'mrr': 0.9348401826484016}

In [16]:
def answer_vector_knn(q):
    question = q['question']
    v_q = model.encode(question)
    return elastic_search_knn('answer_vector', v_q)

evaluate(ground_truth, answer_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.9575342465753425, 'mrr': 0.9026369863013699}

In [111]:
def qa_vector_knn(q):
    question = q['question']
    v_q = model.encode(question)
    return elastic_search_knn('question_answer_vector', v_q)

evaluate(ground_truth, qa_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.9849315068493151, 'mrr': 0.9551027397260271}

In [22]:
def qf_vector_knn(q):
    question = q['question']
    v_q = model.encode(question)
    return elastic_search_knn('ques_focusarea_vector', v_q)

evaluate(ground_truth, qf_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.9616438356164384, 'mrr': 0.9249885844748859}

In [23]:
def qaf_vector_knn(q):
    question = q['question']
    v_q = model.encode(question)
    return elastic_search_knn('ques_answer_focus_vector', v_q)

evaluate(ground_truth, qaf_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.9705479452054795, 'mrr': 0.9136073059360729}

In [24]:
def af_vector_knn(q):
    question = q['question']
    v_q = model.encode(question)
    return elastic_search_knn('answer_focusarea_vector', v_q)

evaluate(ground_truth, af_vector_knn)

  0%|          | 0/1460 [00:00<?, ?it/s]

{'hit_rate': 0.976027397260274, 'mrr': 0.9332648401826482}

The combination of question and answer vectors has the highest hit rate and mrr with ; 

'hit_rate': 0.9849315068493151, 'mrr': 0.9551027397260271

### **RAG Flow**

In [28]:
# Define a function that creates a prompt for an LLM to answer health-related questions based on the given data

def build_prompt(query, search_results):
    prompt_template = """
You're a healthcare assistant AI. Answer the QUESTION based on the CONTEXT provided from a health FAQ database.
Use only the facts from the CONTEXT to provide an accurate, clear, and concise answer.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context += f"Question: {doc['question']}\nAnswer: {doc['answer']}\nSource: {doc['source']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [29]:
client = OpenAI()

def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [127]:
def rag(query_dict, model='gpt-4o-mini'):
    # Extract the question from the input dictionary
    question = query_dict['question']  
    search_results = qa_vector_knn(query_dict) 
    prompt = build_prompt(question, search_results)
    answer = llm(prompt, model=model) 
    return answer

In [129]:
question_dict = {'question': 'how do i know if i have an autoimmune disease?'}

answer = rag(question_dict)
print(answer) 

To determine if you have an autoimmune disease, consult your doctor for a proper evaluation. They will typically conduct a physical exam, take a detailed history of your health, and may order several tests, including blood tests to check for specific antibodies or markers associated with autoimmune diseases. A thorough assessment of any symptoms you are experiencing, like fatigue, joint pain, or skin conditions, will also help guide diagnosis. Always seek medical advice if you suspect you have an autoimmune condition.
