In [1]:
import pandas as pd
import minsearch
import json

from tqdm.auto import tqdm
from openai import OpenAI
from dotenv import load_dotenv
from elasticsearch import Elasticsearch

## Load Document

In [2]:
load_dotenv()

True

In [3]:
df = pd.read_csv('notebooks/claims.csv')
df.columns = df.columns.str.lower()

In [4]:
documents = df.to_dict(orient='records')

In [5]:
documents[10]

{'category': 'Temporarily unable to work',
 'question': "Can I get sick pay if I'm self-isolating?",
 'answer': "Yes Statutory Sick Pay is available if you're self-isolating.",
 'section': 'general claim benefits'}

## Minsearch

In [6]:
index = minsearch.Index(
    text_fields=["question", "answer", "section"],
    keyword_fields=["category"]
)
index.fit(documents)

<minsearch.Index at 0x7e947a0d99a0>

In [7]:
q = "Can I get sick pay if I'm self-isolating?"

In [8]:
client = OpenAI()
response = client.chat.completions.create(
    model='gpt-4o',
    messages=[{"role": "user", "content": q}]
)

response.choices[0].message.content

"Whether you can get sick pay while self-isolating depends on several factors, including your country of residence and your employment status.\n\n### United Kingdom\n\nIn the UK, statutory sick pay (SSP) can be claimed by employees if they’re self-isolating in accordance with government and medical guidelines. However, SSP typically does not cover self-employed individuals.\n\n- **Employees**: Eligible if they earn at least £120 a week and have been self-isolating for more than 4 consecutive days.\n- **Self-employed**: Generally, self-employed individuals are not entitled to SSP, but they may be eligible for other forms of support like the Self-Employment Income Support Scheme (SEISS).\n\n### United States\n\n- **Employees**: Under the Families First Coronavirus Response Act (FFCRA) which expired in December 2020, employees of certain public employers or private employers with fewer than 500 employees could get paid sick leave. Some states have their own laws providing paid sick leave.

In [1]:
def search(query):
    boost = {'question': 3.0, 'category': 0.5}

    results = index.search(
        query=query,
        filter_dict={'section': 'general claim benefits'},
        boost_dict=boost,
        num_results=5
    )

    return results

In [10]:

def build_prompt(query, search_results):
    prompt_template = """
You are an expert in United Kingdom Benefit Claims and Medical Negligence Claims. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['answer']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt


In [11]:
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [16]:
query = "Can I get sick pay if I'm self-isolating?"

def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [17]:
rag(query)

"If you're self-isolating, you may be eligible for Statutory Sick Pay (SSP) provided you meet the qualifying criteria. While typically, SSP is available to employees, self-employed individuals might not directly qualify for SSP but might be eligible for other support like Universal Credit or Employment and Support Allowance (ESA) if they cannot work. It's always best to check the latest government guidance or consult with an advisor for the most accurate and tailored advice based on your specific situation."

## Elasticsearch

In [14]:
es_client = Elasticsearch('http://localhost:9200') 

In [18]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "category": {"type": "keyword"} 
        }
    }
}

index_name = "claim-assistant"

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'claim-assistant'})

In [19]:
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/425 [00:00<?, ?it/s]

In [20]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "answer", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "category": "general claim benefits"
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)
    
    result_docs = []
    
    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])
    
    return result_docs

In [21]:
def rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [22]:
query = "Can I get sick pay if I'm self-isolating?"
rag(query)

"If you cannot work because you're self-isolating, you may be eligible for statutory sick pay (SSP) if you meet the relevant criteria, even if you are not personally sick but are required to self-isolate by NHS guidelines. However, the specific eligibility criteria need to be checked to confirm your entitlement."

## Vector Search

In [23]:
from sentence_transformers import SentenceTransformer

model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)



In [24]:
len(model.encode("Getting size of model dim"))

384

In [34]:
questions = []
for doc in documents:
    doc["text_vector"] = model.encode(doc["question"]).tolist()
    questions.append(doc)

In [35]:
from elasticsearch import Elasticsearch
es_client = Elasticsearch('http://localhost:9200') 

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "section": {"type": "text"},
            "category": {"type": "keyword"},
            "text_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
        }
    }
}
index_name = "uk-benefit-claims"

es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'uk-benefit-claims'})

In [36]:
for doc in sections:
    try:
        es_client.index(index=index_name, document=doc)
    except Exception as e:
        print(e)

In [37]:
search_term = "Can I get sick pay if I'm self-isolating?"
vector_search_term = model.encode(search_term)

In [38]:
query = {
    "field": "text_vector",
    "query_vector": vector_search_term,
    "k": 5,
    "num_candidates": 10000, 
}

In [39]:
res = es_client.search(index=index_name, knn=query, source=["text", "section", "question", "course"])
res["hits"]["hits"]

[{'_index': 'uk-benefit-claims',
  '_id': 'OioGvZEBrjjSDBTcBurJ',
  '_score': 1.0,
  '_source': {'question': "Can I get sick pay if I'm self-isolating?",
   'section': 'general claim benefits'}},
 {'_index': 'uk-benefit-claims',
  '_id': 'PCoGvZEBrjjSDBTcBur3',
  '_score': 0.79467446,
  '_source': {'question': 'How do I apply for sick pay?',
   'section': 'general claim benefits'}},
 {'_index': 'uk-benefit-claims',
  '_id': 'PSoGvZEBrjjSDBTcB-oP',
  '_score': 0.7573199,
  '_source': {'question': 'What is statutory sick pay?',
   'section': 'general claim benefits'}},
 {'_index': 'uk-benefit-claims',
  '_id': 'ZyoGvZEBrjjSDBTcJOsI',
  '_score': 0.73088014,
  '_source': {'question': 'Can compensation cover the cost of private medical treatment?',
   'section': 'nhs claim benefits'}},
 {'_index': 'uk-benefit-claims',
  '_id': 'sioGvZEBrjjSDBTcLOsj',
  '_score': 0.711921,
  '_source': {'question': 'Is a miscommunication between healthcare providers a breach of duty?',
   'section': 'nhs cl