In [1]:
import requests
import pandas as pd

In [5]:
# Load the claims data from CSV; the path is relative to the kernel's working directory.
df = pd.read_csv('notebooks/claims.csv')

In [6]:
# Convert the DataFrame into a list of dicts, one per row, keyed by column name.
documents = df.to_dict(orient='records')

In [7]:
# Display the first record to see which keys each document carries.
documents[0]

{'Category': 'Manage existing benefit',
 'Question': 'How do I update my benefit information?',
 'Answer': 'You can update your benefit information online through your account.',
 'Section': 'general claim benefits'}

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; the same `model` object encodes both the documents
# and the search term in the cells below.
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

In [None]:
# Length of one embedding produced by the model; this should match the "dims"
# value declared for the text_vector field in the index mapping (384).
len(model.encode("Getting size of model dim"))

In [None]:
# Build the documents to index: one dict per record with lowercase field names
# plus a "text_vector" embedding of the record's category.
sections = []
for doc in documents:
    # The CSV columns are capitalised ('Category', 'Question', 'Answer',
    # 'Section'), while the index mapping and the search requests use lowercase
    # field names. Normalise the keys on a copy so the records in `documents`
    # are left untouched and this cell can be re-run safely.
    section = {key.lower(): value for key, value in doc.items()}
    # Encode the category text; .tolist() converts the array returned by
    # model.encode into a plain list that can be sent as JSON.
    section["text_vector"] = model.encode(section["category"]).tolist()
    sections.append(section)

In [None]:
from elasticsearch import Elasticsearch

# Client for the Elasticsearch node listening on localhost:9200.
es_client = Elasticsearch('http://localhost:9200')

index_name = "uk-benefit-claims"

# One primary shard and no replicas.
shard_settings = {"number_of_shards": 1, "number_of_replicas": 0}

# Field definitions: free-text question/answer, exact-match category, and the
# 384-dimensional embedding indexed for cosine-similarity kNN search.
field_mappings = {
    "answer": {"type": "text"},
    "question": {"type": "text"},
    "category": {"type": "keyword"},
    "text_vector": {
        "type": "dense_vector",
        "dims": 384,
        "index": True,
        "similarity": "cosine",
    },
}

index_settings = {
    "settings": shard_settings,
    "mappings": {"properties": field_mappings},
}

# Drop any existing index of the same name (no error if it is absent), then
# create it again with the settings above so the cell can be re-run.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

In [None]:
# Index every prepared document. Indexing is best-effort per document: a failure
# is printed and counted, and the remaining documents are still sent.
failed_count = 0
for doc in sections:
    try:
        es_client.index(index=index_name, document=doc)
    except Exception as e:
        failed_count += 1
        print(e)

# Force a refresh so the documents just indexed are visible to the search cells
# that follow, instead of relying on the periodic background refresh.
es_client.indices.refresh(index=index_name)
print(f"Indexed {len(sections) - failed_count} of {len(sections)} documents")

In [None]:
# Encode the search text with the same model used for the documents so the
# query vector and the stored text_vector values are comparable.
search_term = "How do I stop claiming benefits?"
vector_search_term = model.encode(search_term)

In [None]:
# kNN clause: compare the encoded search term against the stored text_vector
# field and return the 5 nearest documents out of up to 10000 candidates.
query = dict(
    field="text_vector",
    query_vector=vector_search_term,
    k=5,
    num_candidates=10000,
)

In [None]:
# Run the kNN search and return only the readable fields of each hit. The field
# names follow the lowercase convention of the index mapping; the previously
# requested "text" and "course" fields do not exist in this dataset.
res = es_client.search(
    index=index_name,
    knn=query,
    source=["category", "question", "answer", "section"],
)
res["hits"]["hits"]

In [None]:
# Search that sends both a category match clause and a kNN clause in one request.
# NOTE(review): Elasticsearch treats query + knn as a hybrid search rather than
# a filter - confirm this is the intended behaviour.
category_clause = {"match": {"category": "Manage existing benefit"}}

knn_query = dict(
    field="text_vector",
    query_vector=vector_search_term,
    k=5,
    num_candidates=10000,
)

response = es_client.search(
    index=index_name,
    query=category_clause,
    knn=knn_query,
    size=5,
)
response["hits"]["hits"]