In [25]:
import json
import os

from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm


In [31]:
# The default below is an ngrok tunnel address, which is presumably short-lived.
# Allow overriding it through the ELASTICSEARCH_URL environment variable so the
# notebook can be re-run against a new endpoint without editing this cell; the
# original address is kept as the fallback.
es_client = Elasticsearch(
    os.environ.get('ELASTICSEARCH_URL', 'https://cfe6-172-167-41-10.ngrok-free.app/')
)


In [32]:
# Connectivity check: request the cluster's basic metadata and print it, so a
# bad endpoint fails here rather than in a later cell.
response = es_client.info()
print(response)

{'name': 'a32337f21ef1', 'cluster_name': 'docker-cluster', 'cluster_uuid': '74Hfpg0gQ_W91wHtnb4TwQ', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [16]:
# Index definition: a single shard with no replicas, plus the field mappings
# for the question/answer records and their embedding vector.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "question": {"type": "text"},
            "id": {"type": "keyword"},
            # Vector field used for kNN search; `dims` has to equal the length
            # of the embedding stored in each document.
            "qa_text_embeddings": {
                "type": "dense_vector",
                "dims": 768,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "diabetes-questions"

# Creating an index that already exists is rejected by Elasticsearch, which
# would break a re-run of this cell. Only create the index when it is missing.
# NOTE: an existing index keeps its current mapping; delete it manually if
# `index_settings` has changed.
if es_client.indices.exists(index=index_name):
    print(f"Index {index_name!r} already exists; skipping creation.")
else:
    print(es_client.indices.create(index=index_name, body=index_settings))

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'diabetes-questions'})

In [20]:
### Load Documents
# Read the records (with their embedding vectors) from a JSON file in the
# working directory. The encoding is passed explicitly so that parsing does not
# depend on the platform's default locale encoding.
with open('diabetes_data_with_vectors', 'r', encoding='utf-8') as f_in:
    diabetes_data_with_vectors = json.load(f_in)

In [23]:
# Push every loaded record into the index, one request per record, while
# reporting progress.
progress = tqdm(diabetes_data_with_vectors)
for doc in progress:
    es_client.index(index=index_name, document=doc)

  0%|          | 0/1071 [00:00<?, ?it/s]

In [28]:
def elastic_search_knn(field, vector, k=5, num_candidates=10000):
    """Run a kNN search against `index_name` and return the matching records.

    Args:
        field: Name of the vector field to search.
        vector: Query embedding. A value exposing ``tolist()`` (for example an
            array returned by the encoder) is converted to a plain list before
            being placed in the request; any other value is passed unchanged.
        k: Number of nearest neighbours to request. Defaults to 5.
        num_candidates: Value sent as the kNN ``num_candidates`` option.
            Defaults to 10000.

    Returns:
        A list of ``(id, source)`` tuples, one per hit and in the order the
        hits were returned, where ``source`` is the hit's ``_source`` dict
        (restricted to ``answer``, ``question`` and ``id``).
    """
    # Normalise array-like embeddings to a JSON-friendly list of floats.
    to_list = getattr(vector, "tolist", None)
    query_vector = to_list() if callable(to_list) else vector

    search_query = {
        "knn": {
            "field": field,
            "query_vector": query_vector,
            "k": k,
            "num_candidates": num_candidates,
        },
        # Leave the stored vector out of the response; only the text is needed.
        "_source": ["answer", "question", "id"],
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )

    return [
        (hit['_source']['id'], hit['_source'])
        for hit in es_results['hits']['hits']
    ]

In [29]:
# Free-text question to look up in the index.
user_question = "What are the recommended food for a diabetes patients?"

# Encode the question into a vector for the kNN query.
# NOTE(review): this should be the same model that produced the stored
# document vectors, and its output length must match the index `dims` - confirm.
embedding_model = SentenceTransformer('multi-qa-distilbert-cos-v1')
user_question_embedding = embedding_model.encode(user_question)

In [30]:
# Retrieve the indexed Q&A records closest to the embedded user question.
elastic_search_knn(field='qa_text_embeddings', vector=user_question_embedding)


[('b86f8575',
  {'question': 'What types of foods are emphasized in a healthy eating plan for diabetes?',
   'answer': 'A healthy eating plan for diabetes emphasizes a variety of foods including breads, cereals, rice, whole grains, fruits, vegetables, meat and meat substitutes, dairy products, and healthy fats.',
   'id': 'b86f8575'}),
 ('c070f69a',
  {'question': 'What are the key components of a healthy eating plan for diabetes?',
   'answer': 'A healthy eating plan for diabetes emphasizes breads, cereals, rice, whole grains, fruits, vegetables, meat and meat substitutes, dairy products, and healthy fats, while focusing on appropriate portion sizes.',
   'id': 'c070f69a'}),
 ('c659dae0',
  {'question': 'Question: Can you list some specific examples of foods rich in healthy fats that are suitable for individuals with diabetes?',
   'answer': 'Answer:  Yes, some great examples of healthy fat-rich foods suitable for individuals with diabetes include olive oil, avocados, almonds, walnuts