In [4]:
!pip install elasticsearch sentence_transformers tqdm 

Collecting elasticsearch
  Using cached elasticsearch-8.14.0-py3-none-any.whl.metadata (7.2 kB)
Collecting sentence_transformers
  Using cached sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting elastic-transport<9,>=8.13 (from elasticsearch)
  Using cached elastic_transport-8.13.1-py3-none-any.whl.metadata (3.7 kB)
Using cached elasticsearch-8.14.0-py3-none-any.whl (480 kB)
Using cached sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
Using cached elastic_transport-8.13.1-py3-none-any.whl (64 kB)
Installing collected packages: elastic-transport, elasticsearch, sentence_transformers
Successfully installed elastic-transport-8.13.1 elasticsearch-8.14.0 sentence_transformers-3.0.1


In [5]:
from elasticsearch import Elasticsearch
import json
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer


In [10]:
es_client = Elasticsearch('http://localhost:9200') 


In [11]:
response = es_client.info()
print(response)

{'name': 'w-along-llmproject-7526af7482814462acb59e22664fd36f-6d87544mqsm', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'eQy_qs9PQqyPBMIErfu6gQ', 'version': {'number': '8.9.1', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'a813d015ef1826148d9d389bd1c0d781c6e349f0', 'build_date': '2023-08-10T05:02:32.517455352Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [65]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "answer": {"type": "text"},
            "question": {"type": "text"},
            "id": {"type": "keyword"},
            "qa_text_embeddings": {
                "type": "dense_vector",
                "dims": 768,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "diabetes-questions"
try:
    es_client.indices.create(index=index_name, body=index_settings, ignore=400)
    print(f"Index '{index_name}' created or already exists.")
except Exception as e:
    print(f"An error occurred: {e}")

Index 'diabetes-questions' created or already exists.


  es_client.indices.create(index=index_name, body=index_settings, ignore=400)


In [66]:
### Load Documents
with open('diabetes_data_with_vectors', 'r') as f_in:
    diabetes_data_with_vectors = json.load(f_in)

In [67]:
for doc in tqdm(diabetes_data_with_vectors):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/1071 [00:00<?, ?it/s]

In [68]:
def elastic_search_knn(field, vector):
    knn = {
        "field": field,
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000,
    }

    search_query = {
        "knn": knn,
        "_source": ["answer", "question", "id"]
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )

    result_docs = []

    for hit in es_results['hits']['hits']:
 
        result_docs.append(f"Question: {hit['_source']['question']} /n Answer: {hit['_source']['answer']}")

    return result_docs

In [71]:
embedding_model = SentenceTransformer('multi-qa-distilbert-cos-v1')

user_question = "What are the recommended food for a diabetes patients?"

user_question_embedding = embedding_model.encode(user_question)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/9.52k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/523 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [72]:
elastic_search_knn('qa_text_embeddings',user_question_embedding)


['Question: What types of foods are emphasized in a healthy eating plan for diabetes? /n Answer: A healthy eating plan for diabetes emphasizes a variety of foods including breads, cereals, rice, whole grains, fruits, vegetables, meat and meat substitutes, dairy products, and healthy fats.',
 'Question: What are the key components of a healthy eating plan for diabetes? /n Answer: A healthy eating plan for diabetes emphasizes breads, cereals, rice, whole grains, fruits, vegetables, meat and meat substitutes, dairy products, and healthy fats, while focusing on appropriate portion sizes.',
 'Question: Question: Can you list some specific examples of foods rich in healthy fats that are suitable for individuals with diabetes? /n Answer: Answer:  Yes, some great examples of healthy fat-rich foods suitable for individuals with diabetes include olive oil, avocados, almonds, walnuts, salmon, tuna, and flaxseeds. These foods provide essential nutrients and support overall well-being.',
 'Question

In [61]:
es_client.indices.delete(index=index_name, body=index_settings, ignore=400)

  es_client.indices.delete(index=index_name, body=index_settings, ignore=400)


ObjectApiResponse({'acknowledged': True})

In [None]:
with open('es_client.pkl', 'wb') as file:
    pickle.dump(es_client, file)