# Embedding And Storage:

## Install Qdrant and Required Libraries:

In [None]:
#!pip install protobuf==5.26.1
!pip install qdrant-client
!pip install sentence-transformers

Collecting qdrant-client
  Downloading qdrant_client-1.11.0-py3-none-any.whl.metadata (10 kB)
Collecting grpcio-tools>=1.41.0 (from qdrant-client)
  Downloading grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting httpx>=0.20.0 (from httpx[http2]>=0.20.0->qdrant-client)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-tools>=1.41.0->qdrant-client)
  Downloading protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting grpcio>=1.41.0 (from qdrant-client)
  Downloading grpcio-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting httpcore==1.* (from httpx>=0.20.0->httpx[http2]>=0.20.0->qdrant-client)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0

## Set Up and Configure Qdrant:



### Initialize the Qdrant Client:

In [None]:
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct

# The special location ":memory:" is passed explicitly by keyword here;
# no server URL or on-disk path is configured for this client.
client = QdrantClient(location=":memory:")

### Create a Collection in Qdrant:

In [None]:
# Remove a leftover "chatbot_embeddings_test" collection, if one is present,
# so that it can be created from scratch afterwards.
stale_collection_found = client.collection_exists("chatbot_embeddings_test")
if stale_collection_found:
    client.delete_collection("chatbot_embeddings_test")

In [None]:
# `recreate_collection` is deprecated in qdrant-client and emits a
# DeprecationWarning. The supported replacement is an explicit
# exists -> delete -> create sequence, which also keeps this cell safe to
# re-run on its own.
if client.collection_exists("chatbot_embeddings_test"):
    client.delete_collection("chatbot_embeddings_test")

client.create_collection(
    collection_name="chatbot_embeddings_test",
    # size must equal the dimension of the vectors that will be stored (384);
    # similarity between vectors is scored with the dot product.
    vectors_config=models.VectorParams(size=384,
                                       distance=models.Distance.DOT),
)

  client.recreate_collection(


True

### Generate the Embeddings:

In [None]:
from sentence_transformers import SentenceTransformer

# load a model that produces 384-dimensional embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# examples of text chunks that talk about illnesses
# (the line breaks and indentation inside each triple-quoted string are part
# of the stored text)
text_chunks = ["""Hypertension is a condition in which the force of
               the blood against the artery walls is too high. It
               is also known as high blood pressure and can lead to
                serious health complications if left untreated.""",
               """Diabetes is a chronic condition characterized by
               high levels of sugar in the blood. Common symptoms
               include excessive thirst, frequent urination, extreme
               fatigue, and blurred vision.""",
               """Heart disease refers to a range of conditions that
               affect the heart, including coronary artery disease,
               heart rhythm problems, and heart defects. Major causes
               include high cholesterol, high blood pressure,
               smoking, and obesity.""",
               """Asthma is a respiratory condition that causes the
               airways to become inflamed and narrowed. Treatment
               typically includes the use of inhalers that contain
               bronchodilators and corticosteroids to reduce
               inflammation and open the airways."""]

# generate embeddings: one row per text chunk
embeddings = model.encode(text_chunks)

# Verify (not just print) the result shape: one vector per chunk, each with
# 384 components -- the vector size the Qdrant collection is configured for.
assert embeddings.shape == (len(text_chunks), 384), embeddings.shape
print(embeddings.shape)  # (number of chunks, embedding dimension)


(4, 384)


### Load the Embeddings into Qdrant:

In [None]:
# Build all points up front and send them in ONE upsert call instead of
# issuing a separate call per point.
points = [
    PointStruct(
        id=point_id,                  # ids are 1-based: 1, 2, 3, ...
        vector=embedding.tolist(),    # convert the embedding row to a plain list
        payload={"text": chunk},      # keep the source text alongside its vector
    )
    # zip pairs each chunk with its embedding row, avoiding manual indexing
    for point_id, (chunk, embedding) in enumerate(zip(text_chunks, embeddings), start=1)
]

# Assigned to a name so the cell does not echo the result object as output.
upsert_result = client.upsert(
    collection_name="chatbot_embeddings_test",
    points=points,
)

### Test the Retrieval Process:

In [None]:
import time

query = "What are the common symptoms associated with diabetes?"

# Encode the question with the same model object used for the text chunks.
query_embedding = model.encode(query).tolist()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() is wall-clock time and may be adjusted.
start_time = time.perf_counter()

# query_points() replaces the older search() call in qdrant-client; the
# scored matches are in the response's `.points` list.
search_results = client.query_points(
    collection_name="chatbot_embeddings_test",
    query=query_embedding,
    limit=4,  # Number of top results to return
).points

end_time = time.perf_counter()
print(f"Query took {end_time - start_time} seconds")

# Print each match's score together with the text stored in its payload.
for result in search_results:
    print(f"Score: {result.score}, Text: {result.payload['text']}")

Query took 0.0074880123138427734 seconds
Score: 0.7462586693829891, Text: Diabetes is a chronic condition characterized by high levels of sugar in the blood. Common symptoms include excessive thirst, frequent urination, extreme fatigue, and blurred vision.
Score: 0.20300906244734218, Text: Heart disease refers to a range of conditions that affect the heart, including coronary artery disease, heart rhythm problems, and heart defects. Major causes include high cholesterol,
               high blood pressure, smoking, and obesity.
Score: 0.11358719147429977, Text: Hypertension is a condition in which the force of the blood against the artery walls is too high. It is also known as high blood pressure and can lead to serious health
                    complications if left untreated.
Score: -0.050635168511529896, Text: Asthma is a respiratory condition that causes the airways to become inflamed and narrowed. Treatment typically includes the use of inhalers that contain bronchodilators and c

In [None]:
query = "What are the possible treatment for asthma?"

# Encode the question with the same model object used for the text chunks.
query_embedding = model.encode(query).tolist()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() is wall-clock time and may be adjusted.
start_time = time.perf_counter()

# query_points() replaces the older search() call in qdrant-client; the
# scored matches are in the response's `.points` list.
search_results = client.query_points(
    collection_name="chatbot_embeddings_test",
    query=query_embedding,
    limit=4,  # Number of top results to return
).points

end_time = time.perf_counter()
print(f"Query took {end_time - start_time} seconds")

# Print each match's score together with the text stored in its payload.
for result in search_results:
    print(f"Score: {result.score}, Text: {result.payload['text']}")

Query took 0.0004870891571044922 seconds
Score: 0.6926184422358225, Text: Asthma is a respiratory condition that causes the airways to become inflamed and narrowed. Treatment typically includes the use of inhalers that contain bronchodilators and corticosteroids
               to reduce inflammation and open the airways.
Score: 0.13331556723786356, Text: Heart disease refers to a range of conditions that affect the heart, including coronary artery disease, heart rhythm problems, and heart defects. Major causes include high cholesterol,
               high blood pressure, smoking, and obesity.
Score: 0.0005567263951216025, Text: Hypertension is a condition in which the force of the blood against the artery walls is too high. It is also known as high blood pressure and can lead to serious health
                    complications if left untreated.
Score: -0.043613061214324525, Text: Diabetes is a chronic condition characterized by high levels of sugar in the blood. Common symptoms includ