In [1]:
!pip install pandas sentence-transformers faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


## **EXTRACT DATA**

In [2]:
import pandas as pd

# Toy health FAQ: questions[i] is answered by answers[i]
questions = [
    "How to check blood pressure?",
    "How much water should I drink daily?",
    "How often should I exercise?",
]
answers = [
    "Use a digital blood pressure monitor every morning.",
    "Drink at least 8 glasses of water every day.",
    "Exercise for 30 minutes, 5 days a week.",
]

# Column name -> column values
data = {"Question": questions, "Answer": answers}

# One row per question/answer pair
df = pd.DataFrame(data)
df


Unnamed: 0,Question,Answer
0,How to check blood pressure?,Use a digital blood pressure monitor every mor...
1,How much water should I drink daily?,Drink at least 8 glasses of water every day.
2,How often should I exercise?,"Exercise for 30 minutes, 5 days a week."


## **TRANSFORM**

In [3]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained sentence-embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Each document is a question followed by its answer, joined by a single space
question_col = df["Question"]
answer_col = df["Answer"]
documents = question_col + " " + answer_col

# Encode the documents (handed over as a plain Python list) into embeddings
texts = documents.tolist()
embeddings = model.encode(texts)
embeddings


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

array([[-0.01837131,  0.0524233 , -0.04136338, ...,  0.02115469,
        -0.05518245, -0.0360369 ],
       [ 0.01369346,  0.06844495,  0.0176368 , ...,  0.0680446 ,
        -0.00913566, -0.05965208],
       [ 0.04329093, -0.03637487,  0.03056625, ..., -0.028109  ,
        -0.03693292, -0.02378955]], dtype=float32)

## **LOAD (STORE IN VECTOR DATABASE)**

In [4]:
import faiss
import numpy as np

# Embedding width, read from the second axis of the embeddings array
dimension = embeddings.shape[1]

# Build a flat L2 index of that width and add the embeddings to it
vectors = np.array(embeddings)
index = faiss.IndexFlatL2(dimension)
index.add(vectors)

# The index now holds the embeddings and is ready to be searched


In [5]:
# Example user query
query = "How can I monitor my blood pressure?"

# Embed the query with the same model that embedded the documents
query_vec = model.encode([query])

# Nearest-neighbour search. D holds the distances and I the ids of the matches,
# one row per query and k columns (k=1 keeps only the best match).
D, I = index.search(np.array(query_vec), k=1)

# The ids FAISS returns are insertion positions, not DataFrame index labels,
# so fetch the row positionally with .iloc. A label lookup such as
# df['Question'][i] only works while df still has its default 0..n-1 index.
best_match = df.iloc[int(I[0][0])]

# Show the best answer
print("Question:", best_match['Question'])
print("Answer:", best_match['Answer'])


Question: How to check blood pressure?
Answer: Use a digital blood pressure monitor every morning.
