In [2]:
import os
from dotenv import load_dotenv
# Load variables from a dotenv file into the process environment, then read
# the Hugging Face access token (None when the variable is not set).
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

### 1. We can use the HF Inference API to get embeddings

In [4]:
# Access token read from the environment in the first cell.
hf_token = HF_TOKEN
# Hub identifier of the sentence-embedding model queried through the API.
model_id = "sentence-transformers/all-MiniLM-L6-v2"


In [5]:
import requests

# Feature-extraction endpoint for the chosen model.
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
# Only send an Authorization header when a token is actually configured;
# otherwise the literal string "Bearer None" would be sent to the API.
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}


In [6]:
def query(texts, timeout=120):
    """Request embeddings for ``texts`` from the feature-extraction endpoint.

    Args:
        texts: A string or list of strings to embed.
        timeout: Seconds to wait for the HTTP response before giving up.
            ``wait_for_model`` asks the API to block until the model is
            loaded, so the default is deliberately generous.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": texts, "options": {"wait_for_model": True}},
        timeout=timeout,  # without a timeout the call can block forever
    )
    # Fail loudly instead of handing an error payload to downstream cells.
    response.raise_for_status()
    return response.json()

In [7]:
# FAQ entries that form the search corpus.
texts = [
    "How do I get a replacement Medicare card?",
    "What is the monthly premium for Medicare Part B?",
    "How do I terminate my Medicare Part B (medical insurance)?",
]

# Embed every FAQ entry with a single API call.
output = query(texts)

### 2. We can convert this to a DataFrame

In [13]:
import pandas as pd
# Tabulate the API response: one row per input text, one column per embedding dimension.
embeddings = pd.DataFrame(data=output)


In [14]:
# Display the table of embeddings returned by the API.
embeddings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
0,-0.023889,0.055259,-0.011655,-0.033414,-0.012261,-0.024873,-0.012663,0.025346,0.018508,-0.083508,...,-0.161688,-0.046426,0.006004,0.005281,-0.003342,0.027754,0.020411,0.005778,0.034098,-0.006889
1,-0.012688,0.046874,-0.010502,-0.020384,-0.013361,0.042322,0.016628,-0.004099,-0.002607,-0.010188,...,-0.061594,-0.020717,-0.009082,-0.02926,-0.066253,0.065257,0.013229,-0.023103,-0.002785,0.010474
2,0.000494,0.119412,0.00523,-0.092734,0.007773,-0.005325,0.034506,-0.051981,-0.006265,-0.00611,...,-0.108326,-0.049646,-0.073399,-0.029898,-0.102734,0.062121,0.034606,0.016877,-0.023861,0.005264


In [22]:
import torch  # imported here so this cell also works when the notebook is run top to bottom

# Convert the embeddings DataFrame to a float32 tensor for semantic search.
dataset_embeddings = torch.from_numpy(embeddings.to_numpy()).to(torch.float)

In [23]:
# Display the corpus embeddings as a tensor.
dataset_embeddings

tensor([[-0.0239,  0.0553, -0.0117,  ...,  0.0058,  0.0341, -0.0069],
        [-0.0127,  0.0469, -0.0105,  ..., -0.0231, -0.0028,  0.0105],
        [ 0.0005,  0.1194,  0.0052,  ...,  0.0169, -0.0239,  0.0053]])

### 3. Is this result really the same as the result we get from local embeddings?

In [16]:
from sentence_transformers import SentenceTransformer 
# Load the same model the API cells use (via model_id) so the comparison
# below is guaranteed to be between identical models.
model = SentenceTransformer(model_id)


In [17]:
# Embed the same FAQ entries locally and tabulate them like the API result.
local_out = model.encode(texts)
local_embeddings = pd.DataFrame(local_out)

# Check the agreement numerically instead of by eye: the API and local values
# may differ in the last printed digit, so compare with a small tolerance.
max_abs_diff = (embeddings - local_embeddings).abs().to_numpy().max()
assert max_abs_diff < 1e-4, f"API and local embeddings differ by up to {max_abs_diff:.2e}"

local_embeddings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
0,-0.023889,0.055259,-0.011655,-0.033414,-0.012261,-0.024873,-0.012663,0.025346,0.018508,-0.083508,...,-0.161688,-0.046426,0.006004,0.005281,-0.003342,0.027754,0.020411,0.005778,0.034098,-0.006889
1,-0.012688,0.046874,-0.010502,-0.020384,-0.013361,0.042322,0.016628,-0.004099,-0.002607,-0.010188,...,-0.061594,-0.020717,-0.009082,-0.02926,-0.066253,0.065257,0.013229,-0.023103,-0.002785,0.010474
2,0.000494,0.119412,0.00523,-0.092734,0.007773,-0.005325,0.034506,-0.051981,-0.006265,-0.00611,...,-0.108326,-0.049646,-0.073399,-0.029898,-0.102734,0.062121,0.034605,0.016877,-0.023861,0.005264


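Yes. The two results agree up to floating-point rounding (for example, row 2, column 380 shows 0.034606 from the API and 0.034605 locally).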

### 4. Question embedding

In [19]:
import torch
question = ["How can Medicare help me?"]
# Store the question response under its own name so the corpus response in
# `output` is not overwritten (re-running earlier cells stays correct).
question_output = query(question)

# torch.tensor with an explicit dtype replaces the legacy FloatTensor constructor.
query_embeddings = torch.tensor(question_output, dtype=torch.float)

### 5. Semantic search

You can use the util.semantic_search function in the Sentence Transformers library to identify which of the FAQs are closest (most similar) to the user's query. This function uses cosine similarity as the default function to determine the proximity of the embeddings.

In [24]:
from sentence_transformers.util import semantic_search

# Rank the FAQ embeddings against the question embedding; at most five hits are requested.
hits = semantic_search(
    query_embeddings=query_embeddings,
    corpus_embeddings=dataset_embeddings,
    top_k=5,
)


In [25]:
# Display the search hits for the question (one inner list per query).
hits

[[{'corpus_id': 0, 'score': 0.6081693172454834},
  {'corpus_id': 2, 'score': 0.5669911503791809},
  {'corpus_id': 1, 'score': 0.5590431094169617}]]

In [26]:
# Map each hit of the first (only) query back to its FAQ text, in hit order.
ranked_texts = [texts[hit['corpus_id']] for hit in hits[0]]
print(ranked_texts)


['How do I get a replacement Medicare card?', 'How do I terminate my Medicare Part B (medical insurance)?', 'What is the monthly premium for Medicare Part B?']
