<h1>Hugging Face Embeddings and Semantic Search</h1>

<h3>1. Using Embeddings in Hugging Face</h3>

In [5]:
from sentence_transformers import SentenceTransformer

model_name = "all-MiniLM-L6-v2"

# Create the embedding model
embedder = SentenceTransformer(model_name)

In [6]:
sentence = "What are embeddings?"

# Embed the sentence
embedding = embedder.encode([sentence])

# View the embedding vector
print(embedding)

[[-1.73370400e-03 -8.55376422e-02 -1.51369553e-02 -1.05205402e-02
   2.52214149e-02  7.31167421e-02 -2.82050893e-02  4.29558131e-04
   7.04325736e-02 -2.81848982e-02  2.75691487e-02  3.37372273e-02
   4.14264686e-02  7.12580280e-03 -6.59378916e-02  2.29259431e-02
   5.05346991e-02  6.68734312e-02 -8.28059763e-02  2.64009275e-02
  -1.94520913e-02 -1.40376184e-02 -6.77149836e-03 -7.95342475e-02
   6.00138865e-02 -2.32632868e-02 -5.62363155e-02  4.81321923e-02
   7.38315359e-02 -3.44408825e-02  3.29989158e-02 -3.28794606e-02
  -1.85335781e-02  5.60518429e-02 -5.12580164e-02  1.09540753e-01
   1.84110533e-02  4.26630862e-03 -7.90110454e-02  4.40341607e-03
   1.67858116e-02  1.99063551e-02 -3.60956825e-02  5.55006526e-02
   6.40486628e-02 -3.67567539e-02 -2.81934235e-02 -4.05343585e-02
  -1.05584569e-01  2.39179395e-02 -3.28660496e-02 -2.92969737e-02
  -6.91607669e-02  4.60894555e-02 -2.93364767e-02 -6.27264678e-02
  -1.43889971e-02 -1.65374726e-02  1.97086688e-02 -1.54229654e-02
   6.72278

In [7]:
# View the shape of embedding
print(embedding.shape)

(1, 384)


<h3>2. Semantic Search</h3>

In [8]:
from sentence_transformers import SentenceTransformer

model_name = "all-MiniLM-L6-v2"

# Create the embedding model
encoder = SentenceTransformer(model_name)

# Define the documents
documents = [
    "The cat sits on the mat.",
    "Dogs are great companions.",
    "I enjoy hiking in the mountains.",
    "The sun is shining brightly today.",
    "Cats and dogs are popular pets."
]

document_embeddings = encoder.encode(documents)

query = "What are the most recent wildlife articles="

query_embedding = encoder.encode([query])

In [9]:
from sentence_transformers import util

hits = util.semantic_search(query_embedding, document_embeddings, top_k=3)

for hit in hits[0]:
    print(documents[hit['corpus_id']], "(Score: {:.4f})".format(hit['score']))

Cats and dogs are popular pets. (Score: 0.3827)
Dogs are great companions. (Score: 0.2855)
I enjoy hiking in the mountains. (Score: 0.2427)
