# Semantic Search with Sentence Transformers

Encode a collection of texts with a pretrained Sentence-BERT model, then retrieve the text that is most semantically similar to a given query, ranked by cosine similarity.

In [7]:
from sentence_transformers import SentenceTransformer, util
import torch

In [2]:
# Documents that make up the searchable corpus
corpus = [
    "The mitochondria is the powerhouse of the cell, generating ATP through cellular respiration.",
    "A binary search algorithm efficiently locates an item in a sorted array by repeatedly dividing the search interval in half.",
    "Inflation occurs when the general price level of goods and services rises, eroding purchasing power.",
    "Cats are wonderful companions.",
    "The French Revolution began in 1789 and led to profound social and political changes in France.",
    "The Eiffel Tower is located in Paris.",
    "Newton's first law states that an object at rest remains at rest unless acted upon by an external force.",
    "Pawan Kalyan, often called the 'Power Star' for his dynamic presence in Telugu cinema, has proven to be a political powerhouse as well.",
]

# Pretrained Sentence-BERT model; the same model also encodes each query in search_doc
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the whole corpus once, as a tensor, so a search only needs to encode the query
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
 


In [5]:
def search_doc(query, top_k=1):
    """Print the corpus document(s) most semantically similar to ``query``.

    The query is embedded with the notebook-level ``model`` and compared with
    the precomputed ``corpus_embeddings`` using cosine similarity.

    Args:
        query: Text to search for.
        top_k: Number of best-matching documents to print, best first.
            Defaults to 1, which prints exactly one "Most Relevant Document"
            / "Score" pair. Values larger than the number of scored documents
            are clamped to that number.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.

    Returns:
        None. The results are written to standard output only.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    query_embedding = model.encode(query, convert_to_tensor=True)

    # Compute cosine similarity; the helper returns a (queries x documents)
    # matrix, so row 0 holds the scores for this single query.
    cos_scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]

    # torch.topk raises when k exceeds the number of elements, so clamp it.
    k = min(top_k, cos_scores.shape[0])
    best = torch.topk(cos_scores, k=k)

    # Display the best match(es), highest score first
    print("Query:", query)
    ranked = zip(best.indices.tolist(), best.values.tolist())
    for rank, (doc_idx, score) in enumerate(ranked, start=1):
        if k > 1:
            # Only label ranks when several results are shown, so the default
            # single-result output stays unchanged.
            print(f"--- Rank {rank} ---")
        print("Most Relevant Document:", corpus[doc_idx])
        print("Score:", score)

In [None]:
# Query input 1: a direct question whose answer appears almost verbatim in the
# corpus ("The Eiffel Tower is located in Paris.")
query = "Where is the Eiffel Tower?"
search_doc(query)

Query: Where is the Eiffel Tower?
Most Relevant Document: The Eiffel Tower is located in Paris.
Score: 0.863134503364563


In [None]:
# Query input 2: try to confuse the model with near-identical wordings
# ("power house" vs. "powerhouse"); two corpus documents mention "powerhouse".
for query in (
    "Who is the power house?",
    "Who is the powerhouse?",
    "Diff between power house & powerhouse?",
):
    search_doc(query)
    # Blank lines separate the printed results of consecutive queries
    print("\n\n")

Query: Who is the power house?
Most Relevant Document: Pawan Kalyan, often called the 'Power Star' for his dynamic presence in Telugu cinema, has proven to be a political powerhouse as well.
Score: 0.3438369929790497



Query: Who is the powerhouse?
Most Relevant Document: The mitochondria is the powerhouse of the cell, generating ATP through cellular respiration.
Score: 0.35128575563430786



Query: Diff between power house & powerhouse?
Most Relevant Document: Pawan Kalyan, often called the 'Power Star' for his dynamic presence in Telugu cinema, has proven to be a political powerhouse as well.
Score: 0.24223750829696655



