In [1]:
import warnings
# Ignore every warning category for the rest of the session. This also hides
# deprecation notices raised by imported libraries, so comment it out when
# debugging unexpected behaviour.
warnings.filterwarnings("ignore")

# Vector Stores and Embeddings

In [9]:
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used for both the corpus and the search queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Small demo corpus; index results are mapped back to entries of this list
# by position.
documents = [
    "Langchain is used for Building LLM Applications",
    "Hugging Face is for transformers and LLM ",
    "Agentic AI can be built using langchain and langgraph",
]

# One embedding row per document.
embeddings = embedding_model.encode(documents)

# Dimension of each sentence embedding (shape of a single row).
embeddings.shape[1:]

(384,)

### Euclidean Distance

In [None]:
# FAISS index constructors take the vector dimensionality, i.e. the number of
# columns of the (n_documents, dim) embedding matrix -- not the number of rows.
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
# FAISS works on float32 data, hence the explicit conversion before adding.
index.add(np.array(embeddings, dtype=np.float32))

In [29]:
def search(query, top_k=2):
    """Return the documents closest to ``query`` according to the L2 index.

    Args:
        query: Text to embed and look up.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(document, distance)`` tuples in the order the index
        returns them. Fewer than ``top_k`` tuples are returned when the index
        cannot supply that many neighbours.
    """
    query_embedding = embedding_model.encode([query]).astype(np.float32)
    distance, indices = index.search(query_embedding, top_k)
    # Echo the raw index output (kept so the notebook's printed output is unchanged).
    print(distance, indices)
    # FAISS pads missing neighbours with the id -1; documents[-1] would
    # silently return the last document, so those slots are skipped.
    return [
        (documents[doc_id], distance[0][rank])
        for rank, doc_id in enumerate(indices[0])
        if doc_id >= 0
    ]

# Example lookup against the L2 index; `results` is read by the printing loop
# in the next cell.
query = "What is called Langchain"
results = search(query)

[[0.72871506 1.3303838 ]] [[0 2]]


In [None]:
# Euclidean (L2) distance: a lower score means higher similarity, a higher
# score means lower similarity.
# NOTE(review): the values printed here look like *squared* L2 distances
# (0.7287 is about 2 - 2 * 0.6356, the cosine score reported further down) --
# confirm against the FAISS documentation before comparing with other tools.
for result , distance in results:
    print(f"Document: {result} | Distance: {distance:.4f}")

Document: Langchain is used for Building LLM Applications | Distance: 0.7287
Document: Agentic AI can be built using langchain and langgraph | Distance: 1.3304


### Dot Product

In [None]:
# Size the inner-product index from the embedding width (columns of the
# (n_documents, dim) matrix) so this cell does not depend on a dimension
# variable computed in another cell.
index_ip = faiss.IndexFlatIP(embeddings.shape[1])
# Explicit float32 conversion, matching how the L2 index is populated.
index_ip.add(np.array(embeddings, dtype=np.float32))

def search_ip(query, top_k=2):
    """Return the documents with the largest inner product against ``query``.

    Args:
        query: Text to embed and look up.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(document, score)`` tuples in the order the index returns
        them. Fewer than ``top_k`` tuples are returned when the index cannot
        supply that many neighbours.
    """
    query_embedding = embedding_model.encode([query]).astype(np.float32)
    distances, indices = index_ip.search(query_embedding, top_k)
    # FAISS pads missing neighbours with the id -1; documents[-1] would
    # silently return the last document, so those slots are skipped.
    return [
        (documents[doc_id], distances[0][rank])
        for rank, doc_id in enumerate(indices[0])
        if doc_id >= 0
    ]


# Same example query as the Euclidean section; note that `results` is
# reassigned here and now holds inner-product scores.
query = "What is called Langchain"
results = search_ip(query)

# Dot product: a higher score means higher similarity, a lower score means
# lower similarity.
for doc, dist in results:
    print(f"Document: {doc} | Inner Product Similarity: {dist:.4f}")


Document: Langchain is used for Building LLM Applications | Inner Product Similarity: 0.6356
Document: Agentic AI can be built using langchain and langgraph | Inner Product Similarity: 0.3348


### Cosine Similarity

In [None]:
# faiss.normalize_L2 rescales its argument in place, so normalise a float32,
# C-contiguous copy and leave the shared `embeddings` array untouched for the
# other cells. The inner product of unit-length vectors is their cosine similarity.
embeddings_unit = np.array(embeddings, dtype=np.float32, order="C")
faiss.normalize_L2(embeddings_unit)
# Size the index from the embedding width rather than a variable set elsewhere.
index_cosine = faiss.IndexFlatIP(embeddings_unit.shape[1])
index_cosine.add(embeddings_unit)

def search_cosine(query, top_k=2):
    """Return the documents with the highest cosine similarity to ``query``.

    The query embedding is L2-normalised before the inner-product lookup.

    Args:
        query: Text to embed and look up.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(document, score)`` tuples in the order the index returns
        them. Fewer than ``top_k`` tuples are returned when the index cannot
        supply that many neighbours.
    """
    query_embedding = embedding_model.encode([query]).astype(np.float32)
    # In-place normalisation of the freshly created query array.
    faiss.normalize_L2(query_embedding)
    distances, indices = index_cosine.search(query_embedding, top_k)
    # FAISS pads missing neighbours with the id -1; documents[-1] would
    # silently return the last document, so those slots are skipped.
    return [
        (documents[doc_id], distances[0][rank])
        for rank, doc_id in enumerate(indices[0])
        if doc_id >= 0
    ]


# Same example query again; `results` is reassigned and now holds cosine scores.
query = "What is called Langchain"
results = search_cosine(query)
# Cosine similarity: a higher score means higher similarity, a lower score
# means lower similarity.

for doc, dist in results:
    print(f"Document: {doc} | Cosine Similarity: {dist:.4f}")

Document: Langchain is used for Building LLM Applications | Cosine Similarity: 0.6356
Document: Agentic AI can be built using langchain and langgraph | Cosine Similarity: 0.3348
