In [1]:
from dotenv import load_dotenv
import os, getpass
load_dotenv()  # Loads environment variables from .env file
# ignore all warnings in this notebook
import warnings
warnings.filterwarnings("ignore")

In [2]:
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    os.environ["GOOGLE_API_KEY"] = google_api_key
    print("GOOGLE_API_KEY found in .env file.")
else:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter GEMINI API key: ")

In [3]:
openai_api_key = os.getenv("OPENAI_API_KEY")
os.environ["SSL_NO_VERIFY"] = "1"
os.environ["OPENAI_API_BASE"] = "https://api.openai.com/v1"
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key
else:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [4]:
from dotenv import load_dotenv
import os

# Loads environment variables from .env file
load_dotenv()  

# extracting HUGGINGFACEHUB_API_TOKEN
huggingface_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if huggingface_api_key:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_api_key
    print("HUGGINGFACEHUB_API_TOKEN found in .env file.")
else:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Enter HUGGINGFACEHUB API key: ")

## Gemini

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")


E0000 00:00:1762507420.813180   22064 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [6]:
vector = embeddings.embed_query("Machine learning is fascinating")

In [7]:
len(vector)

3072

In [9]:
vector[:5]

[-0.0113553237169981,
 0.013383835554122925,
 0.004231748636811972,
 -0.09220291674137115,
 -0.00380918406881392]

In [19]:
texts = [
    "Machine learning is fascinating",
    "AI and ML are interesting topics",
    "I love eating pizza"
]

print(f"\nEmbedding {len(texts)} sentences...")
vectors = [embeddings.embed_query(text) for text in texts]


Embedding 3 sentences...


In [20]:
len(vectors)

3

In [23]:
len(vectors[2])

3072

In [24]:
import numpy as np
from numpy.linalg import norm
def cosine_similarity(v1, v2):
    """Measures the angle between vectors (0 to 1, higher = more similar)"""
    return np.dot(v1, v2) / (norm(v1) * norm(v2))
def euclidean_distance(v1, v2):
    """Measures straight-line distance (lower = more similar)"""
    return norm(np.array(v1) - np.array(v2))
def dot_product(v1, v2):
    """Measures projection (higher = more similar)"""
    return np.dot(v1, v2)

print("\nEuclidean Distances:")
print(f"Sentence 1 vs 2 (both about ML): {euclidean_distance(vectors[0], vectors[1]):.4f}")
print(f"Sentence 1 vs 3 (ML vs pizza): {euclidean_distance(vectors[0], vectors[2]):.4f}")
print("\nDot product:")
print(f"Sentence 1 vs 2 (both about ML): {dot_product(vectors[0], vectors[1]):.4f}")
print(f"Sentence 1 vs 3 (ML vs pizza): {dot_product(vectors[0], vectors[2]):.4f}")
print("\nCosine Similarities:")
print(f"Sentence 1 vs 2 (both about ML): {cosine_similarity(vectors[0], vectors[1]):.4f}")
print(f"Sentence 1 vs 3 (ML vs pizza): {cosine_similarity(vectors[0], vectors[2]):.4f}")
print(f"Sentence 2 vs 3 (ML vs pizza): {cosine_similarity(vectors[1], vectors[2]):.4f}")



Euclidean Distances:
Sentence 1 vs 2 (both about ML): 0.3483
Sentence 1 vs 3 (ML vs pizza): 0.6394

Dot product:
Sentence 1 vs 2 (both about ML): 0.9393
Sentence 1 vs 3 (ML vs pizza): 0.7956

Cosine Similarities:
Sentence 1 vs 2 (both about ML): 0.9393
Sentence 1 vs 3 (ML vs pizza): 0.7956
Sentence 2 vs 3 (ML vs pizza): 0.7799


In [25]:

# User's search query
query = "How do neural networks learn?"

# Your documents
documents = [
    "Neural networks use backpropagation to adjust weights",
    "Best pizza recipes in New York",
    "Deep learning models train on large datasets",
    "The weather forecast for tomorrow"
]

# Embed everything
query_vector = embeddings.embed_query(query)
doc_vectors = [embeddings.embed_query(doc) for doc in documents]

# Find most similar documents
similarities = [
    (doc, cosine_similarity(query_vector, doc_vec))
    for doc, doc_vec in zip(documents, doc_vectors)
]

# Sort by similarity
similarities.sort(key=lambda x: x[1], reverse=True)

print("\nSearch results for:", query)
for doc, score in similarities:
    print(f"  Score: {score:.4f} - {doc}")


Search results for: How do neural networks learn?
  Score: 0.9283 - Neural networks use backpropagation to adjust weights
  Score: 0.8813 - Deep learning models train on large datasets
  Score: 0.7709 - The weather forecast for tomorrow
  Score: 0.7513 - Best pizza recipes in New York


In [None]:
len(doc_vectors[1])

In [26]:
print("\n--- embed_documents (batch processing) ---")
documents = [
    "Machine learning is a subset of AI",
    "Neural networks are inspired by the brain",
    "Python is a popular programming language"
]

doc_embeddings = embeddings.embed_documents(documents)
print(f"Embedded {len(doc_embeddings)} documents")
print(f"Each embedding has {len(doc_embeddings[0])} dimensions")


--- embed_documents (batch processing) ---
Embedded 3 documents
Each embedding has 3072 dimensions


In [27]:
# Cached Embeddings

import time
from langchain_classic.embeddings import CacheBackedEmbeddings  
from langchain_classic.storage import LocalFileStore 
from langchain_core.vectorstores import InMemoryVectorStore

In [31]:
store = LocalFileStore("./cache_/") 

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    store,
    namespace=embeddings.model,
    batch_size = 100
)

In [32]:
documents = [
    "First document about machine learning",
    "Second document about neural networks",
    "Third document about data science",
] * 10  # 30 documents total


In [33]:
print(f"\nTesting with {len(documents)} documents...")

# First run - no cache
print("\n--- First Run (Cold Cache) ---")
start = time.time()
embeddings_1 = cached_embedder.embed_documents(documents)
time_1 = time.time() - start
print(f"Time: {time_1:.3f} seconds")
print(f"Cache items: {len(list(store.yield_keys()))}")


Testing with 30 documents...

--- First Run (Cold Cache) ---
Time: 1.515 seconds
Cache items: 3


In [34]:
# Second run - with cache
print("\n--- Second Run (Warm Cache) ---")
start = time.time()
embeddings_2 = cached_embedder.embed_documents(documents)
time_2 = time.time() - start
print(f"Time: {time_2:.3f} seconds")
print(f"Speedup: {time_1/time_2:.1f}x faster!")

# Verify they're identical
print(f"\nEmbeddings match: {np.allclose(embeddings_1[0], embeddings_2[0])}")


--- Second Run (Warm Cache) ---
Time: 0.035 seconds
Speedup: 43.4x faster!

Embeddings match: True


In [35]:
import hashlib
text = "Machine learning is fascinating"
cache_key = hashlib.sha256(text.encode()).hexdigest()

print(f"\nOriginal text: '{text}'")
print(f"Cache key (SHA-256 hash): {cache_key}")
print(f"\nKey insight: Same text → Same hash → Cache hit")

# Show cached keys
keys = list(store.yield_keys())
print(f"\nTotal cached keys: {len(keys)}")
if keys:
    print(f"Example cache keys:")
    for key in list(keys)[:3]:
        print(f"  - {key}")


Original text: 'Machine learning is fascinating'
Cache key (SHA-256 hash): d8b46f29f642efbe0a2696d8ee10037a9be70689848b2c9f5427fbd9dfa182fe

Key insight: Same text → Same hash → Cache hit

Total cached keys: 3
Example cache keys:
  - models/gemini-embedding-001f45b0927-1dc1-5571-bf7d-6b51b178975c
  - models/gemini-embedding-0018f190f63-952e-5284-a87a-4e175f161d45
  - models/gemini-embedding-001fb2c97be-3aa7-5ff8-90de-a6f1bdb706d8


In [38]:
# Cache both documents and queries
fully_cached = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    store,
    query_embedding_cache=True,  # Reuse same store for queries
    namespace="gemini-all"
)


In [37]:
# use separate stores
query_store = LocalFileStore("./query_cache/")
separate_cached = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    store,
    query_embedding_cache=query_store,  # Separate cache for queries
    namespace="gemini"
)


In [39]:

print("Simulating a RAG application...")

# Embed documents once (cached)
docs = [
    "LangChain is a framework for building LLM applications",
    "Vector databases store embeddings for semantic search",
    "RAG combines retrieval and generation for better answers"
]

print(f"\nEmbedding {len(docs)} documents (will be cached)...")
doc_vecs = fully_cached.embed_documents(docs)
print(f"Document embeddings cached: {len(doc_vecs)}")

# User queries (these get cached too if repeated)
queries = [
    "What is LangChain?",
    "Tell me about vector databases",
    "What is LangChain?"  # Repeated query - should hit cache
]

print("\nProcessing user queries...")
for i, query in enumerate(queries, 1):
    start = time.time()
    query_vec = fully_cached.embed_query(query)
    elapsed = time.time() - start
    
    cache_status = "CACHE HIT" if i == 3 else "NEW QUERY"
    print(f"{i}. '{query}' ({cache_status}) - {elapsed:.4f}s")

Simulating a RAG application...

Embedding 3 documents (will be cached)...
Document embeddings cached: 3

Processing user queries...
1. 'What is LangChain?' (NEW QUERY) - 0.3307s
2. 'Tell me about vector databases' (NEW QUERY) - 0.3106s
3. 'What is LangChain?' (CACHE HIT) - 0.0013s


In [40]:
# Check cache stats
print(f"\nTotal cached embeddings: {len(list(store.yield_keys()))}")
# Clear cache if needed
store.mdelete(list(store.yield_keys()))
print("Cache cleared!")



Total cached embeddings: 8
Cache cleared!


## OpenAI

In [41]:
text = "Machine learning is a subset of AI"

In [42]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [43]:
single_vector = embeddings.embed_query(text)
print(str(single_vector)[:100])  # Show the first 100 characters of the vector

[-0.02326645515859127, -0.056493259966373444, -0.009426760487258434, -0.02003607153892517, 0.0504939


In [44]:
text2 = (
    "LangGraph is a library for building stateful, multi-actor applications with LLMs"
)
two_vectors = embeddings.embed_documents([text, text2])
for vector in two_vectors:
    print(str(vector)[:100])  # Show the first 100 characters of the vector

[-0.02327197976410389, -0.05646820738911629, -0.009404956363141537, -0.020021595060825348, 0.0505444
[-0.03336711227893829, 0.01044490933418274, 0.011859860271215439, -0.04046759381890297, 0.0021111709


## HugginFace Embeddings

In [45]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

In [46]:
from langchain_huggingface import HuggingFaceEndpointEmbeddings

embeddings = HuggingFaceEndpointEmbeddings(
    model="sentence-transformers/all-MiniLM-L6-v2"
)

query_result = embeddings.embed_query(text)
query_result[:3]

[-0.04870401695370674, -0.01661957986652851, 0.06689752638339996]

### Provider

In [None]:
!pip install -qU  langchain langchain-huggingface sentence_transformers

In [None]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
text = "This is a test document."
query_result = embeddings.embed_query(text)
doc_result = embeddings.embed_documents([text])

### Hub

In [None]:
!pip install huggingface_hub

In [None]:
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
text = "This is a test document."
query_result = embeddings.embed_query(text)
query_result[:3]

In [None]:
# Or use separate stores
query_store = LocalFileStore("./query_cache/")
separate_cached = CacheBackedEmbeddings.from_bytes_store(
    underlying_embedder=embeddings,
    document_embedding_cache=store,
    query_embedding_cache=query_store,  # Separate cache for queries
    namespace="openai"
)
