In [1]:
# Setup: install the packages this part of the notebook imports, then load env vars.
# `%pip` is the IPython magic form of pip; `--quiet` keeps the install log short.
%pip install python-dotenv --upgrade --quiet langchain langchain-huggingface sentence-transformers langchain-community

from dotenv import load_dotenv
# python-dotenv: read variables from a local .env file (if one exists) into the environment.
load_dotenv()

import os
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model: a FREE, open-source checkpoint published on Hugging Face.
# 'all-MiniLM-L6-v2' is compact and quick, and performs very well on English text.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
# Embed a single word and inspect the size and first entries of the result.
vector = embeddings.embed_query("Interest")

dimensionality = len(vector)
leading_values = vector[:5]
print(f"Dimensionality: {dimensionality}")
print(f"First 5 numbers: {leading_values}")

Dimensionality: 384
First 5 numbers: [0.02501482516527176, 0.03360803425312042, -0.01806681603193283, 0.019716626033186913, 0.017031127586960793]


In [3]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine of the angle between two vectors.

    Args:
        a: Vector given as a sequence or NumPy array of numbers.
        b: Vector of the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))`` as a NumPy scalar. If either
        vector has norm 0 the angle is undefined, so ``0.0`` is returned
        instead of dividing by zero (which would yield ``nan`` and a
        RuntimeWarning).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction; report "no similarity" rather than nan.
        return np.float64(0.0)
    return np.dot(a, b) / denominator

# Embed three words; the generator is consumed in order, so the calls run
# for "Book", then "Library", then "Blackboard".
vec_book, vec_lib, vec_bb = (
    embeddings.embed_query(word) for word in ("Book", "Library", "Blackboard")
)

# Compare "Book" against each of the other two words.
book_vs_library = cosine_similarity(vec_book, vec_lib)
book_vs_blackboard = cosine_similarity(vec_book, vec_bb)
print(f"Book vs Library: {book_vs_library:.4f}")
print(f"Book vs Blackboard: {book_vs_blackboard:.4f}")

Book vs Library: 0.5316
Book vs Blackboard: 0.3097


In [4]:
# Setup: install the packages the cells below import (FAISS, Hugging Face embeddings,
# the Google Generative AI chat integration), then load env vars.
%pip install python-dotenv --upgrade --quiet faiss-cpu langchain-huggingface sentence-transformers langchain-community langchain_google_genai
from dotenv import load_dotenv
# python-dotenv: read variables from a local .env file (if one exists) into the environment.
load_dotenv()

import getpass
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings

# Prompt for the Google API key when it is missing OR set to an empty string
# (for example a blank `GOOGLE_API_KEY=` entry loaded from .env). A plain
# membership test would accept the empty value and skip the prompt.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

# Gemini chat model used to generate the answers.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Using the same free model as Part 4a
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

Enter your Google API Key: ··········


In [5]:
from langchain_core.documents import Document

# Tiny knowledge base: each fact becomes one Document to be indexed.
facts = (
    "Ravi's favorite food is burger with extra cheese.",
    "The secret password to the repository is 'Github'.",
    "LangChain is a framework for developing applications powered by language models.",
)
docs = [Document(page_content=fact) for fact in facts]

In [6]:
from langchain_community.vectorstores import FAISS

# Embed every document and store the vectors in a FAISS index.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Retriever wrapper around the store, built with its default search settings.
retriever = vectorstore.as_retriever()

In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Prompt that restricts the model to the retrieved context.
template = """
Answer based ONLY on the context below:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one plain-text block.

    Without this step the `{context}` slot is filled with the string form of
    a Python list of Document objects (metadata and repr syntax included)
    rather than with the documents' text.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The input question goes to the retriever (whose hits are flattened to text)
# and is also passed through unchanged as `question`.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

result = chain.invoke("What is the secret password?")
print(result)

The secret password to the repository is 'Github'.


In [8]:
import faiss
import numpy as np

# Mock Data: 10,000 vectors of size 128
d = 128      # dimensionality of every vector
nb = 10000   # number of vectors stored in the indexes

# Seed NumPy's global RNG so the mock vectors (and the later `np.random`
# query draws, when cells run in order) are identical after Restart & Run All.
SEED = 42
np.random.seed(SEED)
xb = np.random.random((nb, d)).astype('float32')

In [9]:
# Flat L2 index built from the mock vectors.
index = faiss.IndexFlatL2(d)
index.add(xb)

flat_vector_count = index.ntotal
print(f"Flat Index contains {flat_vector_count} vectors")

Flat Index contains 10000 vectors


In [10]:
# IVF index: the vectors are grouped into `nlist` clusters ('zip codes').
quantizer = faiss.IndexFlatL2(d)  # the distance calculator handed to the IVF index
nlist = 100                       # how many clusters we want
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist)

# Training MUST come before adding: it is how the index learns where the clusters are.
index_ivf.train(xb)
index_ivf.add(xb)

In [11]:
# Report the state of the IVF index, one "label value" line per property.
for label, value in (
    ("Is index trained?", index_ivf.is_trained),
    ("Total vectors in index:", index_ivf.ntotal),
    ("Dimensionality of index:", index_ivf.d),
):
    print(label, value)

Is index trained? True
Total vectors in index: 10000
Dimensionality of index: 128


In [12]:
# One random query vector with the same dimensionality as the indexed data.
xq = np.random.random(size=(1, d)).astype(np.float32)

# nprobe is set to 5 before searching the IVF index.
index_ivf.nprobe = 5

# Ask for the 5 nearest stored vectors: D holds distances, I holds their ids.
D, I = index_ivf.search(xq, 5)
print("Nearest Indices:", I)
print("Distances: ", D)

Nearest Indices: [[ 964  315 7176   20 3856]]
Distances:  [[16.165245 16.287088 16.371468 16.508396 16.638067]]


In [13]:
# HNSW index: M is the number of connections per node (the 'Hub' factor).
M = 16
index_hnsw = faiss.IndexHNSWFlat(d, M)

# The vectors are added directly; this cell makes no train() call.
index_hnsw.add(xb)

In [14]:
# One random query vector with the same dimensionality as the indexed data.
xq = np.random.random(size=(1, d)).astype(np.float32)

# efSearch is set to 50 on the HNSW structure before searching.
index_hnsw.hnsw.efSearch = 50

# Ask for the 5 nearest stored vectors: D holds distances, I holds their ids.
D, I = index_hnsw.search(xq, 5)
print("Nearest Indices:", I)
print("Distances:", D)

Nearest Indices: [[ 464 6252 7831 5770  840]]
Distances: [[14.281326 14.413756 15.021911 15.068469 15.074732]]


In [15]:
# Product-quantization index: each vector is split into m sub-vectors.
m = 8
nbits = 8  # third IndexPQ argument, kept at the same value as before
index_pq = faiss.IndexPQ(d, m, nbits)

# The index is trained on the data and then populated with it.
index_pq.train(xb)
index_pq.add(xb)
print("PQ Compression complete. RAM usage minimized.")

PQ Compression complete. RAM usage minimized.


In [16]:
# One random query vector with the same dimensionality as the indexed data.
xq = np.random.random(size=(1, d)).astype(np.float32)

# Ask the PQ index for the 5 nearest stored vectors: D holds distances, I holds ids.
D, I = index_pq.search(xq, 5)
print("Nearest Indices:", I)
print("Distances:", D)

Nearest Indices: [[5373  338 2931  956 1267]]
Distances: [[11.226145 11.345858 11.427726 11.438801 11.47492 ]]
