In [None]:
# Setup: install the packages used below, load environment variables, and
# create the embedding model shared by the following cells.
%pip install python-dotenv --upgrade --quiet langchain langchain-huggingface sentence-transformers langchain-community
%pip install -q langchain-google-genai

# load_dotenv() is called before the remaining imports; presumably so that any
# values defined in a local .env file are already in the environment when those
# libraries are imported and used. Keep this ordering.
from dotenv import load_dotenv
load_dotenv()

import os
from langchain_huggingface import HuggingFaceEmbeddings

# Using a FREE, open-source model from Hugging Face
# 'all-MiniLM-L6-v2' is small, fast, and very good for English.
# `embeddings` is the object the next cells call embed_query() on.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.5/66.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Turn one word into its embedding vector, then report the vector's length
# and a short preview of its leading values.
vector = embeddings.embed_query("Apple")

print(
    f"Dimensionality: {len(vector)}",
    f"First 5 numbers: {vector[:5]}",
    sep="\n",
)

Dimensionality: 384
First 5 numbers: [-0.006138487718999386, 0.03101177327334881, 0.06479360908269882, 0.01094149798154831, 0.005267191678285599]


In [None]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: 1-D sequence or array of numbers.
        b: 1-D sequence or array of numbers, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. When either vector has zero norm the
        angle is undefined; 0.0 is returned rather than dividing by zero,
        which would produce NaN and a RuntimeWarning.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Zero-length vector: no direction to compare against.
        return 0.0
    return np.dot(a, b) / denom

# Embed three animal words. Each variable is named after the word it holds so
# the labels printed below match the vectors that are actually compared.
vec_rat = embeddings.embed_query("Rat")
vec_cat = embeddings.embed_query("Cat")
vec_dog = embeddings.embed_query("Dog")

print(f"Rat vs Cat: {cosine_similarity(vec_rat, vec_cat):.4f}")
print(f"Cat vs Dog: {cosine_similarity(vec_cat, vec_dog):.4f}")

Rat vs Cat: 0.4981
Cat vs Dog: 0.5067


In [None]:
# Setup for the retrieval part: install faiss-cpu and the LangChain packages,
# load environment variables, then create the chat model and embedding model.
%pip install python-dotenv --upgrade --quiet faiss-cpu langchain-huggingface sentence-transformers langchain-community
from dotenv import load_dotenv
load_dotenv()

import getpass
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings

# Ask for the key only when it is not already in the environment. getpass
# masks what is typed, so the key does not appear in the saved cell output.
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

# Chat model used as the generation step of the chain built further down.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Using the same free model as Part 4a
# (the model name is identical to the one in the first setup cell).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

Enter your Google API Key: ··········


In [None]:
from langchain_core.documents import Document

# Miniature knowledge base: every sentence below becomes one Document, in the
# same order, to be indexed and retrieved later.
docs = [
    Document(page_content=text)
    for text in (
        "Reanna's favorite pet is Husky with heterochromia.",
        "The secret password to the lab is 'Fuchsia'.",
        "LangChain is a framework for developing applications powered by language models.",
    )
]

In [None]:
from langchain_community.vectorstores import FAISS

# Embed every document with `embeddings` and store the vectors in a FAISS index.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Retriever view over the store; the chain uses it to fetch context for a question.
retriever = vectorstore.as_retriever()

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

template = """
Answer based ONLY on the context below:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the raw list of
    Document objects, so the model sees their Python repr (class name,
    metadata, quoting) instead of just the text.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The input question is fanned out to two branches: the retriever (whose hits
# are flattened to plain text) fills {context}; the question itself passes
# through unchanged to fill {question}.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

result = chain.invoke("What is the secret password?")
print(result)

The secret password to the lab is 'Fuchsia'.


In [None]:
import faiss
import numpy as np

# Mock Data: 10,000 vectors of size 128
d = 128
nb = 10000

# Seed NumPy's global generator so the database vectors (and the random query
# vectors drawn with np.random.random in later cells) are the same on every
# Restart & Run All, making the printed neighbours reproducible.
SEED = 42
np.random.seed(SEED)

# FAISS is fed float32 data here, hence the explicit cast from float64.
xb = np.random.random((nb, d)).astype('float32')

In [None]:
# Flat L2 index (the "check all" baseline in the summary at the end of the
# notebook) for d-dimensional vectors; add() stores every row of xb in it.
index = faiss.IndexFlatL2(d)
index.add(xb)

In [None]:
# Read the vector with id 0 back out of the index and preview its first 10 values.
vector_0 = index.reconstruct(0)
print(vector_0[:10])


[0.9921946  0.25170207 0.42734024 0.90461874 0.44933793 0.7279866
 0.71830857 0.39484948 0.6749314  0.27950346]


In [None]:
# Query setup: number of neighbours to return and a single random query row.
k = 5
xq = np.random.random((1, d)).astype('float32')

# (Re)build the flat L2 index over the mock data and report its size.
index = faiss.IndexFlatL2(d)
index.add(xb)
print(f"Flat Index contains {index.ntotal} vectors")

# D holds the distances and I the ids of the k closest stored vectors.
D, I = index.search(xq, k)

print("Nearest vector indices:", I)
print("Distances:", D)


Flat Index contains 10000 vectors
Nearest vector indices: [[8527 9805 9939 1951 7958]]
Distances: [[13.92054  14.046453 14.464242 14.831488 14.842429]]


In [None]:
# IVF index: vectors are grouped into nlist clusters; a flat L2 index serves
# as the quantizer passed to the IVF constructor.
nlist = 100 # How many 'zip codes' (clusters) we want
quantizer = faiss.IndexFlatL2(d) # Flat L2 index handed to the IVF index as its quantizer
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist)

# We MUST train it first so it learns where the clusters are;
# only then are the vectors added. Keep train() before add().
index_ivf.train(xb)
index_ivf.add(xb)

In [None]:
# Sanity check after building: training flag, number of stored vectors,
# and the cluster count the index was created with.
print("Is index trained?", index_ivf.is_trained)
print("Total vectors in index:", index_ivf.ntotal)
print("Number of clusters (nlist):", index_ivf.nlist)


Is index trained? True
Total vectors in index: 10000
Number of clusters (nlist): 100


In [None]:
# Search the IVF index: only nprobe of the nlist clusters are visited per query.
index_ivf.nprobe = 5   # search in 5 clusters

# A new random query row is drawn here, so results are not comparable
# one-to-one with the searches in other cells.
xq = np.random.random((1, d)).astype('float32')
D, I = index_ivf.search(xq, 5)  # ask for the 5 nearest stored vectors

print("Nearest indices:", I)
print("Distances:", D)


Nearest indices: [[6723 1377 8460 6466  991]]
Distances: [[14.630767  15.6568985 15.716265  16.214739  16.348095 ]]


In [None]:
# NOTE(review): this cell repeats the previous IVF search verbatim; the only
# thing that differs between runs is the freshly drawn random query vector.
index_ivf.nprobe = 5   # search in 5 clusters

xq = np.random.random((1, d)).astype('float32')
D, I = index_ivf.search(xq, 5)  # ask for the 5 nearest stored vectors

print("Nearest indices:", I)
print("Distances:", D)


Nearest indices: [[5810 4498 5017 8527 5388]]
Distances: [[13.2250805 14.18395   14.899651  14.925881  14.96047  ]]


In [None]:
# HNSW index (graph-based; "travel via hubs" in the summary at the end).
# The vectors are added directly; no train() call is made for this index.
M = 16 # Number of connections per node (The 'Hub' factor)
index_hnsw = faiss.IndexHNSWFlat(d, M)
index_hnsw.add(xb)

In [None]:
# Draw a new random query row and look up its 5 nearest neighbours in the
# HNSW index.
xq = np.random.random((1, d)).astype('float32')

D, I = index_hnsw.search(xq, 5)

print("Nearest indices:", I)
print("Distances:", D)


Nearest indices: [[8343 7021 6946 5123 5488]]
Distances: [[11.865313  12.793842  13.438298  13.82095   13.9195595]]


In [None]:
# Product-quantization index ("compress data" in the summary at the end).
m = 8 # Split vector into 8 sub-vectors
# NOTE(review): the third argument (8) is presumably the number of bits used
# to encode each sub-vector — confirm against the FAISS IndexPQ signature.
index_pq = faiss.IndexPQ(d, m, 8)
# As with the IVF index above, train() is called before add().
index_pq.train(xb)
index_pq.add(xb)
print("PQ Compression complete. RAM usage minimized.")

PQ Compression complete. RAM usage minimized.


| Index | Speed     | Accuracy  | Memory   |
| ----- | --------- | --------- | -------- |
| Flat  | Slow      | 100%      | High     |
| IVF   | Fast      | High      | Medium   |
| HNSW  | Very Fast | Very High | High     |
| PQ    | Very Fast | Medium    | Very Low |

| Method | Think as        |
| ------ | --------------- |
| Flat   | Check All       |
| IVF    | Go to Section   |
| HNSW   | Travel via Hubs |
| PQ     | Compress Data   |

- Flat → Exact but heavy
- IVF → Clustered search
- HNSW → Graph navigation
- PQ → Compressed storage

