In [None]:
# Student identification banner: name and SRN, one per line.
print("Name: SUKRUTHA D", "SRN: PES2UG23CS622", sep="\n")

In [None]:
# Install/upgrade the packages used by the embedding and RAG cells of this notebook.
# NOTE(review): versions are unpinned, so a fresh install may pull newer releases — consider pinning.
%pip install python-dotenv --upgrade --quiet langchain langchain-huggingface sentence-transformers langchain-community

# **Part 4a: Embeddings and Vector Spaces**

In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
# This runs before the library import below, so keep the order as is.
load_dotenv()

import os
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model used by the following cells to turn text into vectors.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


**VIEWING A VECTOR**

In [None]:
# Embed a single word and keep the resulting vector for inspection.
vector = embeddings.embed_query("Orange")

# Report the vector's length and a preview of its leading components.
print(
    f"Dimensionality: {len(vector)}",
    f"First 10 numbers: {vector[:10]}",
    sep="\n",
)



**MATH: COSINE SIMILARITY**

In [None]:
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine of the angle between two vectors.

    Args:
        a: First vector (any sequence or array of numbers).
        b: Second vector, with the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a floating-point scalar, or ``0.0``
        when either vector has zero norm (the angle is undefined there).

    Raises:
        ValueError: If the two vectors have incompatible shapes (raised by
            ``np.dot``).
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard the denominator: a zero vector would otherwise give nan and a RuntimeWarning.
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

# Embed the three probe words in order; the unpacking targets follow the word order.
vec_cat, vec_dog, vec_car = (
    embeddings.embed_query(word) for word in ("Cat", "Dog", "Car")
)

# Compare "Cat" against each of the other two words.
print(f"Cat vs Dog: {cosine_similarity(vec_cat, vec_dog):.4f}")
print(f"Cat vs Car: {cosine_similarity(vec_cat, vec_car):.4f}")

# **Part 4b: Naive RAG Pipeline**

In [None]:
!pip install -U langchain-google-genai

In [None]:
%pip install python-dotenv --upgrade --quiet faiss-cpu langchain-huggingface sentence-transformers langchain-community
from dotenv import load_dotenv
load_dotenv()
import getpass
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


**THE KNOWLEDGE BASE**

In [None]:
from langchain_core.documents import Document

# Tiny in-memory knowledge base: one Document per fact, built from plain strings.
docs = [
    Document(page_content=text)
    for text in (
        "Sam likes pasta very much with roasted garlic bread",
        "The secret password to the lab is 'Pasta'.",
        "LangChain is a framework for developing applications powered by language models.",
    )
]

**INDEXING**

In [None]:
from langchain_community.vectorstores import FAISS

# Embed every document and store the resulting vectors in a FAISS-backed vector store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
# Expose the store through the retriever interface consumed by the RAG chain.
retriever = vectorstore.as_retriever()

**RAG Chain**

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Prompt that restricts the model to the retrieved context.
template = """
Answer based ONLY on the context below:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of documents exposing a ``page_content`` attribute.

    Returns:
        str: The documents' text, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The question is fed both to the retriever (to build the context) and, unchanged,
# to the prompt. The retriever output goes through format_docs so the prompt receives
# plain text instead of the repr of a list of Document objects.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

result = chain.invoke("What is the secret password?")
print(result)

# **Part 4c: Vector Indexes with FAISS**

In [None]:
import faiss
import numpy as np

SEED = 42   # fixed so a fresh "Restart & Run All" regenerates the same data
d = 128     # dimensionality of every vector
nb = 10000  # number of database vectors

# Seed NumPy's global generator. The query vectors drawn in later cells use the
# same generator, so they are reproducible too when cells run in order.
np.random.seed(SEED)
xb = np.random.random((nb, d)).astype('float32')

**FLAT INDEX**

In [None]:
# Build a flat L2 index of dimension d and load all database vectors into it.
index = faiss.IndexFlatL2(d)
index.add(xb)
# ntotal is the number of vectors currently held by the index.
print(f"Flat Index contains {index.ntotal} vectors")

**IVF (INVERTED FILE INDEX)**

In [None]:
# Number of inverted lists handed to the IVF index.
nlist = 100
quantizer = faiss.IndexFlatL2(d) # flat L2 index that the IVF index uses as its quantizer
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist)
# The index is trained on the data first; vectors are added only afterwards.
index_ivf.train(xb)
index_ivf.add(xb)

In [None]:
# Sanity-check the IVF index: training flag, stored-vector count and dimensionality.
print("Is index trained?", index_ivf.is_trained)
print("Total vectors in index:",index_ivf.ntotal)
print("Dimensionality of index:",index_ivf.d)

In [None]:
# nprobe is set before searching; presumably the number of inverted lists
# visited per query — confirm against the FAISS documentation.
index_ivf.nprobe=5
# One random query vector with the same dimensionality as the indexed data.
xq=np.random.random((1, d)).astype('float32')
# Request the 5 nearest neighbours: D holds the distances, I the matching indices.
D, I = index_ivf.search(xq, 5)
print("Nearest Indices:",I)
print("Distances: ",D)

**HNSW (Hierarchical Navigable Small World)**

In [None]:
# Second constructor argument of IndexHNSWFlat; presumably the number of graph
# links per node — confirm against the FAISS documentation.
M = 16
index_hnsw = faiss.IndexHNSWFlat(d, M)
# Vectors are added directly; this cell makes no train() call.
index_hnsw.add(xb)

In [None]:
# Sanity-check the HNSW index: training flag, stored-vector count and dimensionality.
print("Is index trained?", index_hnsw.is_trained)
print("Total vectors in index:", index_hnsw.ntotal)
print("Dimensionality of index:", index_hnsw.d)

In [None]:
# efSearch is set before searching; presumably the size of the candidate list
# explored per query — confirm against the FAISS documentation.
index_hnsw.hnsw.efSearch=10
# A fresh random query vector (not the one used in the earlier search cell).
xq=np.random.random((1, d)).astype('float32')
# Request the 5 nearest neighbours: D holds the distances, I the matching indices.
D, I = index_hnsw.search(xq, 5)
print("Nearest Indices:",I)
print("Distances: ",D)

**PQ (Product Quantization)**

In [None]:
# Second constructor argument of IndexPQ; presumably the number of sub-vectors
# each vector is split into — confirm against the FAISS documentation.
m = 8
# The trailing 8 is presumably the number of bits per sub-vector code — TODO confirm.
index_pq = faiss.IndexPQ(d, m, 8)
# The index is trained on the data first; vectors are added only afterwards.
index_pq.train(xb)
index_pq.add(xb)
print("PQ Compression complete. RAM usage minimized.")

In [None]:
# Sanity-check the PQ index: training flag, stored-vector count and dimensionality.
print("Is index trained?", index_pq.is_trained)
print("Total vectors in index:", index_pq.ntotal)
print("Dimensionality of index:", index_pq.d)

In [None]:
# A fresh random query vector (not the one used in the earlier search cells).
xq=np.random.random((1, d)).astype('float32')
# Request the 5 nearest neighbours: D holds the distances, I the matching indices.
D, I = index_pq.search(xq, 5)
print("Nearest Indices:",I)
print("Distances: ",D)