In [1]:
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
import os

# Re-export HF_TOKEN (loaded from .env by load_dotenv above) only when it is
# actually defined. Assigning None into os.environ raises TypeError, which
# made this cell crash on any machine without the token configured.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
embeddings.embed_query("hello AI")

[-0.033388249576091766,
 0.03453976660966873,
 0.059474553912878036,
 0.05928615853190422,
 -0.06353535503149033,
 -0.06819586455821991,
 0.08823322504758835,
 0.03444080054759979,
 -0.032785240560770035,
 -0.015814969316124916,
 0.020981717854738235,
 -0.018340256065130234,
 -0.039832208305597305,
 -0.08047077059745789,
 -0.014469213783740997,
 0.03326486796140671,
 0.014259275048971176,
 -0.034049950540065765,
 -0.1429157853126526,
 -0.023083386942744255,
 -0.021380161866545677,
 0.0026335541624575853,
 -0.04729267209768295,
 -0.010752695612609386,
 -0.06866798549890518,
 0.031124982982873917,
 0.07594592869281769,
 0.0011282711056992412,
 0.0116319814696908,
 -0.036039210855960846,
 0.04483756795525551,
 0.01839073933660984,
 0.12672804296016693,
 -0.001359742833301425,
 0.00820665992796421,
 0.06909968703985214,
 -0.08076360821723938,
 -0.05841311067342758,
 0.0537545308470726,
 0.026227522641420364,
 -0.006828585639595985,
 -0.056358400732278824,
 0.0032929633744060993,
 -0.072501

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Small corpus of questions used to compare similarity metrics against a query.
documents = [
    "what is a capital of USA?",
    "Who is a president of USA?",
    "Who is a prime minister of India?",
]

In [8]:
my_query="Narendra modi is prime minister of india?"

In [9]:
document_embedding=embeddings.embed_documents(documents)

In [10]:
document_embedding

[[0.11998691409826279,
  -0.02130262926220894,
  -0.04288087412714958,
  0.06645579636096954,
  -0.06435239315032959,
  -0.044248614460229874,
  0.022408435121178627,
  -0.04987310245633125,
  -0.023437602445483208,
  -0.03397196903824806,
  -0.014048057608306408,
  -0.06065930798649788,
  -0.003906792961061001,
  -0.01778210885822773,
  -0.047971054911613464,
  -0.06668158620595932,
  0.004103210289031267,
  -0.013092709705233574,
  0.04439776763319969,
  0.022350700572133064,
  0.0094595430418849,
  -0.020564505830407143,
  -0.00033560613519512117,
  -0.005685802083462477,
  0.05558692291378975,
  0.02512320503592491,
  -0.0028171567246317863,
  0.008758990094065666,
  0.003255283460021019,
  -0.015963437035679817,
  0.014263652265071869,
  -0.11220847815275192,
  0.08968563377857208,
  -0.03108372539281845,
  -0.024223804473876953,
  0.006152092479169369,
  0.08058714866638184,
  0.01824994944036007,
  0.05568316951394081,
  0.016702676191926003,
  0.01589604653418064,
  0.000341124

In [11]:
query_embedding=embeddings.embed_query(my_query)

In [12]:
len(query_embedding)

384

In [13]:
cosine_similarity([query_embedding],document_embedding)

array([[0.11756672, 0.3432456 , 0.81413236]])

In [14]:
from sklearn.metrics.pairwise import euclidean_distances

In [15]:
euclidean_distances([query_embedding], document_embedding)

array([[1.32848285, 1.14608417, 0.60970101]])

| Metric            | Similarity Score Range | Behavior                              |
| ----------------- | ---------------------- | ------------------------------------- |
| Cosine Similarity | \[-1, 1]               | Focuses on angle only |
| L2 Distance       | \[0, ∞)                | Focuses on **magnitude + direction**  |


In [17]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [20]:
# Size the exact (flat) L2 index from the embedding model itself instead of
# hard-coding 384, so the index dimension cannot drift from the model's output.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

In [19]:
index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x163814cf0> >

In [21]:
vector_store=FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)


In [22]:
vector_store.add_texts(["AI is future","AI is powerful","Dogs are cute"])

['d7bc6704-798a-401d-bbea-ba2ec305a4cd',
 '3c2c05de-03c9-4f2c-810a-bfc7425615b6',
 '2d5e793a-757e-4d81-a213-95ac90b0c457']

In [23]:
vector_store.index_to_docstore_id

{0: 'd7bc6704-798a-401d-bbea-ba2ec305a4cd',
 1: '3c2c05de-03c9-4f2c-810a-bfc7425615b6',
 2: '2d5e793a-757e-4d81-a213-95ac90b0c457'}

In [26]:
results = vector_store.similarity_search("Tell me about AI", k=2)

In [27]:
results


[Document(id='3c2c05de-03c9-4f2c-810a-bfc7425615b6', metadata={}, page_content='AI is powerful'),
 Document(id='d7bc6704-798a-401d-bbea-ba2ec305a4cd', metadata={}, page_content='AI is future')]

| Feature               | `Flat`                | `IVF` (Inverted File Index)        | `HNSW` (Graph-based Index)          |
| --------------------- | --------------------- | ---------------------------------- | ----------------------------------- |
| Type of Search     | Exact                 | Approximate (cluster-based)        | Approximate (graph-based traversal) |
| Speed               | Slow (linear scan)    | Fast (search only in top clusters) | Very Fast (graph walk)              |


| Dataset Size              | Recommended Index                 |
| ------------------------- | --------------------------------- |
| Up to 100K vectors (1 lakh) | `IndexFlatL2` or `IndexFlatIP`    |
| Up to 1M vectors            | `IndexIVFFlat` or `IndexHNSWFlat` |
| > 1M                      | `IndexIVFPQ` or `IndexHNSWFlat`   |


In [28]:
# Ten sample documents, each tagged with a "source" metadata value
# ("tweet", "news" or "website") so that metadata filtering can be shown later.
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

# NOTE(review): this rebinds `documents`, which earlier in the notebook held a
# list of plain strings; from here on it is a list of Document objects.
documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [29]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Inner-product flat index, sized from the embedding model rather than a
# hard-coded 384 so it always matches the model's output dimension.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatIP(embedding_dim)
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # The wrapper defaults to Euclidean distance. Declare that this index ranks
    # by inner product (higher = more similar) so score thresholds and
    # relevance scores are interpreted in the right direction.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [30]:
vector_store.add_documents(documents=documents)

['88f7a0e6-0e4b-4faa-9098-305ccc614e65',
 'f5789287-67a7-406b-8050-fe1e24e8e304',
 '20da7ffe-5ccb-400f-85ad-f39103bb04d3',
 'a28cb821-49a6-4d71-9fa8-2ad7be351c4c',
 '4f836de9-aebd-46fb-93e2-2b4ae5c1e0de',
 '30a46f84-84c8-48e2-85d8-ee3b5d053278',
 '3b9fe26e-7d53-490c-ac21-ac5980aa853a',
 '9be0dd4f-3186-442f-88ce-0f93707a367b',
 '59b0fe2c-50ab-4be7-9312-4f6acdff1c28',
 'de32715a-81c4-4d4e-825c-0ce0eea4ff85']

In [33]:
vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3 #hyperparameter
    
)

[Document(id='20da7ffe-5ccb-400f-85ad-f39103bb04d3', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='9be0dd4f-3186-442f-88ce-0f93707a367b', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='de32715a-81c4-4d4e-825c-0ce0eea4ff85', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [37]:
vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,#hyperparameter,
    filter={"source":{"$eq": "tweet"}}
    
)

[Document(id='20da7ffe-5ccb-400f-85ad-f39103bb04d3', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='9be0dd4f-3186-442f-88ce-0f93707a367b', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [38]:
# Same query as above, restricted to documents whose metadata "source" is "news".
result=vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    # k is intentionally not passed here, so the library's default result count applies.
    filter={"source":"news"}
    
)

In [39]:
result[0].metadata

{'source': 'news'}

In [40]:
result[0].page_content

'Robbers broke into the city bank and stole $1 million in cash.'

In [41]:
retriever=vector_store.as_retriever(search_kwargs={"k": 3})

In [42]:
retriever.invoke("LangChain provides abstractions to make working with LLMs easy")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[Document(id='20da7ffe-5ccb-400f-85ad-f39103bb04d3', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='9be0dd4f-3186-442f-88ce-0f93707a367b', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='de32715a-81c4-4d4e-825c-0ce0eea4ff85', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [None]:
# Where a vector store can live:
# - in-memory (server)
# - on-disk (server)
# - cloud (yet to discuss)

In [43]:
vector_store.save_local("today's class faiss index")

In [44]:
# Reload the index folder written by save_local() in the previous cell.
# NOTE(security): allow_dangerous_deserialization=True explicitly opts in to a
# deserialization step the library itself labels dangerous. Only load index
# folders you created yourself; never point this at untrusted files.
new_vector_store=FAISS.load_local(
  "today's class faiss index",embeddings ,allow_dangerous_deserialization=True
)

In [45]:
new_vector_store.similarity_search("langchain")

[Document(id='20da7ffe-5ccb-400f-85ad-f39103bb04d3', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='9be0dd4f-3186-442f-88ce-0f93707a367b', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='4f836de9-aebd-46fb-93e2-2b4ae5c1e0de', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(id='3b9fe26e-7d53-490c-ac21-ac5980aa853a', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]

#### Making a RAG

#### Data Ingestion

In [76]:
from langchain_community.document_loaders import PyPDFLoader

In [77]:
# Location of the Llama 2 paper PDF that is ingested below.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs on the author's machine; the path is also copied into every chunk's
# "source" metadata (visible in the retriever output further down).
FILE_PATH="/Users/nirmitkhurana/Desktop/ Nirmit Docs/Extra Studies/AgenticAI_2.0/Langchain Basics/6. VectorDatabase/data/llama2.pdf"

In [78]:
loader=PyPDFLoader(FILE_PATH)

In [79]:
# Sanity check: number of Document objects the loader produces for the PDF.
# NOTE(review): this parses the whole PDF and discards the result; the next
# cell calls loader.load() again to actually keep the pages.
len(loader.load())

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 29 0 (offset 0)
Ignoring wrong pointing object 35 0 (offset 0)
Ignoring wrong pointing object 44 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 62 0 (offset 0)
Ignoring wrong pointing object 64 0 (offset 0)
Ignoring wrong pointing object 66 0 (offset 0)
Ignoring wrong pointing object 92 0 (offset 0)
Ignoring wrong pointing object 110 0 (offset 0)
Ignoring wrong pointing object 138 0 (offset 0)
Ignoring wrong pointing object 195 0 (offset 0)


77

In [80]:
pages=loader.load()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 29 0 (offset 0)
Ignoring wrong pointing object 35 0 (offset 0)
Ignoring wrong pointing object 44 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 62 0 (offset 0)
Ignoring wrong pointing object 64 0 (offset 0)
Ignoring wrong pointing object 66 0 (offset 0)
Ignoring wrong pointing object 92 0 (offset 0)
Ignoring wrong pointing object 110 0 (offset 0)
Ignoring wrong pointing object 138 0 (offset 0)
Ignoring wrong pointing object 195 0 (offset 0)


In [None]:
# use this for fast loading

# pages = []
# async for page in loader.alazy_load():
#     pages.append(page)

In [83]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [84]:
# Splitter used to break the loaded PDF pages into smaller, overlapping chunks
# before they are embedded. Both values are tunable.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,#hyperparameter
    chunk_overlap=50 #hyperparameter
)

In [85]:
split_docs = splitter.split_documents(pages)

In [86]:
len(split_docs)

609

In [87]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Fresh inner-product index for the PDF chunks, sized from the embedding model
# rather than a hard-coded 384 so it always matches the model's output dimension.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatIP(embedding_dim)
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # The wrapper defaults to Euclidean distance. Declare that this index ranks
    # by inner product (higher = more similar) so score thresholds and
    # relevance scores are interpreted in the right direction.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [88]:
vector_store.add_documents(documents=split_docs)

['17f959da-a848-4819-8690-be1d7ced9d41',
 '0d449603-f872-46ee-988e-f94bd97b126d',
 '8c4945e3-ed1f-4fee-ade0-f8f569de2446',
 '35f12e13-c8a6-4ac3-a06e-aa737e23819b',
 'aa094834-97f3-4b06-a305-5366ebbca99b',
 '37398a44-f7f1-44e6-90c8-9d7a50f8a6e7',
 '849b6761-61bd-47f1-a67b-c244530dd154',
 '3d03cff9-abc8-47d7-8996-557c56d92af2',
 'c5db9217-6d89-47d7-96be-ad857d9b29d7',
 '721a8692-26da-4002-b7fe-5ba305815f29',
 '9861d3cb-dd54-4e0e-8ae5-31a43cc5d0e5',
 'ea35a107-d031-4916-a310-fbcaaf185e44',
 '96240146-a431-4245-b13c-22964c8588de',
 'b0003b1e-b1a5-4a9e-880b-3a42d879853d',
 '2d3bb9e6-6db6-4982-95da-505435a3de3c',
 'f4b47f95-25ea-49a5-a3f7-14a21f159784',
 'eb274e62-5d81-450d-b51c-7af22810f3d8',
 '6d1df41b-e9e3-4796-abcb-7374a35df120',
 '830fa869-ce2b-4592-a07f-4ae662ef38ad',
 '1f73cc46-2693-4172-87af-90a1aa3290da',
 'a8c5e8b3-02ad-4f38-88f0-bfce27b0a1b3',
 '0446f544-fe91-4f9a-8d58-c621e8203fd9',
 '737a18e1-fe17-4470-ae3a-b5cdf35f2064',
 'fade4e18-5804-41e4-9fab-d76b30c9666e',
 '0d2ee523-32f3-

In [89]:
retriever=vector_store.as_retriever(
    search_kwargs={"k": 10} #hyperparameter
)

In [90]:
retriever.invoke("what is llama model?")

[Document(id='0d2ee523-32f3-45ed-adb7-724ffa9e64c8', metadata={'producer': 'macOS Version 15.4.1 (Build 24E263) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250618235254Z00'00'", 'title': 'Learning — https:/tagmango.com/assets/llama2-bf0a30209b224e26e31087559688ce81', 'author': 'Nirmit Khurana', 'moddate': "D:20250618235254Z00'00'", 'source': '/Users/nirmitkhurana/Desktop/ Nirmit Docs/Extra Studies/AgenticAI_2.0/Langchain Basics/6. VectorDatabase/data/llama2.pdf', 'total_pages': 77, 'page': 3, 'page_label': '4'}, page_content='work (Section 6), and conclusions (Section 7).\n‡https://ai.meta.com/resources/models-and-libraries/llama/\n§ We are delaying the release of the 34B model due to a lack of time to su!ciently red team.\n¶ https://ai.meta.com/llama\n‖https://github.com/facebookresearch/llama\n4'),
 Document(id='a01ef468-bc09-4704-9659-4e8c5d4f3c75', metadata={'producer': 'macOS Version 15.4.1 (Build 24E263) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "

#### Retrieval and Generation

In [91]:
from langchain_openai import ChatOpenAI
model=ChatOpenAI(
    model="gpt-4o",
    temperature=0
)

In [92]:
model

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x36fde0ad0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x36fde1550>, root_client=<openai.OpenAI object at 0x34dc56660>, root_async_client=<openai.AsyncOpenAI object at 0x36fde0c20>, model_name='gpt-4o', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [93]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [94]:
import pprint

In [95]:
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [96]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [98]:
# What to chain?

# context(retriever),prompt(hub),model(openai),parser(langchain)

In [99]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by one
        blank line. An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)
    

In [100]:
# Compose the RAG pipeline with the `|` operator; stages are listed in the
# order retriever/format -> prompt -> model -> output parser.
rag_chain = (
    # "context": the retriever's documents flattened to one string by format_docs.
    # "question": supplied via RunnablePassthrough() (presumably the raw input
    # string given to invoke() - confirm against the LangChain docs).
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt  # the pulled rlm/rag-prompt template, which takes `context` and `question`
    | model
    | StrOutputParser()
)

In [101]:
rag_chain.invoke("what is llama model?")

'The LLaMA model, developed by Meta, is a family of large language models designed for both commercial and research use, particularly in English. It includes pretrained models that can be adapted for various natural language generation tasks and fine-tuned models intended for assistant-like chat applications. LLaMA is recognized for its computational efficiency during inference and is available under specific licensing and acceptable use policies.'