In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Make the Hugging Face token available to libraries that read HF_TOKEN.
# os.environ only accepts str values: os.getenv() returns None when the
# variable is missing (e.g. no .env entry), and assigning None raises
# TypeError, so only write the value back when one was actually found.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token
else:
    print("HF_TOKEN is not set; Hugging Face requests will be unauthenticated.")

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model shared by every cell below (query/document embeddings and
# all FAISS stores are built from this one object).
embeddings = HuggingFaceEmbeddings(model='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Smoke test: embed one query string; the result is a flat list of floats.
embeddings.embed_query("What is AI?")

[-0.024964429438114166,
 -0.009133666753768921,
 -0.0074615501798689365,
 0.01500906515866518,
 0.013310400769114494,
 -0.01003604382276535,
 0.07456013560295105,
 0.042671456933021545,
 0.01698850654065609,
 0.05595095455646515,
 -0.02968190796673298,
 -0.0043560778722167015,
 0.020532336086034775,
 -0.04828270897269249,
 -0.05866947025060654,
 0.0423620231449604,
 -0.01891947351396084,
 -0.05299251899123192,
 -0.08717173337936401,
 -0.06998185813426971,
 -0.00872021820396185,
 0.019678255543112755,
 -0.04860038310289383,
 -0.0487065464258194,
 -0.03246693313121796,
 0.09295571595430374,
 0.004014125559478998,
 -0.06721631437540054,
 -0.0021876527462154627,
 -0.011162964627146721,
 0.0121077261865139,
 -0.023686645552515984,
 0.10230044275522232,
 0.019198505207896233,
 -0.06650934368371964,
 0.04128838703036308,
 -0.04098040238022804,
 -0.027531668543815613,
 0.06851060688495636,
 -0.029644552618265152,
 -0.019207928329706192,
 -0.05420266091823578,
 0.016852693632245064,
 -0.0719662

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
# Three short candidate texts to rank against `query` in the cells below.
# NOTE: `docs` is rebound to the loaded PDF pages in the RAG section, so
# re-run this cell before re-running the similarity cells.
docs = ["What is capital of USA?",
        "Who is president of USA?",
        "Who is prime minister of India?"]

In [6]:
# Statement to compare against each entry in `docs`.
query = "Narendra Modi is a prime minister of India."

# embed_documents returns one vector per input string (a list of lists).
embed_docs = embeddings.embed_documents(docs)
embed_docs

[[0.12916424870491028,
  -0.016648169606924057,
  -0.031545717269182205,
  0.07168423384428024,
  -0.048201609402894974,
  -0.03861646354198456,
  0.026379749178886414,
  -0.04055863991379738,
  -0.031211916357278824,
  -0.031652968376874924,
  -0.016165966168045998,
  -0.05807793512940407,
  -0.00618759635835886,
  -0.017172308638691902,
  -0.05594148859381676,
  -0.05726798251271248,
  0.008715463802218437,
  -0.012498126365244389,
  0.03792087361216545,
  0.01282880362123251,
  0.011412449181079865,
  -0.019266510382294655,
  0.001485836342908442,
  -0.016834493726491928,
  0.06561405211687088,
  0.022221866995096207,
  -0.0019214540952816606,
  0.00030166094074957073,
  0.0010148158762603998,
  -0.031959570944309235,
  0.017244473099708557,
  -0.10596976429224014,
  0.07959354668855667,
  -0.03263964504003525,
  -0.02277286909520626,
  -0.0005826232372783124,
  0.07610723376274109,
  0.011086313053965569,
  0.07195227593183517,
  0.003820765297859907,
  0.012208136729896069,
  0.00

In [7]:
# Embed the query with the same model so it is comparable with embed_docs.
embed_query = embeddings.embed_query(query)
embed_query

[-0.024223633110523224,
 0.062155868858098984,
 -0.01198266725987196,
 -0.012442665174603462,
 0.03794030100107193,
 -0.04352504760026932,
 0.08473943918943405,
 -0.017722684890031815,
 -0.042698197066783905,
 0.013014213182032108,
 0.008439302444458008,
 -0.03880554810166359,
 -0.0012401372659951448,
 0.038317807018756866,
 0.05019214749336243,
 0.013480841182172298,
 -0.01003784779459238,
 -0.0018048587953671813,
 -0.03373197093605995,
 -0.06871870905160904,
 0.04352301359176636,
 0.0964057520031929,
 0.029311778023838997,
 -0.015270753763616085,
 0.01056936290115118,
 -0.02426041103899479,
 -0.03928472846746445,
 -0.03127268701791763,
 -0.021240854635834694,
 -0.0001529833534732461,
 0.031783826649188995,
 0.020750386640429497,
 -0.08357652276754379,
 -0.020933879539370537,
 -0.017965467646718025,
 0.021536216139793396,
 -0.09535069018602371,
 0.07645494490861893,
 0.10061068087816238,
 -0.06733176112174988,
 0.06443896144628525,
 -0.049659278243780136,
 0.010245386511087418,
 -0.06

In [8]:
# One row for the single query, one column per entry in `docs`;
# a larger value means the two texts are more similar.
cosine_similarity([embed_query],embed_docs)

array([[0.06259157, 0.31967895, 0.73855344]])

In [9]:
from sklearn.metrics.pairwise import euclidean_distances
# Same comparison expressed as a distance: a smaller value means closer,
# so the best match is the column with the lowest number.
distance = euclidean_distances([embed_query],embed_docs)
distance

array([[1.36923951, 1.16646568, 0.72311351]])

In [10]:
import faiss 
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [11]:
# Length of one embedding vector; the FAISS indexes below are created with
# this same dimension.
len(embed_query)

384

### FAISS

In [12]:
# Size the index from the embedding model itself instead of hard-coding 384,
# so this cell keeps working if `embeddings` is swapped for another model.
embedding_dim = len(embeddings.embed_query("hello world"))

# Flat L2 index paired with an empty in-memory docstore; texts are added in
# the following cells.
index = faiss.IndexFlatL2(embedding_dim)
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [13]:
# Embed the three strings with `embeddings` and store them; the return value
# is the list of ids generated for the new entries.
vector_store.add_texts(["AI is future","AI is powerful","Dogs are nice"])

['a15bdc3d-eccd-4640-919e-b12edce89c66',
 '51a3de8f-554c-47e0-8b26-932f29e267af',
 'b747c3e3-9deb-4434-8039-21298e626805']

In [14]:
# Mapping from position in the FAISS index to the docstore id of each text.
vector_store.index_to_docstore_id

{0: 'a15bdc3d-eccd-4640-919e-b12edce89c66',
 1: '51a3de8f-554c-47e0-8b26-932f29e267af',
 2: 'b747c3e3-9deb-4434-8039-21298e626805'}

In [15]:
# Ask for the two stored texts closest to the query.
results = vector_store.similarity_search("Tell me about AI",k=2)
results

[Document(metadata={}, page_content='AI is powerful'),
 Document(metadata={}, page_content='AI is future')]

In [16]:
# k=1 returns only the single best match; show just its text.
results = vector_store.similarity_search("how are dogs",k=1)
results[0].page_content

'Dogs are nice'

In [18]:
from langchain_core.documents import Document

# Ten sample documents, each tagged with the kind of source it came from.
# They are built from (text, source) pairs so the content reads as a table;
# the individual document_N names are still bound afterwards.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
        ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
        ("Building an exciting new project with LangChain - come check it out!", "tweet"),
        ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
        ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
        ("Is the new iPhone worth the price? Read this review to find out.", "website"),
        ("The top 10 soccer players in the world right now.", "website"),
        ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
        ("The stock market is down 500 points today due to fears of a recession.", "news"),
        ("I have a bad feeling I am going to get deleted :(", "tweet"),
    )
]

# Keep the per-document names available, in the same order as the list.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

In [19]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Size the index from the embedding model itself instead of hard-coding 384.
embedding_dim = len(embeddings.embed_query("hello world"))

# Inner-product index: a larger score means a closer match. The cosine and
# Euclidean results computed earlier satisfy d^2 = 2 - 2*cos, which suggests
# the embeddings are unit-length, so inner product behaves like cosine here.
index = faiss.IndexFlatIP(embedding_dim)

# Tell the FAISS wrapper the index is inner-product based. Its default is
# Euclidean distance, under which score thresholds and relevance scores would
# treat "larger" as "farther" and be interpreted the wrong way round.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [20]:
# Embed and store the ten sample Documents; returns their generated ids.
vector_store.add_documents(documents)

['1b35a5b5-8939-4fd8-9add-de76477fd380',
 '32741ca0-567b-4c2c-b164-d177899f6cdf',
 '4f10c528-ad88-48c1-82d9-c1ede81fe0f6',
 '00e92497-0c2f-44d9-93d7-4b0ba7681f57',
 'a0e58b9f-3e6e-40f1-bdc5-9968d38380e0',
 '1af06919-d789-48a8-a221-70ff17fedeb5',
 'ccad069b-cfb7-478c-a741-2b68f7922338',
 '9f56cece-0ae8-421f-bc19-254c2c21d359',
 'dc11f295-7b79-44ca-b4a2-110dbaa9449b',
 '69d0310f-88ef-41c1-bbe5-e686579c7785']

In [21]:
# Top-2 matches; each hit comes back with the metadata it was stored with.
vector_store.similarity_search("LangChain provides abstractions to make working with LLMs easy",k=2)

[Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [23]:
# Wrap the store as a retriever that returns the 3 best matches per query,
# then run the same question through it.
retriever = vector_store.as_retriever(search_kwargs={'k':3})
retriever.invoke("LangChain provides abstractions to make working with LLMs easy")

[Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [24]:
# Persist the current store (the ten sample documents) under the local
# "storage" path, relative to the working directory.
vector_store.save_local("storage")

#### RAG on the Llama 2 paper (`llama.pdf`)

In [26]:
from langchain_community.document_loaders import PyPDFLoader
# Load llama.pdf; the path is relative to the notebook's working directory.
loader = PyPDFLoader('llama.pdf')
# NOTE: this rebinds `docs`, which earlier held the three sample strings.
docs = loader.load()

In [27]:
# Number of Document objects returned by the loader.
len(docs)

77

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking configuration: chunks of up to 500 characters, with 50 characters
# shared between neighbouring chunks so text cut at a boundary stays findable.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)

In [29]:
# Apply the splitter to every loaded page; the value shown is the chunk count.
split_docs = splitter.split_documents(docs)
len(split_docs)

615

In [31]:
# Peek at the first two chunks to see where the splitter cut the text.
print(split_docs[0])
print("-------------------------")
print(split_docs[1])

page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models
Hugo Touvron∗ Louis Martin† Kevin Stone†
Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra
Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen
Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller
Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou
Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev' metadata={'source': 'llama.pdf', 'page': 0}
-------------------------
page_content='Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich
Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra
Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi
Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang
Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuc

In [32]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Size the index from the embedding model itself instead of hard-coding 384.
embedding_dim = len(embeddings.embed_query("hello world"))

# Fresh inner-product index for the PDF chunks. This rebinds `index` and
# `vector_store`, replacing the sample-document store built earlier.
index = faiss.IndexFlatIP(embedding_dim)

# Declare the inner-product metric explicitly. The wrapper defaults to
# Euclidean distance, under which score thresholds and relevance scores would
# be interpreted the wrong way round for an IP index.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

# Embed and store every chunk; returns one generated id per chunk.
vector_store.add_documents(split_docs)

['903d659c-51ed-464f-a174-24bdf7aa24d8',
 '4539a83e-1ece-4e0d-af18-271efa0b3d74',
 '09b26e34-3272-4d94-81de-7d21f9bb3a04',
 '68bca03e-5236-4870-a3be-2195ca4ef2bb',
 '412970ec-a1a1-42cd-98be-81dfbb4c2599',
 'ac27d402-c389-41bd-98d7-c688e5d08c0e',
 '66de51b4-0924-4e7c-b719-54efa0dc38d7',
 'ac675472-80b9-4176-8355-5e8bf68c09ea',
 'c8b95e7d-8578-49c4-913b-1ee1e3623e29',
 '912c8117-c9bf-455e-831a-83b27166987d',
 '578a2d6a-fda8-4dc0-b6f4-187c82531a1c',
 '1cd88424-68b7-408a-a813-796efa859cc4',
 'b89188de-d45a-4eef-96a0-2e3dc69bf67a',
 '72448446-fae9-47f2-9134-1309f879c52a',
 'cea62dc8-ab44-40b0-b8d4-eaa8b05034d9',
 'a1126bfe-1bb0-49c9-ba24-ca0a2f4b3653',
 '96e8117d-2ee1-4f90-a1b0-58156ada3d6d',
 '78664d58-a2bc-4212-b3d6-b1b1a4f36077',
 '63af834e-48f7-4727-a022-30c8ecd6076e',
 '5e94efe0-2ba9-4622-9524-8bb68155f36c',
 '39e129fc-a20b-45ff-830c-37707ae9e5aa',
 '343c10bb-f362-48f5-a186-39fd05ae4eee',
 '4a218952-20fb-453c-baf3-b91d384ce45a',
 '4e7b95e4-71c3-482a-8818-669f8cb9c3d0',
 '4c84c5e0-d9e7-

In [33]:
# Retriever over the PDF chunks that returns the 10 best matches per query.
# This rebinds `retriever`; the RAG chain below uses this version.
retriever = vector_store.as_retriever(
    search_kwargs={'k':10}
)
retriever.invoke("what is llama model?")

[Document(metadata={'source': 'llama.pdf', 'page': 3}, page_content='work (Section 6), and conclusions (Section 7).\n‡https://ai.meta.com/resources/models-and-libraries/llama/\n§We are delaying the release of the 34B model due to a lack of time to sufficiently red team.\n¶https://ai.meta.com/llama\n‖https://github.com/facebookresearch/llama\n4'),
 Document(metadata={'source': 'llama.pdf', 'page': 42}, page_content='10.18653/v1/2022.gebnlp-1.13. URLhttps://aclanthology.org/2022.gebnlp-1.13.\nAlonTalmor,JonathanHerzig,NicholasLourie,andJonathanBerant.Commonsenseqa: Aquestionanswering\nchallenge targeting commonsense knowledge.arXiv preprint arXiv:1811.00937, 2018.\nRohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and\nTatsunori B. Hashimoto. Stanford alpaca: An instruction-following llama model.https://github.com/\ntatsu-lab/stanford_alpaca, 2023.'),
 Document(metadata={'source': 'llama.pdf', 'page': 76}, page_content='models-and-librari

In [34]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used to generate the final answer in the RAG chain.
# NOTE(review): presumably picks up the Google API key from the environment
# populated by load_dotenv() in the first cell — confirm the .env entry exists.
model = ChatGoogleGenerativeAI(model='gemini-2.0-flash')

In [35]:
from langchain import hub
# Fetch the shared "rlm/rag-prompt" template from the LangChain Hub; as the
# repr below shows, it expects `context` and `question` inputs.
prompt = hub.pull("rlm/rag-prompt")
prompt



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [36]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [37]:
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

In [38]:
# First stage of the RAG chain: the incoming question is fanned out into the
# two inputs the prompt expects. "context" runs the question through the
# retriever and joins the hits with format_docs; "question" forwards the
# question unchanged.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Remaining stages: fill the prompt, call the chat model, reduce the reply to
# a plain string.
chain = rag_inputs | prompt | model | StrOutputParser()


In [39]:
# Run the full RAG chain on one question; the result is the answer as a str.
chain.invoke("what is llama model?")

'Llama 2 is a new technology intended for commercial and research use in English. Tuned models are designed for assistant-like chat, while pre-trained models can be adapted for various natural language generation tasks. It was trained between January 2023 and July 2023.'