Retriever and Chain with LangChain

In [1]:
# Load the source PDF into a list of LangChain Document objects.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('Unit5_CA_CPU.pdf')  # relative path: resolved against the working directory
docs = loader.load()


In [2]:
# Split the loaded documents into overlapping chunks ready for embedding.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # maximum size of a single chunk
    chunk_overlap=200,  # amount shared between neighbouring chunks
)
documents = text_splitter.split_documents(docs)
documents[:10]  # preview the first ten chunks

[Document(page_content='Unit-5\nCENTRAL  PROCESSING  UNIT\nBy\nEr.Sachita Nand Mishra\nM.E. in Computer and Electronics \nEngineering', metadata={'source': 'Unit5_CA_CPU.pdf', 'page': 0}),
 Document(page_content='CENTRAL  PROCESSING  UNIT\n•Introduction\n•General Register Organization\n•Stack Organization\n•Instruction Formats\n•Addressing Modes\n•Data Transfer and Manipulation\n•Program Control\n•Reduced Instruction Set Computer', metadata={'source': 'Unit5_CA_CPU.pdf', 'page': 1}),
 Document(page_content='Introduction\n•Part of computer that performs the bulk of data \nprocessing operations is called the Central \nprocessing Unit(CPU). It Consists of 3 major parts:\noRegister set: stores intermediate data during execution of an \ninstruction .\noALU: performs various microoperations required \noControl unit: supervises register transfers and instructs ALU', metadata={'source': 'Unit5_CA_CPU.pdf', 'page': 2}),
 Document(page_content='MAJOR  COMPONENTS  OF  CPUIntroduction \n•Storage C

In [3]:
# Embed the chunks with Ollama and index them in a FAISS vector store.
# NOTE(review): only the first 30 chunks are indexed, so any chunks beyond
# them can never be retrieved — confirm this cap is intentional.
# NOTE(review): OllamaEmbeddings() is created without arguments, so the
# library's default model/server settings apply — verify they match the
# local Ollama setup.
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(
    documents=documents[:30],
    embedding=OllamaEmbeddings(),
)

In [4]:
# Sanity-check the vector store with a direct similarity search and show
# the text of the best-ranked chunk.
# NOTE(review): the recorded output for this cell is the
# "ONE ADDRESS INSTRUCTIONS" slide rather than a register-organization
# slide, so retrieval quality looks weak — worth checking the embedding
# model and the number of indexed chunks.
query = ' GENERAL  REGISTER  ORGANIZATION'
result = db.similarity_search(query)
result[0].page_content

'ONE ADDRESS INSTRUCTIONS\n•One-Address Instructions\n-Use an implied AC register for all data manipulation\n-Program to evaluate  X = (A + B) * (C + D) :\nLOAD   A           /*  AC \uf0acM[A]   */\nADD     B           /*  AC \uf0acAC + M[B]  */\nSTORE  T            /*  M[T] \uf0acAC   */\nLOAD   C           /*  AC \uf0acM[C]   */\nADD     D           /*  AC \uf0acAC + M[D] */\nMUL     T            /*  AC \uf0acAC * M[T] */\nSTORE  X           /*  M[X] \uf0acAC   */'

In [5]:
# Create the LLM handle for the llama3 model served by Ollama.
from langchain_community.llms import Ollama

llm = Ollama(model='llama3:latest')


In [6]:
# Chat prompt for the question-answering step.
# The template has two placeholders: {context} and {input}. The invoke call
# later in this notebook supplies the "input" key.
# NOTE(review): {context} is presumably filled with the retrieved documents
# by the stuff-documents chain built from this prompt — confirm against the
# LangChain documentation.
from langchain_core.prompts import ChatPromptTemplate
prompt=ChatPromptTemplate.from_template("""
Answer the following question based only on provided context.
Think step by step before providing a detailed answer.
<context>
{context}      
</context>  

question:{input}                             
""")

In [7]:
# Build the "stuff documents" chain from the LLM and the chat prompt.
# NOTE(review): per the LangChain docs this chain is expected to place the
# supplied documents into the prompt's {context} slot — confirm for the
# installed version.
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)


In [8]:
# Expose the FAISS store through the retriever interface and display it.
# The variable name `retriver` is kept as-is because the retrieval-chain
# cell refers to it.
retriver = db.as_retriever()
retriver

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000021C7F628940>)

In [9]:
# Combine the retriever with the document chain into one retrieval chain.
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=document_chain,
)


In [10]:
# Run the retrieval chain on a question; the question is passed under the "input" key.
response = retrieval_chain.invoke(
    {"input": " Why we need CPU registers?"}
)

In [12]:
# Display the value stored under the "answer" key of the chain's response.
response["answer"]

"A great question!\n\nIn the context of computer architecture, CPU (Central Processing Unit) registers are a crucial component that enables the CPU to efficiently execute instructions and manage data. Here's why we need CPU registers:\n\n1. **Fast Access**: Registers provide fast access to stored values, which is essential for efficient processing. They are located on the CPU chip, close to the execution units, making it much faster to access register contents compared to accessing main memory.\n2. **Temporary Storage**: Registers act as temporary storage for intermediate results, allowing the CPU to perform complex calculations and operations without having to access main memory excessively.\n3. **Addressing Mode Support**: As described in the context, registers can be used as part of addressing modes (e.g., indexed or base register addressing). This enables the CPU to access different parts of memory quickly and efficiently.\n4. **Instruction Execution**: Registers are necessary for 