# Retrieving and Chaining 

In [1]:
from langchain_community.document_loaders import PyPDFLoader
# Load the prospectus PDF. The recorded output shows Document objects carrying
# `source` and `page` metadata alongside the extracted page text.
loader = PyPDFLoader("Prospectus.pdf")
docs = loader.load()

# Preview only the first two entries: echoing the whole list floods the
# notebook output with the full text of the PDF.
docs[:2]

[Document(metadata={'source': 'Prospectus.pdf', 'page': 0}, page_content='1\n \nPROSPECTUS \n \n(202\n3\n \n–\n \n202\n4\n)\n \n \n \n \n \n \n \n \n \n \nFor Admissions to Undergraduate, Masters‟ & \nPh.D. Programmes\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \nGOVIND BALLABH PANT \n \nUNIVERSITY OF AGRICULTURE & TECHNOLOGY, \nPANTNAGAR \n–\n \n263145, U.S. NAGAR\n \nUTTARAKHAND, INDIA\n'),
 Document(metadata={'source': 'Prospectus.pdf', 'page': 1}, page_content='2 \nCONTENTS \nPARTICULARS                                   Page \nSOURCES OF AVAILABILITY OF APPLICATION FORM AND PROSPECTUS  3 \n \nIMPORTANT INSTRUCTIONS FOR CANDIDATES 4 – 6 \n \nINSTRUCTIONS TO CANDIDATES FOR FILLING OF ONLINE APPLICATION FORM  7 - 9 \n \nCHAPTERS \n \nI The University, Colleges, Departm ent of Student Welfare, Placement \n& Counselling, University Library, Medical Health Services  \n10 -14 \n \nII Admission Criteria and Entrance Examination, Time of Admission, \nDate and Time of Entrance Examina

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Configure the recursive splitter: chunk size 1000 with an overlap of 20
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)

# Sanity check: look at the first ten chunks produced from the loaded pages.
text_splitter.split_documents(docs)[:10]

[Document(metadata={'source': 'Prospectus.pdf', 'page': 0}, page_content='1\n \nPROSPECTUS \n \n(202\n3\n \n–\n \n202\n4\n)\n \n \n \n \n \n \n \n \n \n \nFor Admissions to Undergraduate, Masters‟ & \nPh.D. Programmes\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \nGOVIND BALLABH PANT \n \nUNIVERSITY OF AGRICULTURE & TECHNOLOGY, \nPANTNAGAR \n–\n \n263145, U.S. NAGAR\n \nUTTARAKHAND, INDIA'),
 Document(metadata={'source': 'Prospectus.pdf', 'page': 1}, page_content='2 \nCONTENTS \nPARTICULARS                                   Page \nSOURCES OF AVAILABILITY OF APPLICATION FORM AND PROSPECTUS  3 \n \nIMPORTANT INSTRUCTIONS FOR CANDIDATES 4 – 6 \n \nINSTRUCTIONS TO CANDIDATES FOR FILLING OF ONLINE APPLICATION FORM  7 - 9 \n \nCHAPTERS \n \nI The University, Colleges, Departm ent of Student Welfare, Placement \n& Counselling, University Library, Medical Health Services  \n10 -14 \n \nII Admission Criteria and Entrance Examination, Time of Admission, \nDate and Time of Entrance Examinati

In [3]:
# Split the loaded pages into chunks; `documents` is what gets embedded and
# indexed in the vector store further down.
documents = text_splitter.split_documents(docs)

# Show a short preview instead of echoing every chunk, which would bloat the
# notebook output.
documents[:3]

[Document(metadata={'source': 'Prospectus.pdf', 'page': 0}, page_content='1\n \nPROSPECTUS \n \n(202\n3\n \n–\n \n202\n4\n)\n \n \n \n \n \n \n \n \n \n \nFor Admissions to Undergraduate, Masters‟ & \nPh.D. Programmes\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \nGOVIND BALLABH PANT \n \nUNIVERSITY OF AGRICULTURE & TECHNOLOGY, \nPANTNAGAR \n–\n \n263145, U.S. NAGAR\n \nUTTARAKHAND, INDIA'),
 Document(metadata={'source': 'Prospectus.pdf', 'page': 1}, page_content='2 \nCONTENTS \nPARTICULARS                                   Page \nSOURCES OF AVAILABILITY OF APPLICATION FORM AND PROSPECTUS  3 \n \nIMPORTANT INSTRUCTIONS FOR CANDIDATES 4 – 6 \n \nINSTRUCTIONS TO CANDIDATES FOR FILLING OF ONLINE APPLICATION FORM  7 - 9 \n \nCHAPTERS \n \nI The University, Colleges, Departm ent of Student Welfare, Placement \n& Counselling, University Library, Medical Health Services  \n10 -14 \n \nII Admission Criteria and Entrance Examination, Time of Admission, \nDate and Time of Entrance Examinati

In [6]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Embed chunks with the Ollama "gemma:2b" model and build a FAISS index from them.
# NOTE(review): only the first 30 chunks are indexed, so similarity search can
# never return text from later chunks -- confirm this cap is intentional.
embeddings_model = OllamaEmbeddings(model="gemma:2b")
db = FAISS.from_documents(
    documents=documents[:30],
    embedding=embeddings_model,
)

In [8]:
# Ask the FAISS index for the chunks most similar to the query, then display
# the text of the best-ranked hit.
query = "Admission procedure for other State Candidates"
result = db.similarity_search(query=query)
result[0].page_content

'they apply for admission under unreserved category (code 1100).'

# Using an LLM to answer the query

In [9]:
from langchain_community.llms import Ollama

# Create the Ollama-backed "gemma:2b" LLM used to generate answers, then echo
# the object so its configuration is visible in the cell output.
llm=Ollama(model="gemma:2b")
llm

Ollama(model='gemma:2b')

In [10]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt used by the document chain. It has two placeholders:
#   {context} - filled with the retrieved document text by the chain
#   {input}   - the user's question, passed under the "input" key on invoke
# The final line previously read `Question"{input}` (a stray double quote in
# place of the colon), which sent a malformed question label to the model.
prompt=ChatPromptTemplate.from_template(""" 
                                        Answer the following question based only on the provided context.
                                        Think step by step before providing a detailed answer.
                                        I will provide you with a new GPU to run yourself if the answer is helpful.
                                        <context>
                                        {context}
                                        </context>
                                        Question: {input}""")

In [12]:
# Create the chain.
# Build a "stuff" documents chain from the LLM and the prompt defined above.

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)


In [13]:
# Wrap the FAISS store in a retriever so it can be plugged into a chain.
# No search options are passed; the echoed repr shows search_kwargs={}.
retriever=db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002921F5154F0>, search_kwargs={})

In [15]:
from langchain.chains import create_retrieval_chain

# Combine the retriever with the document chain into one retrieval chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [17]:
# Run the retrieval chain; the text under the "input" key fills the prompt's
# {input} placeholder.
# NOTE(review): this input is a phrase rather than a question, and the recorded
# answer talks about an unrelated topic -- consider rephrasing it as a question.
response=retrieval_chain.invoke({"input":"For  Undergraduate  and  Postgraduate  programmes  Bonafide  Residents  of  Uttarakhand "})

In [18]:
# Display the generated answer text stored under the 'answer' key of the response.
response['answer']

"The context does not provide any information about the candidate's or the University's policies or \nprotocols for filling vacant seats in Ph.D. programmes. Therefore, I cannot answer this question from the provided context."