Retriever and Chain with LangChain

In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Load the textbook PDF into a list of LangChain Documents.
# The loader and the loaded documents get separate names so that `data`
# always refers to the document list, never to the loader object.
loader = PyPDFLoader("std7-bb-english.pdf")
data = loader.load()

# Preview only the first few documents instead of dumping the whole book.
data[:3]

[Document(page_content='', metadata={'source': 'std7-bb-english.pdf', 'page': 0}),
 Document(page_content='Maharashtra State Bureau of Textbook Production and\x03Curriculum Research, Pune. The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4  \nDated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017\nSTANDARD SEVENENGLISH\nBALBHARATI\n', metadata={'source': 'std7-bb-english.pdf', 'page': 1}),
 Document(page_content='© Maharashtra State Bureau of Textbook Production and \nCurriculum Research, Pune - 411 004.\n The Maharashtra State Bureau of Textbook Production \nand Curriculum Research reserves all rights relating to \nthe book. No part of this book should be reproduced without the written permission of the Director, Maharashtra State Bureau of Textbook Production and Curriculum  \nResearch, ‘Balbharati’, Senapati Bapat Marg, Pune 411004.\nEnglish Language Study Group :\nProduction :  \nSachchitanand Aphale \nChief Produ

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of size 1000 that overlap by 200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Peek at the first five chunks to sanity-check the split.
text_splitter.split_documents(data)[:5]

[Document(page_content='Maharashtra State Bureau of Textbook Production and\x03Curriculum Research, Pune. The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4  \nDated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017\nSTANDARD SEVENENGLISH\nBALBHARATI', metadata={'source': 'std7-bb-english.pdf', 'page': 1}),
 Document(page_content='© Maharashtra State Bureau of Textbook Production and \nCurriculum Research, Pune - 411 004.\n The Maharashtra State Bureau of Textbook Production \nand Curriculum Research reserves all rights relating to \nthe book. No part of this book should be reproduced without the written permission of the Director, Maharashtra State Bureau of Textbook Production and Curriculum  \nResearch, ‘Balbharati’, Senapati Bapat Marg, Pune 411004.\nEnglish Language Study Group :\nProduction :  \nSachchitanand Aphale \nChief Production Officer\nSachin Mehta\nProduction Officer\nNitin Wani\nAsst. Production Officer

In [3]:
# Split every loaded document, then keep just the first five chunks as a sample.
# NOTE(review): the vector store below is built from this sample only, so
# questions about later pages (e.g. the index) likely have no matching chunk.
all_chunks = text_splitter.split_documents(data)
sample_splitted_data = all_chunks[:5]
sample_splitted_data

[Document(page_content='Maharashtra State Bureau of Textbook Production and\x03Curriculum Research, Pune. The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4  \nDated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017\nSTANDARD SEVENENGLISH\nBALBHARATI', metadata={'source': 'std7-bb-english.pdf', 'page': 1}),
 Document(page_content='© Maharashtra State Bureau of Textbook Production and \nCurriculum Research, Pune - 411 004.\n The Maharashtra State Bureau of Textbook Production \nand Curriculum Research reserves all rights relating to \nthe book. No part of this book should be reproduced without the written permission of the Director, Maharashtra State Bureau of Textbook Production and Curriculum  \nResearch, ‘Balbharati’, Senapati Bapat Marg, Pune 411004.\nEnglish Language Study Group :\nProduction :  \nSachchitanand Aphale \nChief Production Officer\nSachin Mehta\nProduction Officer\nNitin Wani\nAsst. Production Officer

In [4]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings

from langchain_community.vectorstores import FAISS

# Embed the sample chunks with Ollama embeddings and index them in FAISS.
embedding_model = OllamaEmbeddings()
db = FAISS.from_documents(
    documents=sample_splitted_data,
    embedding=embedding_model,
)

db

<langchain_community.vectorstores.faiss.FAISS at 0x1d9dd243740>

In [5]:
# Use a real question: an empty query string gives the similarity search
# nothing to match against, so the "best" hit it returns is arbitrary.
query = "Who holds the rights to this textbook?"

# Retrieve the chunks closest to the query and show the top hit's text.
result = db.similarity_search(query)
result[0].page_content


'Maharashtra State Bureau of Textbook Production and\x03Curriculum Research, Pune. The Coordination Committee formed by GR No. Abhyas - 2116/(Pra.Kra.43/16) SD - 4  \nDated 25.4.2016 has given approval to prescribe this textbook in its meeting held on 3.3.2017\nSTANDARD SEVENENGLISH\nBALBHARATI'

In [6]:
from langchain_community.llms import Ollama
# Load the Llama 2 model ("llama2") through Ollama; the last line displays the LLM object.
llm = Ollama(model="llama2")
llm

Ollama()

In [7]:
# Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {context} receives the document content
# and {input} receives the user's question.
PROMPT_TEMPLATE = """
    Answer the following question based only on the provided content.
    Think step by step before providing a detailed summary.
    I will tip you $100 if user finds the answer helpful.
    <content>
    {context}
    </content>
    Question: {input}
    """

prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)


In [8]:
# Build the chain that stuffs the supplied documents into the prompt
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a stuff-documents chain.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [9]:
# Wrap the FAISS vector store in a retriever interface; the last line displays it.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D9DD243740>)

In [10]:
from langchain.chains import create_retrieval_chain

# Join the retriever and the document chain into one retrieval chain.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [None]:
# Run the retrieval chain on a question and display the raw response.
# (Typo "Canm" in the question text corrected to "Can".)
retriever_chain.invoke({"input": "Can you share the list of topics in index page"})

In [1]:
# Ask the retrieval chain a question. Requires `retriever_chain` from the
# earlier cell to exist in the current kernel.
question = "Can you share the list of topics in index page"
response = retriever_chain.invoke({"input": question})

# Show just the answer when the response has one; otherwise show the whole response.
response.get("answer", response)

NameError: name 'retriever_chain' is not defined