## PDF Query Using LangChain

In [None]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken

In [None]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
import os
from getpass import getpass

# SECRET_KEY is read by the later cells that build the OpenAI objects, so it must
# be defined here. Reuse a key that is already exported in the environment;
# otherwise prompt for it so the secret is never written into the notebook.
SECRET_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

os.environ["OPENAI_API_KEY"] = SECRET_KEY
# NOTE(review): the SerpAPI variable receives the same value as the OpenAI key,
# exactly as in the original cell, and no visible cell uses SerpAPI -- confirm
# whether this line is needed and which key it should hold.
os.environ["SERPAPI_API_KEY"] = SECRET_KEY

In [None]:
# Path of the PDF file to query; PdfReader opens it so that its pages can be
# read in the next cell.
pdf_path = '/content/TIAA_govt_schemes.pdf'
pdfreader = PdfReader(pdf_path)

In [None]:
from typing_extensions import Concatenate  # NOTE(review): unused in the visible cells; likely an accidental auto-import -- confirm and drop

# Read the text of every page and concatenate it into a single string.
# Pages for which extract_text() returns nothing (None or '') are skipped.
# NOTE(review): pages are joined without a separator, exactly as before, so the
# last line of one page runs directly into the first line of the next --
# confirm that this is intended.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(text for text in page_texts if text)

In [None]:
# Preview only the beginning of the extracted text; echoing the whole document
# floods the cell output and bloats the saved notebook.
raw_text[:1000]

In [None]:
# Break the raw text into overlapping chunks so that no single piece grows too
# large in token count. Splitting happens on newlines, and sizes are measured
# with len(), i.e. in characters.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,       # target size of each chunk
    chunk_overlap=200,    # overlap configured between consecutive chunks
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

In [None]:
# Number of chunks produced by the splitter.
len(texts)

26

In [None]:
# Build the OpenAI embeddings object, authenticated with SECRET_KEY.
# It is passed to FAISS in the next cell to vectorize the text chunks.
embeddings = OpenAIEmbeddings(openai_api_key = SECRET_KEY)

In [None]:
# Build a FAISS vector store from the text chunks, using `embeddings` to
# vectorize them. The store is queried by similarity in the cells below.
document_search = FAISS.from_texts(texts, embeddings)

In [None]:
# Display the vector store object as a quick check that it was created.
document_search


<langchain.vectorstores.faiss.FAISS at 0x7aa7ce0e1f90>

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [None]:
# Question-answering chain of type "stuff", backed by an OpenAI LLM that is
# authenticated with SECRET_KEY.
llm = OpenAI(openai_api_key=SECRET_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
# Look up the chunks most similar to the query in the FAISS store, then have
# the QA chain answer the query from those chunks. The answer is the cell output.
query = "Sovereign gold bonds"
docs = document_search.similarity_search(query)
chain.run(question=query, input_documents=docs)

' Sovereign Gold Bonds are issued by the Reserve Bank of India on behalf of the Indian Government. They are gold-backed government bonds and provide a set interest of 2.5% yearly on the issue price, in addition to the price fluctuation gain. They are a paper-based instrument and can be used for secured loans with them as collateral. They provide a similar Loan to Value ratio as a loan secured by actual gold and redemption is permitted after the fifth year.'

In [None]:
# Same retrieve-then-answer flow as the previous query cell, with a new question:
# fetch the most similar chunks and let the QA chain answer from them.
query = "who is the national pension scheme for?"
docs = document_search.similarity_search(query)
chain.run(question=query, input_documents=docs)

' The National Pension Scheme (NPS) is available to all Indians including NRIs (Non-Resident Indians) between the age of 18 to 60.'

In [None]:
from langchain.document_loaders import OnlinePDFLoader

In [None]:
# Remote PDF used for the second part of the notebook; the loader is consumed
# by loader.load() and by the index creator in later cells.
pdf_url = "https://pensionersportal.gov.in/Document/Retirement_benefits_in_one_click.pdf"
loader = OnlinePDFLoader(pdf_url)

In [None]:
!pip install unstructured

In [None]:
!pip install pdf2image pdfminer.six

In [None]:
!pip install unstructured_pytesseract
!pip install unstructured_inference

In [None]:
from PIL import Image

import pytesseract

In [None]:
from pdfminer.utils import open_filename  # NOTE(review): not referenced in any visible cell -- confirm it is needed
# Load the online PDF through the loader created earlier. The [nltk_data]
# download messages in the recorded output are emitted during this call.
# NOTE(review): `data` is not used by any later visible cell.
data = loader.load()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
# Re-creates the OpenAI embeddings object with the same call as the earlier cell.
# NOTE(review): no later visible cell references `embeddings`
# (VectorstoreIndexCreator() below is built without arguments), so this cell
# may be redundant -- confirm before removing.
embeddings = OpenAIEmbeddings(openai_api_key = SECRET_KEY)

In [None]:
!pip install chromadb

In [None]:
from langchain.indexes import VectorstoreIndexCreator

# Build a queryable index directly from the PDF loader, using the creator's
# default settings.
# NOTE(review): presumably the default vector store is Chroma (hence the
# chromadb install above) -- confirm.
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders([loader])

In [None]:
# Ask the index a question about the loaded PDF; the answer is the cell output.
# NOTE(review): the trailing "1" in "pension1" looks like a stray character
# (perhaps a copied footnote marker) -- confirm before changing it, since the
# string is sent verbatim as the query.
query = "Explain me about Right of President to withhold or withdraw pension1"
index.query(query)

' The President reserves the right to withhold or withdraw a pension or gratuity, either in full or in part, or to withdraw a pension in full or in part, whether permanently or for a specified period. This can be done if the pensioner is found guilty of grave misconduct or negligence during the period of service, including service rendered upon re-employment after retirement. The Union Public Service Commission must be consulted before any final orders are passed, and the amount of pension withheld or withdrawn must not reduce the amount of minimum family pension.'