In [40]:
# Extract the text of the budget speech PDF.
# get_text's result is later handed to the text splitter, so it is expected
# to be a single string -- TODO confirm against pdf_reader.

from pdf_reader import PDFReader

reader = PDFReader()
raw_text = reader.get_text("budget_speech.pdf")

In [41]:
from langchain.text_splitter import CharacterTextSplitter

# Break the raw text into overlapping chunks on newline boundaries so that each
# piece stays small enough for embedding and for the LLM prompt.
# Sizes are measured in characters (length_function=len), not in tokens.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

In [8]:
import os
from langchain.embeddings.openai import OpenAIEmbeddings

# Embeddings object used to vectorise the text chunks.
# The API key is read from the environment (None if the variable is unset).
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)

In [9]:
# Build vector store
from langchain.vectorstores import FAISS

# Faiss is a library — developed by Facebook AI — that enables efficient similarity search

# Build the FAISS store from the chunks in `texts` using the `embeddings` object.
# `document_search` is what later cells query with similarity_search().
document_search = FAISS.from_texts(texts, embeddings)

In [32]:
# LLM that the question-answering chain will call.
# The API key is read from the environment (None if the variable is unset).
from langchain.llms import OpenAI

openai_llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))

In [29]:
# Question-answering chain built on top of the LLM.
from langchain.chains.question_answering import load_qa_chain

# chain_type "stuff": per the LangChain docs, all supplied documents are
# placed into a single prompt -- verify for the installed version.
chain = load_qa_chain(llm=openai_llm, chain_type="stuff")

In [30]:
# Prepare Docs
# Search the vector store for chunks similar to the query; the results are
# passed to the QA chain as its input documents.
query = "Vision for Amrit Kaal"
docs = document_search.similarity_search(query)

In [31]:
# Run Chain
# Ask the chain the question, supplying the retrieved chunks as input documents.
# As the last expression in the cell, the returned answer is shown as the cell output.
chain.run(input_documents=docs, question=query)

' The vision for Amrit Kaal is to create an empowered and inclusive economy through technology-driven and knowledge-based growth, with strong public finances and a robust financial sector. This will be achieved through Jan Bhagidari or public participation and Sabka Saath Sabka Prayas or collective efforts from all sections of society. The economic agenda for achieving this vision focuses on inclusive development, reaching the last mile, infrastructure and investment, unleashing potential, green growth, and youth power. Priority areas for development include agriculture and cooperation, digital public infrastructure for agriculture, and economic empowerment of women through programs like Deendayal Antyodaya Yojana National Rural Livelihood Mission.'

In [39]:
# Retrieve context for this question specifically. Reusing `docs` from the
# "Vision for Amrit Kaal" search would make the LLM answer from chunks that
# were selected for a different query.
# New names are used so the earlier `query` / `docs` are not overwritten.
# Re-run this cell to refresh the saved output.
pacs_query = "what is PACS"
pacs_docs = document_search.similarity_search(pacs_query)
chain.run(input_documents=pacs_docs, question=pacs_query)

' PACS stands for Primary Agricultural Credit Societies. These are cooperative societies that provide credit and other financial services to farmers in rural areas to support their agricultural activities. They are an important part of the financial sector in promoting inclusive development and reaching the last mile in rural areas. '