# PDF Reader 

Question answering over a PDF (the Constitution of Nepal) using LangChain and Astra DB.

In [1]:
# installing dependencies
!pip install -q cassio datasets tiktoken

In [2]:
# importing dependencies.
import getpass
import hashlib
import os

import cassio
from datasets import load_dataset
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.vectorstores.cassandra import Cassandra
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

In [3]:
# Python PDF library to work with PDF
!pip install PyPDF2



In [4]:
from PyPDF2 import PdfReader

In [5]:
# Credentials and connection settings.
# SECURITY: secrets must never be stored in the notebook itself. Each value is read
# from an environment variable of the same name; if it is not set, the user is
# prompted for it (hidden input for the two secrets).
# NOTE(review): the token and API key that were previously hard-coded here have
# been exposed and should be revoked and re-issued.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN") or getpass.getpass(
    "Astra DB application token: "
)
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID") or input("Astra DB database ID: ")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

In [6]:
# Open the source PDF; the path is relative to the notebook's working directory.
PDF_PATH = 'file/Constitution-of-Nepal.pdf'
readpdf = PdfReader(PDF_PATH)

In [7]:
from typing_extensions import Concatenate  # NOTE(review): not referenced in the visible cells - confirm before removing
# Read the PDF page by page, keep only pages that yield some text,
# and join the pieces into one string.
page_texts = []
for page in readpdf.pages:
    content = page.extract_text()
    if content:
        page_texts.append(content)
raw_text = ''.join(page_texts)

In [8]:
# Show only the beginning of the extracted text: displaying the whole document
# would flood the cell output and bloat the saved notebook.
raw_text[:500]



In [9]:
# Initialise cassio with the Astra DB token and database ID defined above.
cassio.init(
    database_id=ASTRA_DB_ID,
    token=ASTRA_DB_APPLICATION_TOKEN,
)

In [10]:
# Embedding model: OpenAI embeddings, authenticated with the key defined above.
embedding = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [11]:
# Create the vector store backed by the "constNepa" table.
# session and keyspace are passed as None; presumably the store then falls back
# to the connection set up by cassio.init - confirm against the library docs.
vector_store = Cassandra(
    table_name="constNepa",
    embedding=embedding,
    keyspace=None,
    session=None,
)

In [12]:
from langchain.text_splitter import CharacterTextSplitter

In [13]:
# Splitter configuration: break on newlines, chunk size 800 with an overlap of
# 200, both measured with len (i.e. in characters).
text_split = CharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=200,
    separator="\n",
    length_function=len,
)

# Cut the extracted PDF text into chunks for embedding.
texts = text_split.split_text(raw_text)

In [14]:
# Peek at the first chunk to sanity-check the split.
texts[0:1]

["www.lawcommission.gov.np  \n1 \n The Constitution of Nepal  \nDate of Publication in Nepal Gazette  \n                                                                                         September 20, 2015  \nAmendments : \n1. The Constitution of Nepal (First Amendment) , 2016  \n2. The Constitution of Nepal (Second Amendment), 2020  February 28, 2016  \nJune 18, 2020  \n \nPreamble : \nWE, THE SOVEREIGN PEOPLE OF NEPAL ;  \n \nINTERNALIZING  the people's sovereign right and right to autonomy and self -rule, while \nmaintaining freedom, sovereignty, territorial integrity, national unity, independence and \ndignity of Nepal;  \n \nRECALLING  the glorious history of historic people's movements, armed strug gle, dedication"]

In [15]:
# Load the text chunks into the vector store.
# Each chunk gets a deterministic ID derived from its content, so that re-running
# this cell writes to the same rows instead of inserting the whole document a
# second time under fresh random IDs (the duplicated hits in the recorded
# similarity-search output below are the symptom of that).
# NOTE(review): rows inserted by earlier runs under random IDs are not removed
# by this change; clear the table once to get rid of existing duplicates.
chunk_ids = [hashlib.sha256(chunk.encode("utf-8")).hexdigest() for chunk in texts]
vector_store.add_texts(texts, ids=chunk_ids)
print("Inserted %i chunks." % len(texts))

# Wrap the store so it can be queried with an LLM.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=vector_store)

Inserted 596 headlines.


In [16]:
from langchain_core.prompts import ChatPromptTemplate

In [17]:
# Chat model used to answer questions, authenticated with the OpenAI key defined above.
model = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
)

In [18]:
# Prompt text with two placeholders: {context} for the retrieved passages and
# {input} for the user's question.
template = (
    "You are a highly respected lawyer from Nepal with a detailed knowledge "
    "about the constitution of Nepal. Answer the following query based only "
    "on the provided context:\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Question = {input}"
)

In [19]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Turn the template string into a chat prompt, then build a chain that passes
# documents to the model through that prompt.
prompt = ChatPromptTemplate.from_template(template)
text_chain = create_stuff_documents_chain(llm=model, prompt=prompt)

In [20]:
from langchain.chains import create_retrieval_chain

In [21]:
# def retrieve_query(query, k =2):
#     matching_results = astra_vector_index.vector_find(query, k=k)
#     return matching_results

In [22]:
# def retrive_answer(query):
#     search = retrieve_query(query)
#     response = text_chain.run(input_documents = search, question = query)
#     return response

In [23]:
# Ask a sample question through the index wrapper, using the chat model defined above.
qn = "People in National Human Rights Commission of Nepal?"
ans = astra_vector_index.query(question=qn, llm=model)

In [24]:
# Show the answer returned by the index query above.
print(ans)

The National Human Rights Commission of Nepal consists of a Chairperson and four other members.


In [25]:
# Print the four stored chunks most similar to the question, each prefixed
# with its similarity score.
for doc, score in vector_store.similarity_search_with_score(qn, k=4):
    print(f'[{score:0.4f}] "{doc.page_content}..."')

[0.9318] "Commission  of Nepal, consisting of a Chairperson  and four other members .  
(2) The President shall, on the recommendation of the Constitutional Council, 
appoint the Chairperson and members  of the National Human Rights Commission . 
(3) The term of office of the Chairperson  and members of the National Human 
Rights Commission shall be six years from the date of appointment.  
(4) The Chairperson and m embers appointed pursuant to  clause (2) shall not 
be eligible for reappointment.  
Provid ed that a Member may be appointed to the off ice of Chairperson, and 
when a m ember is so appointed as the Chairperson, his or her term of office shall be 
so computed as to include his or her term as the Member.  
(5) Notwithstanding  anything contained in clause (3 ), the office of the..."
[0.9318] "Commission  of Nepal, consisting of a Chairperson  and four other members .  
(2) The President shall, on the recommendation of the Constitutional Council, 
appoint the Chairperson and