<a href="https://colab.research.google.com/github/Singhsansar/legalbot/blob/main/health_care.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
# !pip install langchain
# !pip install pinecone-client
# !pip install sentence-transformers
# !pip install ctransformers
# !pip install langchain_pinecone
!pip install pypdf

Collecting pypdf
  Downloading pypdf-4.1.0-py3-none-any.whl (286 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.1/286.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-4.1.0


In [3]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers

In [4]:
PINECONE_API_KEY = "_Add_API_KEY"
PINECONE_API_ENV = "gcp-starter"# add environment by own 

In [25]:
!mkdir bot

In [26]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)

    documents = loader.load()
    return documents

In [30]:
# pdf is not here yet
extracted_data = load_pdf("/content/bot")

In [33]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [34]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 2039


In [35]:
text_chunks[0]

Document(page_content='THE CONSTITUTION OF INDIA  \n [As on 26th November , 2021 ]', metadata={'source': '/content/bot/COI...pdf', 'page': 0})

In [36]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings
embeddings = download_hugging_face_embeddings()
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [37]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [40]:
from langchain_pinecone import PineconeVectorStore
index_name = "legalbot"
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings,pinecone_api_key=PINECONE_API_KEY)
vectorstore.add_texts([t.page_content for t in text_chunks])

['87621c80-4c9a-4dd9-81b8-bc05304124c6',
 '8386a4c3-781b-468b-8b3a-6d60cba71df9',
 '62477577-50a2-40f1-abff-93c02eb5bf9f',
 '817f8efa-7704-456d-85c5-adacaa643afa',
 'e0e774c0-ac87-481b-a10e-7990794b75e8',
 '413d5aed-c11e-4c53-8e65-70e447e94185',
 '9f2cd8cf-f0e7-4b5f-8f46-eb4eb336b94c',
 'a672f8b9-64e5-4d38-8bca-6c5c15914bf5',
 '4b37892f-de6e-4331-a489-15d1bdd3bb15',
 '40c14b8d-c80e-4ce5-84bc-feacfc35bd41',
 '539d5333-9304-440c-b132-7eb1b175f33e',
 'eb616dd4-8696-41c4-87a3-7c74aeb8f353',
 '19979c64-599b-4812-8494-5f41d5c63254',
 '9f8e7f3e-1a83-42b8-ad42-bca14a827a4d',
 '58be5aa1-592b-4bb1-8a59-ee293f3ab373',
 '7e636b80-8b1c-4051-a271-708943e8064b',
 'fc263b07-70ef-4c79-8a82-4b1e2ea43b4c',
 'e3ea9cf7-4581-403c-a398-01a969696eb5',
 '28184e2a-519a-460f-954e-35fb8c4d67f0',
 '7c08290a-bd74-41a5-9eb2-c6e2519ef7a3',
 '6e6fdcaf-96b7-4f7b-a9d5-4619f8e3e0f6',
 'a57159e9-9e41-4afb-99ad-568c3b4c5b89',
 '87cd847c-74ca-47e8-ad74-1919491960e5',
 '2e21b63a-569d-4f48-af4d-87d2f5053e08',
 'db594619-caf5-

In [41]:
query = "What does the word public servant mean?"
vectorstore.similarity_search(
    query,
    k=4 )

[Document(page_content='Explanation 2 .—Wherever the words “public servant ” occur, they shall be understood of every \nperson who is in actual possession of the situation of a public servant, whatever legal defect there may be \nin his right to hold that situation.  \n 3[Explanation 3 .—The word “election ” denotes an election for the purpose of selecting members of \nany legislative, municipal or other public authority, of whatever character, the method of selection to'),
 Document(page_content='Explanation 2 .—Wherever the words “public servant ” occur, they shall be understood of every \nperson who is in actual possession of the situation of a public servant, whatever legal defect there may be \nin his right to hold that situation.  \n 3[Explanation 3 .—The word “election ” denotes an election for the purpose of selecting members of \nany legislative, municipal or other public authority, of whatever character, the method of selection to'),
 Document(page_content='performance of any

In [42]:
prompt_template = """|
    As a legal professional, provide an informed response to the user's query. Ensure your answer adheres to legal standards and includes relevant rules and regulations.

    Context: {context}
    Question: {question}

    Present your answer in accordance with legal principles and refrain from conjecture or speculation.

    Helpful answer:
    """


In [43]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

llm=CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                  model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
                  config={"max_new_tokens": 2048, "context_length": 4096, "temperature": 0},)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [44]:
qa  = RetrievalQA.from_chain_type(
      llm=llm,
      chain_type="stuff",
      retriever=vectorstore.as_retriever(search_kwargs={'k': 2}),
      return_source_documents=True,
      chain_type_kwargs=chain_type_kwargs,
      verbose=True
      )


user_input=input(f"Input Prompt:")
result=qa({"query": user_input})
print("Response : ", result["result"])


Input Prompt:Expain about the article 371 in the Indian consitution of india


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Response :  
    Article 371 of the Constitution of India is a special provision that allows certain states to have their own laws and regulations, even if they are not consistent with the laws of the Union. This provision was included in the Constitution as a compromise between those who wanted a strong central government and those who wanted more autonomy for the states.
    
    Under Article 371, any state that is not covered by the provisions of Articles 360 to 362 may make its own laws and regulations, subject to certain conditions. These conditions include that the laws must not be repugnant to the Constitution or to any law enacted by Parliament, and that they must not discriminate on the basis of religion, race, caste, sex, or place of birth.
    
    It is important to note that Article 371 does not give states complete autonomy