In [1]:
!pip -qqq install pip --progress-bar off
!pip -qqq install langchain-groq==0.1.3 --progress-bar off
!pip -qqq install langchain==0.1.17 --progress-bar off
!pip -qqq install llama-parse==0.1.3 --progress-bar off
!pip -qqq install qdrant-client==1.9.1  --progress-bar off
!pip -qqq install "unstructured[md]"==0.13.6 --progress-bar off
!pip -qqq install fastembed==0.2.7 --progress-bar off
!pip -qqq install flashrank==0.2.4 --progress-bar off


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 4.41.2 requires huggingface-hub<1.0,>=0.23.0, but you have huggingface-hub 0.20.3 which is incompatible.
transformers 4.41.2 requires tokenizers<0.20,>=0.19, but you have tokenizers 0.15.2 which is incompatible.[0m[31m
[0m

In [2]:
import os
import textwrap
from google.colab import userdata
from IPython.display import Markdown
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from llama_parse import LlamaParse


In [3]:
# Mount Google Drive at /content/drive; later cells read the source PDF from it
# and write the parsed markdown and the Qdrant database under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# NOTE(review): unpinned install, unlike the pinned packages in the first cell.
# Presumably `groq` is already pulled in as a dependency of langchain-groq —
# confirm, then either pin a version here or drop this cell.
!pip install groq




In [5]:
# Pull the Groq API key from Colab's secret store (the key icon in the sidebar)
# instead of embedding it in the notebook. An empty literal makes every Groq
# call fail, and a real literal would leak the credential when the notebook
# is shared. Requires a Colab secret named GROQ_API_KEY with notebook access.
os.environ["GROQ_API_KEY"] = userdata.get("GROQ_API_KEY")

def print_response(response):
    """Pretty-print the ``"result"`` text of a chain response.

    The text is split on newlines. Each non-empty paragraph is wrapped to at
    most 100 columns (long words are never broken) and printed; each empty
    paragraph is printed as a blank line.

    Args:
        response: Mapping that holds the answer text under the key ``"result"``.
    """
    for paragraph in response["result"].split("\n"):
        if paragraph:
            wrapped_lines = textwrap.wrap(paragraph, width=100, break_long_words=False)
            print("\n".join(wrapped_lines))
        else:
            print()



# Document parsing

In [7]:
# Document parsing: free-text instruction that is handed to LlamaParse through
# its `parsing_instruction` argument when the parser is constructed below.
# NOTE(review): the wording is question-answering guidance ("When answering
# questions...") rather than guidance on how to parse the PDF — confirm this
# is the text intended for the parser and not for the LLM prompt.
instruction = """The provided document is the Constitution of Nepal 2072. This document encompasses all the legal frameworks, guidelines, and principles governing the country. It defines the structure of the state, the distribution of powers between different levels of government, and the fundamental rights and duties of citizens.

When answering questions based on this document, please follow these guidelines:
- Be precise and concise in your responses.
- Ensure that the information is accurate and directly relevant to the question.
- Highlight key articles, sections, or provisions where applicable.
- Provide context to your answers when necessary, explaining the implications or importance of specific provisions.
- Maintain a neutral and informative tone, avoiding any personal opinions or interpretations.

The goal is to provide clear and informative answers that help the user understand the specific aspects of the Constitution of Nepal 2072."""

parser = LlamaParse(
    api_key = '',
    result_type = 'markdown',
    parsing_instruction = instruction,
    max_timeout = 5000,

)

llama_parse_document = await parser.aload_data('/content/drive/My Drive/Constitution_Nepal.pdf')

Started parsing the file under job_id 17389fca-066d-423a-9410-c38058eccf1b


In [8]:
# Keep the first document returned by the parser and render the opening
# 1000 characters of its text as markdown to sanity-check the parse.
parsed_doc = llama_parse_document[0]
preview_markdown = parsed_doc.text[:1000]
Markdown(preview_markdown)

# The Constitution of Nepal

Date of Publication in Nepal Gazette

20 September 2015 (2072.6.3)

The Constitution of Nepal (First Amendment), 2016 (2072)

28 February 2016 (2072.11.16)

Preamble:

We, the Sovereign People of Nepal, Internalizing the people's sovereign right and right to autonomy and self-rule, while maintaining freedom, sovereignty, territorial integrity, national unity, independence and dignity of Nepal, Recalling the glorious history of historic people's movements, armed conflict, dedication and sacrifice undertaken by the Nepalese people at times for the interest of the nation, democracy and progressive changes, and respecting for the martyrs and disappeared and victim citizens, Ending all forms of discrimination and oppression created by the feudalistic, autocratic, centralized, unitary system of governance, Protecting and promoting social and cultural solidarity, tolerance and harmony, and unity in diversity by recognizing the multi-ethnic, multi-lingual, multi-re

In [9]:
from pathlib import Path

In [11]:
# Save the parsed markdown to Google Drive.
# The file is opened in write mode ("w"), not append mode: with "a", every
# re-run of this cell tacks another full copy of the document onto the file,
# which then shows up as duplicated chunks in the vector store.
# UTF-8 is set explicitly so the result does not depend on the platform default.
document_path = Path('/content/drive/MyDrive/parsed_document.md')
with document_path.open("w", encoding="utf-8") as f:
    f.write(parsed_doc.text)

# Vector embedding

In [12]:
# Read the saved markdown file back through the Unstructured markdown loader;
# `loaded_documents` is what the text splitter consumes in the next cell.
loader = UnstructuredMarkdownLoader(document_path)
loaded_documents = loader.load()

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [13]:
# Split the loaded markdown into chunks of up to 2048 characters, with
# 128 characters of overlap between neighbouring chunks, then show the
# first chunk as a quick sanity check.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2048,
    chunk_overlap=128,
)
docs = text_splitter.split_documents(loaded_documents)
first_chunk = docs[0]
print(first_chunk.page_content)

The Constitution of Nepal

Date of Publication in Nepal Gazette 20 September 2015 (2072.6.3) The Constitution of Nepal (First Amendment), 2016 (2072) 28 February 2016 (2072.11.16)

Preamble:

We, the Sovereign People of Nepal, Internalizing the people's sovereign right and right to autonomy and self-rule, while maintaining freedom, sovereignty, territorial integrity, national unity, independence and dignity of Nepal, Recalling the glorious history of historic people's movements, armed conflict, dedication and sacrifice undertaken by the Nepalese people at times for the interest of the nation, democracy and progressive changes, and respecting for the martyrs and disappeared and victim citizens, Ending all forms of discrimination and oppression created by the feudalistic, autocratic, centralized, unitary system of governance, Protecting and promoting social and cultural solidarity, tolerance and harmony, and unity in diversity by recognizing the multi-ethnic, multi-lingual, multi-religio

In [14]:
# Embedding model used for both indexing and querying.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Build the on-disk Qdrant collection from the chunks.
# `force_recreate=True` makes this cell idempotent: the database lives on
# Google Drive and survives kernel restarts, so without it every re-run adds
# the same chunks again under fresh ids and retrieval returns duplicates.
qdrant = Qdrant.from_documents(
    docs,
    embeddings,
    path='/content/drive/MyDrive/qdrant_db',
    collection_name="document_embeddings",
    force_recreate=True,
)


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/218M [00:00<?, ?B/s]

In [26]:
%%time
# Run a raw similarity search against the vector store. The result is consumed
# below as (document, score) pairs. `query` is reused by the later retriever cells.
query = "What fundamental rights are guaranteed to Nepali citizens?"
similar_docs = qdrant.similarity_search_with_score(query)

CPU times: user 321 ms, sys: 5 µs, total: 321 ms
Wall time: 318 ms


In [27]:
# Show the first 256 characters and the similarity score of every hit,
# each followed by an 80-character underscore rule and a blank line.
for doc, score in similar_docs:
    excerpt = doc.page_content[:256]
    print(f"text: {excerpt}\n", f"score: {score}", "_" * 80, "", sep="\n")

text: Right to privacy

The privacy of any person, his or her residence, property, document, data, correspondence and matters relating to his or her character shall, except in accordance with law, be inviolable.

Right against exploitation

Every person shall ha

score: 0.780478909497755
________________________________________________________________________________

text: Every person shall have the right to a fair trial by an independent, impartial and competent court or judicial body.

Any indigent party shall have the right to free legal aid in accordance with law.

Right of victim of crime:

(1) A victim of crime shall 

score: 0.779577503923492
________________________________________________________________________________

text: Any indigent party shall have the right to free legal aid in accordance with law.

Right of victim of crime:

(1) A victim of crime shall have pe right to get information about pe investigation and proceedings of a case in which he or she is pe victim.

In [28]:
%%time
retriever = qdrant.as_retriever(search_kwargs = {"k": 5})
retrieved_docs = retriever.invoke(query)

CPU times: user 343 ms, sys: 0 ns, total: 343 ms
Wall time: 346 ms


In [29]:
# Show the stored point id and the first 256 characters of every retrieved
# document, each followed by an 80-character underscore rule and a blank line.
for doc in retrieved_docs:
    point_id = doc.metadata['_id']
    excerpt = doc.page_content[:256]
    print(f"id: {point_id}\n", f"text: {excerpt}", "_" * 80, "", sep="\n")

id: b25ab55d3c00447a96a11f805c0cf23d

text: Right to privacy

The privacy of any person, his or her residence, property, document, data, correspondence and matters relating to his or her character shall, except in accordance with law, be inviolable.

Right against exploitation

Every person shall ha
________________________________________________________________________________

id: 9db2e8e498fd4ed1a6607fe9b81f434f

text: Every person shall have the right to a fair trial by an independent, impartial and competent court or judicial body.

Any indigent party shall have the right to free legal aid in accordance with law.

Right of victim of crime:

(1) A victim of crime shall 
________________________________________________________________________________

id: 0b2887a1eae843709ea42121a7cee6d8

text: Any indigent party shall have the right to free legal aid in accordance with law.

Right of victim of crime:

(1) A victim of crime shall have pe right to get information about pe investigat

In [21]:
# path = '/content/drive/MyDrive/LLM-Nepal-constitution'

### Reranking

In [30]:
# Rerank and compress: wrap the base retriever so that its candidates are
# passed through a FlashRank reranker (model 'ms-marco-MiniLM-L-12-v2').
# The reranked documents carry a `relevance_score` in their metadata, which
# the display cell below prints.
compressor = FlashrankRerank(model = 'ms-marco-MiniLM-L-12-v2')
compression_retriever = ContextualCompressionRetriever(
    base_compressor = compressor,
    base_retriever = retriever)

In [31]:
%%time
# Run the query through the reranking retriever and show how many documents
# survive (the recorded run kept 3).
reranked_docs = compression_retriever.invoke(query)
len(reranked_docs)

Running pairwise ranking..
CPU times: user 2.12 s, sys: 288 ms, total: 2.41 s
Wall time: 2.28 s


3

In [32]:
# Show id, first 256 characters and reranker relevance score of every
# reranked document, each followed by an 80-character dash rule and a blank line.
for doc in reranked_docs:
    meta = doc.metadata
    report_lines = [
        f"id: {meta['_id']}\n",
        f"text: {doc.page_content[:256]}\n",
        f"score: {meta['relevance_score']}",
        "-" * 80,
        "",
    ]
    print("\n".join(report_lines))

id: b25ab55d3c00447a96a11f805c0cf23d

text: Right to privacy

The privacy of any person, his or her residence, property, document, data, correspondence and matters relating to his or her character shall, except in accordance with law, be inviolable.

Right against exploitation

Every person shall ha

score: 0.9319560527801514
--------------------------------------------------------------------------------

id: a53d2f0d5ccf4e9fbe8c43873b8398d1

text: Right to social security: The indigent citizens, incapacitated and helpless citizens, helpless single women, citizens with disabilities, children, citizens who cannot take care themselves and citizens belonging to the tribes on the verge of extinction shal

score: 0.7536115050315857
--------------------------------------------------------------------------------

id: 0b2887a1eae843709ea42121a7cee6d8

text: Any indigent party shall have the right to free legal aid in accordance with law.

Right of victim of crime:

(1) A victim of crime shal