In [98]:
# A Simple RAG Pipeline
import os
from dotenv import load_dotenv, dotenv_values
from utils import clean_text, print_chunks, reset_pinecone, embed_and_upsert

# Load environment variables through python-dotenv's load_dotenv()
# (called with no arguments, so it uses the library's default lookup).
load_dotenv()
# Namespace name handed to the Pinecone helper calls in later cells.
pinecone_namespace="simple-rag-1"

In [None]:
# 1. Load the txt file
with open('docs/the-judicial-system-of-pakistan.txt', 'r', encoding='utf-8') as file:
    # Read the whole document into memory as one string; later cells use `text`.
    text = file.read()

# Print a bounded preview rather than the entire document: dumping the full
# text bloats the saved notebook and buries the cells that follow.
print(f"Loaded {len(text):,} characters")
print(text[:1000])

In [80]:
# Show the document as returned by the project's clean_text helper.
print("----- Cleaned Text -----")
cleaned_for_display = clean_text(text)
print(cleaned_for_display)

----- Cleaned Text -----
Established 1988
The Judicial System
of Pakistan
Dr Faqir Hussain
Director General
Federal Judicial Academy
Islamabad
Established 1988
The Judicial System
of Pakistan
Dr Faqir Hussain
Director General
Federal Judicial Academy
Islamabad
1st edition
August
nd edition
May
rd edition
February 2011
4th edition
May
2015
The Judicial System of Pakistan
Contents
S. No

Introduction
Historical Retrospect
2.1
Hindu Period
2.2
Muslim Period
2.3
British Period
Post-Independence Evolution
Superior Judiciary
4.1
New Procedure for Appointment
4.2
Accountability
4.3
Supreme Court
4.4
High Courts
4.5
Federal Shariat Court
Subordinate Courts
Special Courts and Administrative Tribunals
6.1
Service Tribunals
Procedural Law
Terms and Conditions of Service of Subordinate Judiciary
8.1
Workload
8.2
National Judicial Policy
Organization of Judicial Administration
9.1
Organogram of Courts
9.2
Strength of Judges
9.3
Superior Courts
9.4
Subordinate Courts
9.5
Administrative Staff of 

In [81]:
# 2. Create Chunks

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# Split the cleaned document into chunk documents ...
chunk_documents = text_splitter.create_documents([clean_text(text)])
# ... and keep only the text content of each one.
texts = [document.page_content for document in chunk_documents]
print_chunks(texts)

Total Chunks:  98
--------
CHUNK 1
--------
Established 1988
The Judicial System
of Pakistan
Dr Faqir Hussain
Director General
Federal Judicial Academy
Islamabad
Established 1988
The Judicial System
of Pakistan
Dr Faqir Hussain
Director General
Federal Judicial Academy
Islamabad
1st edition
August
nd edition
May
rd edition
February 2011
4th edition
May
2015
The Judicial System of Pakistan
Contents
S. No
--------
CHUNK 2
--------
Introduction
Historical Retrospect
2.1
Hindu Period
2.2
Muslim Period
2.3
British Period
Post-Independence Evolution
Superior Judiciary
4.1
New Procedure for Appointment
4.2
Accountability
4.3
Supreme Court
4.4
High Courts
4.5
Federal Shariat Court
Subordinate Courts
Special Courts and Administrative Tribunals
6.1
Service Tribunals
Procedural Law
Terms and Conditions of Service of Subordinate Judiciary
8.1
Workload
8.2
National Judicial Policy
Organization of Judicial Administration
9.1
Organogram of Courts
9.2
Strength of Judges
9.3
Superior Courts
9.4
Sub

In [82]:
# 3. Reset Pinecone and Upsert Chunks. Only Run Once
# Set this flag to True for the one-off indexing run, then back to False so
# that re-running the notebook does not repeat the reset/upsert calls.
# (Replaces the previous approach of commenting the calls in and out by hand.)
RESET_AND_UPSERT = False

if RESET_AND_UPSERT:
    reset_pinecone(pinecone_namespace)
    embed_and_upsert(texts, pinecone_namespace)

True

In [99]:
from utils import get_relevant_chunks

# Sample questions to try against the indexed document.
# https://chatgpt.com/share/271c8eea-0efc-4c6d-9e76-391a11eecc1b
queries = [
    "Describe the hierarchy and organizational structure of the judicial system in Pakistan",
    "Explain the reasons behind the discrepancy in the number of judges across different High Courts in Pakistan",
    "What are the challenges faced by subordinate courts in Pakistan",
    "Discuss the types of special courts and administrative tribunals in Pakistan and their jurisdictions",
    "What are the procedural laws governing civil and criminal proceedings in Pakistan?",
    "How does the appointment and independence of service tribunal members in Pakistan work?",
    "Analyze the impact of the 2007 Provisional Constitution Order (PCO) on the strength of judges in the Supreme Court and High Courts of Pakistan",
    "What is the role of the Federal Shariat Court in Pakistan's judicial system?",
    "Summarize the annual performance of the judiciary in terms of case disposal and pendency as of 2013",
    "What is the significance of the Qanun-e-Shahadat Order 1984 in Pakistan’s legal proceedings?",
]

# Index of the sample question to run in this cell and the cells that follow.
QUERY_INDEX = 2
# Third argument to get_relevant_chunks; presumably the number of chunks to
# retrieve -- TODO confirm against utils.get_relevant_chunks.
TOP_K = 10

query = queries[QUERY_INDEX]
relevant_chunks = get_relevant_chunks(query, pinecone_namespace, TOP_K)
# Keep only the chunk text stored under each match's metadata; the reranker
# cell consumes this list.
relevant_chunks_text = [match["metadata"]["text"] for match in relevant_chunks]

print_chunks(relevant_chunks, True)

# Observation recorded for queries[0] (not the query selected by QUERY_INDEX
# above): its closest chunk only ranks 7th by similarity, which is not good.

[autoreload of urllib3.exceptions failed: Traceback (most recent call last):
  File "/Users/arslan/Desktop/Workspace/Freelancing/lawyer-ai-project/rag-room/.venv/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/Users/arslan/Desktop/Workspace/Freelancing/lawyer-ai-project/rag-room/.venv/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 500, in superreload
    update_generic(old_obj, new_obj)
  File "/Users/arslan/Desktop/Workspace/Freelancing/lawyer-ai-project/rag-room/.venv/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 397, in update_generic
    update(a, b)
  File "/Users/arslan/Desktop/Workspace/Freelancing/lawyer-ai-project/rag-room/.venv/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 349, in update_class
    if update_generic(old_obj, new_obj):
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/arslan/Desktop/Workspace/Freelanci

[error@get_voyage_embeddings]:  Using both 'allowed_methods' and 'method_whitelist' together is not allowed. Instead only use 'allowed_methods'


KeyError: 0

In [94]:
from utils import rerank_chunks

# Let's use a reranker to see how much it helps: pass the same query together
# with the text of the retrieved chunks to the project's rerank_chunks helper.
# Each result is read as item["score"] by the filtering cell that follows.
rerank_results = rerank_chunks(query, relevant_chunks_text)
# The second argument (True) is defined by utils.print_chunks; in the saved
# output this call prints an "Id: ..., Score: ..." line per chunk -- TODO confirm.
print_chunks(rerank_results, True)

Total Chunks:  10
--------
CHUNK 1
--------
Id: 0, Score: 0.85546875
courts have heavy dockets, the Subordinate Courts have to bear the brunt. As first instance trial
courts, most of the civil and criminal litigation is conducted at this level. According to one estimate,
around 90% of litigation in Pakistan is conducted at the level of Subordinate Courts and the rest at
the level of High Courts and Supreme Court. The Subordinate Courts also operate under many
constraints. There exists shortage of courtrooms, judicial officers, ministerial staff and office
equipment. The strength of Subordinate Judiciary has not kept pace with the rise in litigation due to
which huge backlog of pending cases is accumulated, and there are enormous delays in deciding
cases. As against the recommendations of several law reform commissions and committees that the
number of cases pending with a civil judge should not be more than 500 and the number of units
pending with a District & Sessions Judge should not

In [95]:
# Only the top chunks should be given to the LLM, so keep the reranked chunks
# whose score is at least MIN_RERANK_SCORE.
# The previous comment said 0.8 while the code filtered at 0.6; the executed
# value (0.6) is kept here so the result of this cell does not change.
MIN_RERANK_SCORE = 0.6
top_chunks = [item for item in rerank_results if item["score"] >= MIN_RERANK_SCORE]
print_chunks(top_chunks, True)

Total Chunks:  2
--------
CHUNK 1
--------
Id: 0, Score: 0.85546875
courts have heavy dockets, the Subordinate Courts have to bear the brunt. As first instance trial
courts, most of the civil and criminal litigation is conducted at this level. According to one estimate,
around 90% of litigation in Pakistan is conducted at the level of Subordinate Courts and the rest at
the level of High Courts and Supreme Court. The Subordinate Courts also operate under many
constraints. There exists shortage of courtrooms, judicial officers, ministerial staff and office
equipment. The strength of Subordinate Judiciary has not kept pace with the rise in litigation due to
which huge backlog of pending cases is accumulated, and there are enormous delays in deciding
cases. As against the recommendations of several law reform commissions and committees that the
number of cases pending with a civil judge should not be more than 500 and the number of units
pending with a District & Sessions Judge should not 