loading the data

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load everything under ./TestInfoDocs with SimpleDirectoryReader; the result is
# the input to the semantic splitter in the next cell.
InfoDocs = SimpleDirectoryReader("./TestInfoDocs").load_data()

creating nodes for the info docs

In [2]:
# A semantic splitter is used to chunk the InfoDocs into nodes
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Register the embedding model on the global Settings object; the same model
# instance is handed to the splitter below.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Split the info documents into nodes with the semantic splitter.
# buffer_size / breakpoint_percentile_threshold control how sentences are
# grouped and where breaks are placed — see the SemanticSplitterNodeParser docs
# for their exact meaning.
splitter = SemanticSplitterNodeParser(
    embed_model=Settings.embed_model,
    buffer_size=1,
    breakpoint_percentile_threshold=95,
)
infoNodes = splitter.get_nodes_from_documents(InfoDocs)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Inspect the second node produced by the splitter (bare expression -> notebook repr).
infoNodes[1]

TextNode(id_='dcfee460-7238-4fae-b132-d7c742fef0ee', embedding=None, metadata={'page_label': '1', 'file_name': 'Finance_Act_2024.PDF', 'file_path': 'd:\\LexifyTask2\\TestInfoDocs\\Finance_Act_2024.PDF', 'file_type': 'application/pdf', 'file_size': 392095, 'creation_date': '2025-03-22', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='7c29ec7c-1306-4791-a288-d59e04202735', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'Finance_Act_2024.PDF', 'file_path': 'd:\\LexifyTask2\\TestInfoDocs\\Finance_Act_2024.PDF', 'file_type': 'application/pdf', 'file_size': 392095, 'creation_date': '2025-03-22', 'last_modified_date': '2025-03-20'}, hash='3b95bd

In [61]:
# Print only the content of that same node, without the metadata shown by the repr.
print(infoNodes[1].get_content())

(1) This Act may be called the Finance Act, 2024.(2) Save as otherwise provided in this Act,-(a)
sections 2 to 10 shall come into force on the 1st day of April, 2024;(b) sections 11 to 13 shall come
into force on such date as the Central Government may, by notification in the Official Gazette,
appoint.
Chapter II Rates of Income-tax
2. Income-tax.
The provisions of section 2 of, and the First Schedule to, the Finance Act, 2023, (8 of 2023) shall
apply in relation to income-tax for the assessment year or, as the case may be, the financial year
commencing on the 1st day of April, 2024, as they apply in relation to income-tax for the assessment
year or, as the case may be, the financial year commencing on the 1st day of April, 2023, with the
following modifications, namely:-(a) in section 2,-(i) in sub-section (1), for the figures "2023", the
figures "2024" shall be substituted;(ii) for sub-section (2), the following sub-section shall be
substituted, namely:-(2) In the cases to which Para

hybrid retrieval for the info docs

In [104]:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore.simple_docstore import SimpleDocumentStore

# Document store wrapped in a storage context; the info nodes and both info
# indexes below use this same context.
docstore = SimpleDocumentStore()
storage_context = StorageContext.from_defaults(docstore=docstore)
# NOTE(review): nothing has been added to the docstore yet, and the next cell
# persists to the same directory again after adding the nodes — this call looks
# redundant; confirm before removing.
storage_context.persist(persist_dir="./storageInfo")

In [106]:
# Add the chunked info nodes to the docstore, then write the storage context
# to ./storageInfo.
storage_context.docstore.add_documents(infoNodes)
storage_context.persist(persist_dir="./storageInfo")

In [109]:
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

# Build a vector index and a keyword-table index over the same info nodes,
# both backed by the shared storage context.
# NOTE(review): no persist() call follows in the visible notebook, so these
# indexes appear to live only in memory — confirm whether that is intended.
vector_index = VectorStoreIndex(infoNodes, storage_context=storage_context)
keyword_index = SimpleKeywordTableIndex(infoNodes, storage_context=storage_context)


# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)

from typing import List

In [64]:
from llama_index.llms.google_genai import GoogleGenAI

class CustomRetriever(BaseRetriever):
    """Hybrid retriever that merges vector-similarity hits with keyword hits.

    Both wrapped retrievers are run for every query and their results are
    combined by node id: mode ``"AND"`` keeps only nodes returned by both
    retrievers, mode ``"OR"`` keeps nodes returned by either one.
    """

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        keyword_retriever: KeywordTableSimpleRetriever,
        llm=None,
        mode: str = "AND",
    ) -> None:
        """Store the wrapped retrievers, the LLM and the merge mode.

        Args:
            vector_retriever: retriever queried for embedding-similarity hits.
            keyword_retriever: retriever queried for keyword hits.
            llm: LLM kept on ``self._llm`` (the notebook reads it to build the
                response synthesizer). When omitted, a ``gemini-2.0-flash``
                client is created using the ``GOOGLE_API_KEY`` environment
                variable.
            mode: ``"AND"`` (intersection) or ``"OR"`` (union).

        Raises:
            ValueError: if ``mode`` is neither ``"AND"`` nor ``"OR"``.
            RuntimeError: if no ``llm`` is given and ``GOOGLE_API_KEY`` is unset.
        """
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")

        if llm is None:
            # Build the default client lazily instead of in the signature: a
            # default-argument instance is created once at class-definition
            # time, and the key must never be hardcoded in the notebook.
            import os

            api_key = os.environ.get("GOOGLE_API_KEY")
            if not api_key:
                raise RuntimeError(
                    "Set the GOOGLE_API_KEY environment variable or pass `llm` explicitly."
                )
            llm = GoogleGenAI(model="gemini-2.0-flash", api_key=api_key)

        self._vector_retriever = vector_retriever
        self._keyword_retriever = keyword_retriever
        self._llm = llm
        self._mode = mode
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Run both retrievers and merge their hits according to the mode.

        Args:
            query_bundle: the query forwarded unchanged to both retrievers.

        Returns:
            The selected hits, vector hits first (in the order the vector
            retriever returned them) followed by keyword-only hits. For a node
            returned by both retrievers the keyword retriever's hit object is
            the one returned.
        """
        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        keyword_nodes = self._keyword_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        keyword_ids = {n.node.node_id for n in keyword_nodes}

        # A dict keeps first-insertion order, and update() replaces the value
        # of an existing key without moving it.
        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in keyword_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids & keyword_ids
        else:
            retrieve_ids = vector_ids | keyword_ids

        # Walk the ordered dict rather than the id set so the result order is
        # stable across runs (set iteration order of strings is not).
        return [hit for node_id, hit in combined_dict.items() if node_id in retrieve_ids]

In [110]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# define custom retriever
# Hybrid retriever over the info indexes: top-2 vector hits combined with the
# keyword hits. CustomRetriever is constructed with its default mode.
vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=2)
keyword_retriever = KeywordTableSimpleRetriever(index=keyword_index)
custom_retriever_info = CustomRetriever(vector_retriever, keyword_retriever)

# Response synthesizer shared by all three engines below; it reuses the LLM
# held by the custom retriever.
response_synthesizer = get_response_synthesizer(llm=custom_retriever_info._llm)

# Hybrid (vector + keyword) query engine
custom_query_engine_info= RetrieverQueryEngine(
    retriever=custom_retriever_info,
    response_synthesizer=response_synthesizer,
)

# Vector-only query engine
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
)
# Keyword-only query engine
keyword_query_engine = RetrieverQueryEngine(
    retriever=keyword_retriever,
    response_synthesizer=response_synthesizer,
)

In [111]:
# Try the hybrid info engine on a sample question; the trailing bare
# `response` makes the notebook display the Response object.
response = custom_query_engine_info.query(
    "How does the Finance Act, 2024 amend the calculation of income tax when there is net agricultural income?"
)
response

Response(response='When a person has both total income and net agricultural income exceeding five thousand rupees, the net agricultural income is considered as part of the total income, specifically after the initial two lakh fifty thousand rupees, but it is not taxed. To calculate the income tax, the total income and net agricultural income are combined, and tax is calculated on this aggregate amount. Then, the net agricultural income is increased by two lakh fifty thousand rupees, and tax is calculated on this increased amount. The final income tax amount is determined by subtracting the tax on the increased net agricultural income from the tax on the aggregate income. There are some exceptions to this rule for individuals of different age groups. For those who are sixty years or more but less than eighty years, "three lakh rupees" is substituted for "two lakh fifty thousand rupees". For those who are eighty years or more, "five lakh rupees" is substituted for "two lakh fifty thousan

In [69]:
from pydantic import BaseModel, Field
from typing import List

In [67]:
# Structured output extracted from one argument (case) document; used as
# `output_cls` of programArgumentDocs.
# NOTE(review): pydantic exposes the class docstring and the Field descriptions
# in the model schema, so they presumably reach the LLM through the program's
# output parser — treat them as prompt text and confirm before rewording.
class ArgumentData(BaseModel):
    """Node metadata."""
    
    # One-sentence statement of what the case was about.
    caseType: str = Field(
        ..., description="What was the case about"
    )
    # How the argument was made for this kind of case.
    response: str = Field(
        ..., description="How was the argument made to handle the particular case type."
    )
    # Legal principles involved, one list entry per principle.
    legalPrinciples: List[str] = Field(
        ...,description = "the legal principles that were involved in the case"
    )


In [71]:
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.llms.google_genai import GoogleGenAI

import os

# Prompt used to extract ArgumentData (case type, response, legal principles)
# from the full text of one argument document. `{context_str}` is filled in
# when the program is called.
prompt_template_str = """\
You are a legal expert analyzing case documents. Given the following case description, extract the metadata in the specified format.

Case description:
----------------
{context_str}
----------------

Extract the following details:
1. **Case Type**: What was the case about?
2. **Response**: How was the argument made to handle the case?
3. **Legal Principles**: What legal principles were involved in this case? List them.

Return the extracted details as a structured `ArgumentData` object.
"""

# The API key is read from the environment; never commit it to the notebook.
llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.environ["GOOGLE_API_KEY"])

# Callable program: programArgumentDocs(context_str=...) returns an ArgumentData.
programArgumentDocs = LLMTextCompletionProgram.from_defaults(
    output_cls=ArgumentData,
    prompt_template_str=prompt_template_str,
    llm=llm,
    verbose=True,
)

In [70]:
from llama_index.core.ingestion import IngestionPipeline
import nest_asyncio
from llama_index.core import SimpleDirectoryReader
# Load the argument (case) documents from ./TestArgumentDocs.
ArgumentDocs = SimpleDirectoryReader("./TestArgumentDocs").load_data()
# Allow nested event loops inside the notebook's already-running loop.
nest_asyncio.apply()

Combining all the argument docs of the same argument file together

In [72]:
from itertools import groupby

# Merge consecutive Documents that share a file_name into one string per file.
# groupby() only groups adjacent items, which matches the order in which the
# documents are walked here. An empty ArgumentDocs list yields an empty
# text_files list instead of raising IndexError, and files whose merged text is
# empty are skipped consistently (there is nothing to extract from them).
text_files = []
for _, file_docs in groupby(ArgumentDocs, key=lambda doc: doc.metadata['file_name']):
    merged_text = "".join(doc.text_resource.text for doc in file_docs)
    if merged_text:
        text_files.append(merged_text)

In [73]:
# Display the merged per-file texts.
text_files

["$~13\n* IN THE HIGH COURT OF DELHI AT NEW DELHI\n+ FAO 7/2018 & CM APPL. 462/2018\nM/S ICICI BANK LIMITED\n..... Appellant\nThrough: Mr. Punit K. Bhalla, Advocate, Ms.\nChetna Bhalla, Advocates.\nversus\nMOHAMMAD KHALIL\n..... Respondent\nThrough: None.\nCORAM:\nHON'BLE MR. JUSTICE NAJMI WAZIRI\nO R D E R\n% 08.01.2018\nCM APPL. 461/2018 (Exemption)\nAllowed subject to all just exceptions.\nThe application stands disposed off.\nFAO 7/2018 & CM APPL. 462/2018\nThis appeal seeks setting aside of an order dated 24.11.2017 passed\nby the learned Additional District Judge returning the plaint on the ground\nthat the plaint is not maintainable for want of territorial jurisdiction. The\nlearned counsel for the appellant submits that this was done at the pre-notice\nstage, hence notice would not be necessary to the respondent for the decision\nof the present appeal. In the circumstances, notice to the respondent is\ndispensed with.\nIt is the appellant’s case that a loan of Rs. 7,59,357/- ha

Creating ArgumentNodes from the text_files using the programArgumentDocs

In [74]:
from llama_index.core.schema import TextNode
# One TextNode per extracted field, kept in three parallel lists.
# The three nodes that belong to the same case deliberately share the same id
# (the case's position in text_files): CombinedRetriver matches case-type hits
# and legal-principle hits by node id.
nodeCaseType = []
nodeLegalPrinciples = []
nodeResponse = []
for i, case_text in enumerate(text_files):
    # Call the LLM program once per document. Calling it separately for each
    # field triples the cost and can mix fields from different generations.
    extracted = programArgumentDocs(context_str=case_text)
    node_id = str(i)
    nodeResponse.append(TextNode(text=extracted.response, id_=node_id))
    nodeLegalPrinciples.append(
        TextNode(text="|".join(extracted.legalPrinciples), id_=node_id)
    )
    nodeCaseType.append(TextNode(text=extracted.caseType, id_=node_id))


In [76]:
# Display the case-type nodes.
nodeCaseType

[TextNode(id_='0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Appeal against the order of returning a plaint due to lack of territorial jurisdiction in a loan recovery case.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'),
 TextNode(id_='1', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='The case involves writ petitions filed by individuals who were dismissed from service following a criminal case and subsequent departmental proceedings, but were later acquitted in the criminal case. The petitioners sought reinstatement based on their acquittal.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_

In [77]:
# Display the legal-principle nodes (principles are joined with "|").
nodeLegalPrinciples

[TextNode(id_='0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Territorial jurisdiction of courts|Cause of action arising in a particular jurisdiction|Enforcement of loan agreements|Appointment of receiver for asset repossession|Precedent and judicial consistency', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'),
 TextNode(id_='1', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="Rule 27 (CCC) of Central Reserve Police Force regarding departmental punishment after acquittal in a criminal court|Principles regarding the effect of acquittal in criminal proceedings on departmental proceedings|Interpretation of 'honourable acquittal' in the context 

In [78]:
# Display the response nodes.
nodeResponse

[TextNode(id_='0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="The appeal argued that the Trial Court's decision to return the plaint was erroneous because the loan documents were executed and the loan was disbursed in Delhi, establishing territorial jurisdiction. The appellant also cited previous similar cases where the Delhi Courts were held to have jurisdiction.", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'),
 TextNode(id_='1', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="The court allowed the writ petitions, setting aside the dismissal orders and granting consequential benefits to the petitioners. The court relied on Rule 27 (CCC) of

Pydantic extractor program of user query

In [79]:
# Structured output extracted from a user query; used as `output_cls` of
# programQuery and read by CombinedRetriver._retrieve.
# NOTE(review): pydantic exposes the class docstring and the Field descriptions
# in the model schema, so they presumably reach the LLM through the program's
# output parser — treat them as prompt text and confirm before rewording.
class QueryData(BaseModel):
    """User Query data."""
    
    # Single case category for the query.
    caseType: str = Field(
        ..., description="What case type is the query"
    )
    # Legal principles the query appears to involve, one list entry each.
    legalPrinciples: List[str] = Field(
        ...,description = "what are the legal principles that seem to be involved in the query"
    )


In [80]:
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.llms.google_genai import GoogleGenAI

import os

# Prompt used to extract QueryData (case type and legal principles) from a
# user's question. `{context_str}` is filled in when the program is called.
# The example JSON block is closed with a matching ``` fence.
prompt_template_str = """\
You are a legal expert analyzing legal arguments. Given the following case description, extract the metadata in the specified format.

### Case Description:
----------------
{context_str}
----------------

### Extract the following details:
1. **Case Type**: Identify the category of this case. Return a single case type as a string.
2. **Legal Principles**: Identify key legal principles involved in this case. Return a list of strings.

### Expected Output Format:
Return the extracted data as a structured JSON object with the following fields:
```json
{
  "caseType": "A single case type",
  "legalPrinciples": ["Principle1", "Principle2", "Principle3"]
}
```
"""

# The API key is read from the environment; never commit it to the notebook.
llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.environ["GOOGLE_API_KEY"])

# Callable program: programQuery(context_str=...) returns a QueryData.
programQuery = LLMTextCompletionProgram.from_defaults(
    output_cls=QueryData,
    prompt_template_str=prompt_template_str,
    llm=llm,
    verbose=True,
)

In [81]:
# Quick check of the query-extraction program on a toy question.
print(programQuery(context_str="Can a parrot be sued for defamation?"))

caseType='Defamation' legalPrinciples=['Capacity to be sued', 'Defamation', 'Intent', 'Legal personhood']


Argument retrieval based on the user's query

In [107]:
from llama_index.core import StorageContext
from llama_index.core.storage.docstore.simple_docstore import SimpleDocumentStore
# Document store / storage context for the argument nodes.
docstoreArguments = SimpleDocumentStore()
storage_context_arguments = StorageContext.from_defaults(docstore=docstoreArguments)

# add_documents() takes ONE sequence of nodes; its second positional parameter
# is `allow_update`. Passing nodeCaseType there only acted as a truthy flag, so
# those nodes were never stored by this call. The flag is now spelled out.
# NOTE(review): nodeCaseType cannot simply be appended to the list — it uses the
# same ids ("0", "1", ...) as nodeLegalPrinciples, so with allow_update=True it
# would overwrite them. Decide which node set this docstore should hold.
storage_context_arguments.docstore.add_documents(nodeLegalPrinciples, allow_update=True)

# Persist after the nodes are added rather than while the store is still empty.
storage_context_arguments.persist(persist_dir="./storageArguments")

In [108]:
# Write the argument storage context (docstore included) to ./storageArguments.
storage_context_arguments.persist(persist_dir="./storageArguments")

In [114]:
from llama_index.core import VectorStoreIndex

# Give each index its own default storage context (i.e. pass none).
# nodeCaseType and nodeLegalPrinciples share node ids on purpose, so building
# both indexes on one shared storage context lets the second index overwrite
# the first index's embeddings and docstore entries for every id. Both
# retrievers would then search the legal-principles text only.
vector_index_caseType = VectorStoreIndex(nodeCaseType)
vector_index_LegalPrinciples = VectorStoreIndex(nodeLegalPrinciples)


# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
)

from typing import List

In [84]:
from llama_index.llms.google_genai import GoogleGenAI

class CombinedRetriver(BaseRetriever):
    """Retriever that matches a query against case types and legal principles.

    The user query is first converted to a ``QueryData`` by the module-level
    ``programQuery``. Its case type is sent to one retriever and its joined
    legal principles to the other, and the two result sets are merged by node
    id: mode ``"AND"`` keeps ids returned by both, ``"OR"`` keeps ids returned
    by either.
    """

    def __init__(
        self,
        vector_index_caseType: VectorIndexRetriever,
        vector_index_LegalPrinciples: VectorIndexRetriever,
        llm=None,
        mode: str = "AND",
    ) -> None:
        """Store the two retrievers, the LLM and the merge mode.

        Args:
            vector_index_caseType: retriever over the case-type nodes (despite
                the name, a retriever is expected, not an index).
            vector_index_LegalPrinciples: retriever over the legal-principle
                nodes (likewise a retriever).
            llm: LLM kept on ``self._llm`` (the notebook reads it to build the
                response synthesizer). When omitted, a ``gemini-2.0-flash``
                client is created using the ``GOOGLE_API_KEY`` environment
                variable.
            mode: ``"AND"`` (intersection) or ``"OR"`` (union).

        Raises:
            ValueError: if ``mode`` is neither ``"AND"`` nor ``"OR"``.
            RuntimeError: if no ``llm`` is given and ``GOOGLE_API_KEY`` is unset.
        """
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")

        if llm is None:
            # Build the default client lazily instead of in the signature: a
            # default-argument instance is created once at class-definition
            # time, and the key must never be hardcoded in the notebook.
            import os

            api_key = os.environ.get("GOOGLE_API_KEY")
            if not api_key:
                raise RuntimeError(
                    "Set the GOOGLE_API_KEY environment variable or pass `llm` explicitly."
                )
            llm = GoogleGenAI(model="gemini-2.0-flash", api_key=api_key)

        self.vector_index_caseType = vector_index_caseType
        self.vector_index_LegalPrinciples = vector_index_LegalPrinciples
        self._llm = llm
        self._mode = mode
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Extract case type / principles from the query and merge both searches.

        Args:
            query_bundle: the user's query; only ``query_str`` is read.

        Returns:
            The selected hits, case-type hits first (in retriever order)
            followed by hits found only via legal principles. For an id
            returned by both retrievers the legal-principles hit object is the
            one returned.
        """
        query_data = programQuery(context_str=query_bundle.query_str)

        vector_nodes_caseType = self.vector_index_caseType.retrieve(
            QueryBundle(query_str=query_data.caseType)
        )
        # Principles are joined with "|", the same separator used when the
        # legal-principle nodes were built.
        vector_nodes_LegalPrinciples = self.vector_index_LegalPrinciples.retrieve(
            QueryBundle(query_str="|".join(query_data.legalPrinciples))
        )

        vector_index_caseType_ids = {n.node.node_id for n in vector_nodes_caseType}
        vector_index_LegalPrinciples_ids = {n.node.node_id for n in vector_nodes_LegalPrinciples}

        # A dict keeps first-insertion order, and update() replaces the value
        # of an existing key without moving it.
        combined_dict = {n.node.node_id: n for n in vector_nodes_caseType}
        combined_dict.update({n.node.node_id: n for n in vector_nodes_LegalPrinciples})

        if self._mode == "AND":
            retrieve_ids = vector_index_caseType_ids & vector_index_LegalPrinciples_ids
        else:
            retrieve_ids = vector_index_caseType_ids | vector_index_LegalPrinciples_ids

        # Walk the ordered dict rather than the id set so the result order is
        # stable across runs (set iteration order of strings is not).
        return [hit for node_id, hit in combined_dict.items() if node_id in retrieve_ids]

In [115]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# define custom retriever
# Top-5 retrievers over the case-type and legal-principle indexes, combined by
# CombinedRetriver (constructed with its default mode).
vector_retriever_caseType = VectorIndexRetriever(index=vector_index_caseType, similarity_top_k=5)
vector_retriever_LegalPrinciples = VectorIndexRetriever(index=vector_index_LegalPrinciples, similarity_top_k=5)
custom_retriever = CombinedRetriver(vector_retriever_caseType, vector_retriever_LegalPrinciples)

# Response synthesizer shared by all three engines below; it reuses the LLM
# held by the combined retriever.
response_synthesizer = get_response_synthesizer(llm=custom_retriever._llm)

# Combined (case type + legal principles) query engine
custom_query_engine_argument = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

# Case-type-only query engine
vector_query_engine_caseType = RetrieverQueryEngine(
    retriever=vector_retriever_caseType,
    response_synthesizer=response_synthesizer,
)
# Legal-principles-only query engine. It must be given the retriever; the
# index object itself was passed here before and has no retrieve() method.
vector_query_engine_LegalPrinciples = RetrieverQueryEngine(
    retriever=vector_retriever_LegalPrinciples,
    response_synthesizer=response_synthesizer,
)

In [116]:
# Try the combined argument engine on a sample question.
response = custom_query_engine_argument.query(
    "If a lender in Mumbai files a loan recovery suit in a Delhi court against a borrower who has moved there, and the court returns the plaint citing lack of territorial jurisdiction, what legal options does the lender have to challenge this decision?"
)

In [117]:
# Print the synthesized answer text.
print(response)

The lender needs to consider the principles of territorial jurisdiction to determine the appropriate venue for the loan recovery suit. If the cause of action arose in Mumbai, but the borrower has moved to Delhi, the Delhi court may not have territorial jurisdiction. The lender can explore legal options to challenge the Delhi court's decision to return the plaint.



The RAG agent

In [126]:
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.google_genai import GoogleGenAI
import asyncio
import os


async def find_similar_arguments(query: str) -> str:
    """Useful for understanding how similar legal cases were handled previously in the actual court"""
    # Agent tool: the docstring above is left unchanged because it presumably
    # serves as the tool description shown to the agent's LLM — confirm.
    # query() is synchronous and returns a Response object, which cannot be
    # awaited; aquery() is the awaitable counterpart.
    response = await custom_query_engine_argument.aquery(query)
    return str(response)

async def search_documents(query: str) -> str:
    """Useful for getting legal information like laws,acts etc...that were passed"""
    # Agent tool: the docstring above is left unchanged because it presumably
    # serves as the tool description shown to the agent's LLM — confirm.
    # query() is synchronous and returns a Response object, which cannot be
    # awaited; aquery() is the awaitable counterpart.
    response = await custom_query_engine_info.aquery(query)
    return str(response)


# Function-calling agent with two tools: precedent lookup over the argument
# documents and statute lookup over the info documents.
# The API key is read from the environment (`os` is imported at the top of this
# cell); never commit it to the notebook.
agent = FunctionAgent(
    name="Agent",
    tools=[find_similar_arguments, search_documents],
    description = "Useful to generate fact-based legal arguments by learning how similar legal cases were handled previously in the actual court and searching documents for legal information like laws,acts etc...that were passed that can be useful for the argument.",
    llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.environ["GOOGLE_API_KEY"]),
    system_prompt="""You are a helpful assistant that can generate fact-based legal arguments by learning how similar legal cases were handled previously in the actual court and searching documents for legal information like laws,acts etc...that were passed that can be useful for the argument.""",
)


In [134]:
async def ragLawyerResponse(
    query: str = "If a lender in Mumbai files a loan recovery suit in a Delhi court against a borrower who has moved there, and the court returns the plaint citing lack of territorial jurisdiction, what legal options does the lender have to challenge this decision?",
):
    """Run the RAG agent on a legal question and return its final result.

    Args:
        query: the question passed to ``agent.run``. It defaults to the sample
            territorial-jurisdiction question, so calling the function with no
            arguments behaves as before.

    Returns:
        Whatever awaiting ``agent.run(query)`` yields.
    """
    final = await agent.run(query)
    return final

# Top-level await runs the agent from the notebook; the result is then printed.
response = await ragLawyerResponse()  
print(response)  

Here's a breakdown of the lender's legal options:

**1. Review the Order:**

*   Carefully examine the order dismissing the suit. The court's reasoning for claiming a lack of territorial jurisdiction will be crucial. Identify the specific facts and legal principles the court relied upon.

**2. Legal Grounds for Challenge:**

*   **Territorial Jurisdiction:** The lender needs to argue that the Delhi court *does* have territorial jurisdiction. Here's how:
    *   **Cause of Action:** Argue that a portion of the cause of action arose in Delhi. For example, if the loan agreement was executed in Delhi, payments were to be made in Delhi, or the borrower resides/works in Delhi.
    *   **Borrower's Residence/Business:** Establish that the borrower "actually and voluntarily resides, or carries on business, or personally works for gain" within the local limits of the Delhi court's jurisdiction (Section 20 of the Code of Civil Procedure (CPC)).
    *   **Leave of the Court:** If there are multip