In [37]:
import os
from langchain_community.document_loaders import PyPDFLoader

def extract_content(pdf_file_path):
    # Load the PDF
    loader = PyPDFLoader(pdf_file_path)
    pages = loader.load()

    # Get the base name of the PDF file (without extension)
    base_name = os.path.splitext(os.path.basename(pdf_file_path))[0]

    # Create a directory for the extracted text file
    output_dir = os.path.join("escr-judgements-dataset/judgements_text/", base_name)
    os.makedirs(output_dir, exist_ok=True)

    # Define the path for the combined text file
    combined_file_path = os.path.join(output_dir, f"{base_name}.txt")

    # Combine all pages' content into a single string
    combined_content = "\n".join(page.page_content for page in pages)

    # Save the combined content to a text file
    with open(combined_file_path, 'w', encoding='utf-8') as file:
        file.write(combined_content)

    print(f"Combined text file has been saved to: {combined_file_path}")


dataset_path = "escr-judgements-dataset/judgements/"
for file in os.listdir(dataset_path):
    file_path = os.path.join(dataset_path, file)
    extract_content(file_path)


Combined text file has been saved to: escr-judgements-dataset/judgements_text/CIVIL APPEAL_4603_2024/CIVIL APPEAL_4603_2024.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/CIVIL APPEAL_2179_2024/CIVIL APPEAL_2179_2024.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/CIVIL APPEAL_6135_2024/CIVIL APPEAL_6135_2024.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/CRIMINAL APPEAL_3589_2023/CRIMINAL APPEAL_3589_2023.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/DIARYNO AND DIARYYR_8208_2024/DIARYNO AND DIARYYR_8208_2024.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/SPECIAL LEAVE PETITION (CIVIL)_10159_2020/SPECIAL LEAVE PETITION (CIVIL)_10159_2020.txt
Combined text file has been saved to: escr-judgements-dataset/judgements_text/ARBITRATION PETITION_29_2023/ARBITRATION PETITION_29_2023.txt
Combined text file has been saved 

In [5]:
import os
import random
base_path = "escr-judgements-dataset/judgements_text/"
folders = os.listdir(base_path)
summary_selection_folders = random.sample(folders, 15)

In [6]:
summary_selection_folders

['WRIT PETITION (CIVIL)_643_2015',
 'CIVIL APPEAL_7230_2024',
 'WRIT PETITION (CIVIL)_255_2024',
 'CIVIL APPEAL_4602_2024',
 'CIVIL APPEAL_5194_2024',
 'DIARYNO AND DIARYYR_8208_2024',
 'CRIMINAL APPEAL_3589_2023',
 'REVIEW PETITION (CIVIL)_1036_2023',
 'CIVIL APPEAL_6741_2024',
 'CRIMINAL APPEAL_437_2015',
 'SPECIAL LEAVE PETITION (CRIMINAL)_550_2024',
 'CRIMINAL APPEAL_1738_2024',
 'SPECIAL LEAVE PETITION (CIVIL)_10159_2020',
 'CIVIL APPEAL_4603_2024',
 'CIVIL APPEAL_4272_2024']

In [11]:
from langchain_openai import AzureChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_VERSION"] = os.getenv("OPENAI_API_VERSION")
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv("AZURE_OPENAI_ENDPOINT")
os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")
llm = AzureChatOpenAI(
    azure_deployment="gpt-4o-ra-dev",  # or your deployment
    api_version="2023-06-01-preview",  # or your api version
    temperature=0
)

prompt = ChatPromptTemplate.from_messages(
    [("system", "You are an assitant to generate court judgements summary write a detailed summary of the judgement covering issue, key information driving the judement decision, judgement conclusion etc:\\n\\n{context}")]
)

In [8]:
from langchain_community.document_loaders import TextLoader
summarized_cases = []
failed_to_generate_summary_cases = []
for folder in summary_selection_folders:
    folder_path = os.path.join(base_path, folder, f"{folder}.txt")    
    loader = TextLoader(folder_path, encoding = 'UTF-8')
    docs = loader.load()

    # Instantiate chain
    chain = create_stuff_documents_chain(llm, prompt)

    try:
        # Invoke chain
        summary = chain.invoke({"context": docs})
        with open(f"escr-judgements-dataset/judgements_summaries/{folder}/{folder}.txt", 'w', encoding='utf-8') as file:
            file.write(summary)
        summarized_cases.append(folder)
        print(f"Summary for {folder} is:\n{summary}\n\n")

    except Exception as e:
        failed_to_generate_summary_cases.append(folder)
        print(f"Failed to generate summary for {folder}")
        continue

    
    

[Document(metadata={'source': 'escr-judgements-dataset/judgements_text/WRIT PETITION (CIVIL)_643_2015\\WRIT PETITION (CIVIL)_643_2015.txt'}, page_content='*\u2003Author[2024] 1 S.C.R. 327 : 2024 INSC 26\nAll India Judges Association\nv.\nUnion of India & Ors\n(Writ Petition (Civil) No 643 of 2015)\n04 January 2024\n[Dr. Dhananjaya Y Chandrachud,* CJI,  \nJ. B. Pardiwala and Manoj Misra, JJ.]\nIssue for Consideration\nAllowances granted to judicial officers and retired judicial officers \nby the Second National Judicial Pay Commission (SNJPC).\nHeadnotes\nJudiciary – District Judiciary – Recommendations by Second \nNational Judicial Pay Commission (SNJPC) regarding various \nallowances for judicial officers and retired judicial officers – 21 \nallowances considered by SNJPC in its report:\nHeld: As regards House Building Advance (HBA), recommendation \nof SNJPC that HBA be available to judicial officers also for the \npurchase of a ready built house from private individuals subject \nto

In [24]:
#summary generation

from langchain_openai import AzureChatOpenAI
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_VERSION"] = os.getenv("OPENAI_API_VERSION")
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv("AZURE_OPENAI_ENDPOINT")
os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")
llm = AzureChatOpenAI(
    azure_deployment="graphrag-gpt-4o",  # or your deployment
    api_version="2023-06-01-preview",  # or your api version
    temperature=0
)

from langchain_community.document_loaders import TextLoader
loader = TextLoader("escr-judgements-dataset/judgements_text/ARBITRATION PETITION_29_2023/ARBITRATION PETITION_29_2023.txt", encoding = 'UTF-8')
docs = loader.load()

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate

# Define prompt
prompt = ChatPromptTemplate.from_messages(
    [("system", "You are an assitant to generate court judgements summary write a detailed summary of the judgement covering issue, key information driving the judement decision, judgement conclusion etc:\\n\\n{context}")]
)

# Instantiate chain
chain = create_stuff_documents_chain(llm, prompt)

# Invoke chain
summary = chain.invoke({"context": docs})
print(summary)

### Summary of the Judgment: M/S Arif Azim Co. Ltd. v. M/S Aptech Ltd.

**Case Details:**
- **Citation:** [2024] 3 S.C.R. 73 : 2024 INSC 155
- **Case Number:** Arbitration Petition No. 29 of 2023
- **Date:** 01 March 2024
- **Judges:** Dr. Dhananjaya Y. Chandrachud, CJI, J.B. Pardiwala, and Manoj Misra, JJ.

**Issue for Consideration:**
1. Whether the Limitation Act, 1963 is applicable to an application for the appointment of an arbitrator under Section 11(6) of the Arbitration and Conciliation Act, 1996.
2. If applicable, whether the present petition is barred by limitation.
3. When does the right to apply under Section 11(6) accrue?
4. Whether the court may refuse to make a reference under Section 11 of the Arbitration and Conciliation Act, 1996 where the claims are ex-facie and hopelessly time-barred.

**Factual Matrix:**
- The petitioner, M/S Arif Azim Co. Ltd., based in Afghanistan, entered into three franchise agreements with the respondent, M/S Aptech Ltd., in 2013.
- Disputes a

In [23]:
llm.invoke("what is machine learning")

AIMessage(content='Machine learning is a subfield of artificial intelligence (AI) that focuses on the development of algorithms and statistical models that enable computers to perform tasks without explicit instructions. Instead of being programmed with specific rules to follow, machine learning systems learn from data. They identify patterns, make decisions, and improve their performance over time based on the input they receive.\n\nHere are some key concepts in machine learning:\n\n1. **Data**: The foundation of machine learning. Data can be in various forms, such as text, images, audio, or numerical values. The quality and quantity of data significantly impact the performance of machine learning models.\n\n2. **Algorithms**: These are the mathematical procedures or formulas that a machine learning model uses to learn from data. Common algorithms include linear regression, decision trees, neural networks, and support vector machines.\n\n3. **Training**: The process of feeding data in