In [53]:
from transformers import pipeline

# Load summarization model
summarizer = pipeline("summarization", model="t5-small")

# Example document summarization
case_details = """
The plaintiff filed a complaint against the defendant for unauthorized land acquisition...
"""
summary = summarizer(case_details, max_length=100, min_length=30, do_sample=False)
print("Summary:", summary[0]["summary_text"])


Device set to use cpu
Your max_length is set to 100, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


Summary: the plaintiff filed a complaint against the defendant for unauthorized land acquisition . the defendant is accused of unauthorized property acquisition... a land acquisition of a property .


In [54]:
import requests
from bs4 import BeautifulSoup

def fetch_legal_references(query):
    url = "https://indiankanoon.org/search/"
    params = {"formInput": query}
    response = requests.get(url, params=params)
    soup = BeautifulSoup(response.text, "html.parser")

    cases = soup.find_all("div", class_="result_title")
    results = []
    for case in cases:
        title = case.find("a").text
        link = "https://indiankanoon.org" + case.find("a")["href"]
        results.append({"title": title, "link": link})

    return results

# Example query for land dispute
legal_references = fetch_legal_references("land dispute")
print(legal_references)


[]


In [55]:
import requests
from bs4 import BeautifulSoup

def fetch_legal_references(query, num_pages=1):
    url = "https://indiankanoon.org/search/"
    results = []

    for page in range(1, num_pages + 1):
        params = {"formInput": query, "pagenum": page}
        response = requests.get(url, params=params)
        soup = BeautifulSoup(response.text, "html.parser")

        cases = soup.find_all("div", class_="result_title")
        for case in cases:
            title = case.find("a").text
            link = "https://indiankanoon.org" + case.find("a")["href"]
            results.append({"title": title, "link": link})

    return results

# Example query for land dispute, fetching results from multiple pages
legal_references = fetch_legal_references("land dispute", num_pages=3)  # Adjust num_pages based on results
print(legal_references)


[]


In [56]:
!pip install selenium



In [58]:
!pip install pyppeteer




In [59]:
import asyncio
from pyppeteer import launch, errors
from bs4 import BeautifulSoup

async def fetch_legal_references(query, num_pages=1):
    # Launch a headless browser with timeout configuration
    browser = await launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
    page = await browser.newPage()

    url = "https://indiankanoon.org/search/"
    results = []

    try:
        for page_num in range(1, num_pages + 1):
            params = {"formInput": query, "pagenum": page_num}
            full_url = f"{url}?{'&'.join(f'{key}={val}' for key, val in params.items())}"
            await page.goto(full_url, timeout=30000)  # Increased timeout to 30 seconds

            content = await page.content()
            soup = BeautifulSoup(content, "html.parser")

            cases = soup.find_all("div", class_="result_title")
            for case in cases:
                title = case.find("a").text
                link = "https://indiankanoon.org" + case.find("a")["href"]
                results.append({"title": title, "link": link})

    except errors.BrowserError as e:
        print("Browser closed unexpectedly:", e)
    finally:
        await browser.close()

    return results

# Example query for land dispute
async def main():
    legal_references = await fetch_legal_references("land dispute", num_pages=3)
    print(legal_references)

# Running it using %async magic in Jupyter or aw


In [60]:
import asyncio
from pyppeteer import launch, errors
from bs4 import BeautifulSoup

async def fetch_legal_references(query, num_pages=1):
    results = []
    attempts = 0
    max_attempts = 3

    while attempts < max_attempts:
        try:
            # Launch a headless browser with timeout configuration
            browser = await launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
            page = await browser.newPage()

            url = "https://indiankanoon.org/search/"

            for page_num in range(1, num_pages + 1):
                params = {"formInput": query, "pagenum": page_num}
                full_url = f"{url}?{'&'.join(f'{key}={val}' for key, val in params.items())}"
                await page.goto(full_url, timeout=30000)  # 30-second timeout

                content = await page.content()
                soup = BeautifulSoup(content, "html.parser")

                cases = soup.find_all("div", class_="result_title")
                for case in cases:
                    title = case.find("a").text
                    link = "https://indiankanoon.org" + case.find("a")["href"]
                    results.append({"title": title, "link": link})

            await browser.close()
            break  # If successful, break out of the loop
        except errors.BrowserError as e:
            print(f"Attempt {attempts + 1} failed: {e}")
            attempts += 1
        except Exception as e:
            print(f"Unexpected error on attempt {attempts + 1}: {e}")
            attempts += 1

    if attempts == max_attempts and not results:
        print("Failed to fetch legal references after multiple attempts.")
    return results

# Example query for land dispute
async def main():
    legal_references = await fetch_legal_references("land dispute", num_pages=3)
    print(legal_references)

# Running it using %async magic in Jupyter or await in Colab
# In Jupyter Notebook:
# %async main()

# In Google Colab:
await main()


[]


In [61]:
import asyncio
from pyppeteer import launch, errors
from bs4 import BeautifulSoup

async def fetch_legal_references(query, num_pages=1):
    results = []
    attempts = 0
    max_attempts = 5
    base_url = "https://indiankanoon.org/search/"

    while attempts < max_attempts:
        try:
            # Launch a headless browser with improved args for stability
            browser = await launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
            page = await browser.newPage()

            for page_num in range(1, num_pages + 1):
                params = {"formInput": query, "pagenum": page_num}
                full_url = f"{base_url}?{'&'.join(f'{key}={val}' for key, val in params.items())}"
                print(f"Accessing URL: {full_url}")

                await page.goto(full_url, timeout=45000)  # Increased timeout to 45 seconds
                content = await page.content()
                soup = BeautifulSoup(content, "html.parser")

                cases = soup.find_all("div", class_="result_title")
                for case in cases:
                    title = case.find("a").text
                    link = "https://indiankanoon.org" + case.find("a")["href"]
                    results.append({"title": title, "link": link})

            await browser.close()
            break  # Exit loop if successful
        except errors.BrowserError as e:
            print(f"Attempt {attempts + 1} failed: {e}")
            attempts += 1
        except Exception as e:
            print(f"Unexpected error on attempt {attempts + 1}: {e}")
            attempts += 1

    if attempts == max_attempts and not results:
        print("Failed to fetch legal references after multiple attempts.")

    return results

# Example query for land dispute
async def main():
    legal_references = await fetch_legal_references("land dispute", num_pages=3)
    print(legal_references)

# Running it using %async magic in Jupyter or await in Colab
# In Jupyter Notebook:
# %async main()

# In Google Colab:
await main()


Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=1
Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=2
Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=3
[]


In [62]:
import aiohttp
import asyncio
from bs4 import BeautifulSoup

async def fetch_legal_references(session, query, num_pages=1):
    results = []
    base_url = "https://indiankanoon.org/search/"

    for page_num in range(1, num_pages + 1):
        url = f"{base_url}?formInput={query}&pagenum={page_num}"
        print(f"Accessing URL: {url}")

        async with session.get(url) as response:
            if response.status == 200:
                page_content = await response.text()
                soup = BeautifulSoup(page_content, "html.parser")
                cases = soup.find_all("div", class_="result_title")

                for case in cases:
                    title = case.find("a").text
                    link = "https://indiankanoon.org" + case.find("a")["href"]
                    results.append({"title": title, "link": link})
            else:
                print(f"Failed to retrieve page {page_num}")

    return results

async def main():
    async with aiohttp.ClientSession() as session:
        legal_references = await fetch_legal_references(session, "land dispute", num_pages=3)
        print(legal_references)

# Run in Jupyter Notebook or Google Colab
# In Jupyter Notebook:
# %run script_name.py

# In Google Colab:
await main()


Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=1
Failed to retrieve page 1
Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=2
Failed to retrieve page 2
Accessing URL: https://indiankanoon.org/search/?formInput=land dispute&pagenum=3
Failed to retrieve page 3
[]


In [63]:
import pandas as pd
df = pd.read_csv("final_judge_database.csv")


In [64]:
import os

# Create the directory if it doesn't exist
os.makedirs("/mnt/data", exist_ok=True)

# Move the file to the directory
# Assuming the file is in the current working directory
import shutil
shutil.move("final_judge_database.csv", "/mnt/data/final_judge_database.csv")

# Now read the file
df = pd.read_csv("/mnt/data/final_judge_database.csv")


In [65]:
print("Current Directory:", os.getcwd())
print("Files in Current Directory:", os.listdir("."))


Current Directory: /content
Files in Current Directory: ['.config', 'prayer_clauses_dataset.csv', 'ipc_sections.csv', 'wandb', 'drive', 'prayer_clause_model', 'logs', 'sample_data']


In [66]:
# Display the first few rows of the dataset
print(df.head())

# Display column names
print(df.columns)

# Check for null or missing values
print(df.isnull().sum())


   Unnamed: 0                                         case title  \
0           0        Jagdish Saran & Ors vs Union Of India & Ors   
1           1  Grindlays Bank Limited vs The Income Tax Offic...   
2           2  Deep Chand And Anr. vs State Of Uttar Pradesh ...   
3           3  Managing Director, Uttar Pradesh ... vs Vinay ...   
4           4                   State Of Rajasthan vs Daulat Ram   

           judges name(s)  date of judgment  \
0       Krishnaiyer, V.R.  28 January, 1980   
1            Pathak, R.S.  15 January, 1980   
2   N Untwalia, O C Reddy  16 January, 1980   
3  Sarkaria, Ranjit Singh  16 January, 1980   
4       A Koshal, S M Ali  23 January, 1980   

                                            citation  \
0                   1980 AIR  820, 1980 SCR  (2) 831   
1                   1980 AIR  656, 1980 SCR  (2) 765   
2   AIR 1980 SC 633, (1980) 3 SCC 231, 1980 (12) ...   
3                   1980 AIR  840, 1980 SCR  (2) 773   
4   AIR 1980 SC 1314, 1980 C

In [67]:
from transformers import pipeline

# Load summarization pipeline
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")

# Example case text
case_text = "The petitioner seeks relief regarding a land dispute where the lower court ruled in favor of the respondent despite contradictory evidence."

# Summarize the case
summary = summarizer(case_text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
print("Case Summary:", summary)


Device set to use cpu
Your max_length is set to 100, but your input_length is only 33. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)


Case Summary: the petitioner seeks relief regarding a land dispute . the lower court ruled in favor of the respondent despite contradictory evidence .


In [68]:
def retrieve_citations(issue, dataset):
    # Ensure column name is standardized
    if 'Issues' not in dataset.columns:
        raise ValueError("The 'Issues' column is missing. Check your dataset.")

    # Retrieve rows matching the issue
    relevant_cases = dataset[dataset['Issues'].str.contains(issue, case=False, na=False)]
    return relevant_cases[['Case Title', 'Cited Cases']]

# Example usage
issue = "land dispute"
try:
    citations = retrieve_citations(issue, df)
    print(citations)
except ValueError as e:
    print(e)


The 'Issues' column is missing. Check your dataset.


In [69]:
def retrieve_citations_alternative(keyword, dataset):
    """
    Retrieve relevant cases by searching in the 'Case Title' or 'Cited Cases' columns.

    Args:
        keyword (str): Keyword to search for in the dataset.
        dataset (pd.DataFrame): The case dataset.

    Returns:
        pd.DataFrame: Relevant rows from the dataset.
    """
    # Check if columns exist
    if 'Case Title' not in dataset.columns or 'Cited Cases' not in dataset.columns:
        raise ValueError("Required columns ('Case Title', 'Cited Cases') are missing in the dataset.")

    # Filter rows based on keyword in 'Case Title' or 'Cited Cases'
    relevant_cases = dataset[
        dataset['Case Title'].str.contains(keyword, case=False, na=False) |
        dataset['Cited Cases'].str.contains(keyword, case=False, na=False)
    ]
    return relevant_cases[['Case Title', 'Cited Cases']]

# Example usage
keyword = "land dispute"
try:
    citations = retrieve_citations_alternative(keyword, df)
    print("Relevant Citations:\n", citations)
except ValueError as e:
    print(e)


Required columns ('Case Title', 'Cited Cases') are missing in the dataset.


In [70]:
!pip install exa-py


Collecting openai>=1.10.0 (from exa-py)
  Using cached openai-1.59.5-py3-none-any.whl.metadata (27 kB)
Using cached openai-1.59.5-py3-none-any.whl (454 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 0.28.0
    Uninstalling openai-0.28.0:
      Successfully uninstalled openai-0.28.0
Successfully installed openai-1.59.5


In [71]:
import pandas as pd
import re
import openai
import json  # Import json module to fix the NameError

# Step 1: Load the dataset
file_path = "/mnt/data/final_judge_database.csv"
df = pd.read_csv(file_path)

# Step 2: Set up OpenAI API key
openai.api_key = "sk-your-openai-key"

# Step 3: Define the function to extract acts and sections
def extract_relevant_sections(case_text):
    relevant_sections = []
    pattern = r"(Section\s\d+|Act\s\d+|Clause\s\d+)(.*?)(\n\n|\Z)"
    matches = re.findall(pattern, case_text, re.DOTALL)

    for match in matches:
        relevant_sections.append(match[0] + match[1])

    return relevant_sections

# Step 4: Define a function to query OpenAI for mapping and summaries
def query_llm(prompt):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=300,
        temperature=0.7,
    )
    return response.choices[0].message.content

def map_acts_sections(relevant_sections):
    mapped_data = []

    for section in relevant_sections:
        prompt = f"Explain the following legal provision and its relevance:\n\n{section}"
        summary = query_llm(prompt)

        mapped_data.append({
            "act_section": section,
            "llm_summary": summary
        })

    return mapped_data

# Step 5: Process the dataset
all_mapped_acts_sections = []

for index, row in df.iterrows():
    case_text = f"Case Title: {row.get('Case Title', '')}\n" \
                f"Judges: {row.get('Judges Name(s)', '')}\n" \
                f"Decision: {row.get('Decision', '')}\n" \
                f"Cited Cases: {row.get('Cited Cases', '')}"

    # Extract relevant sections from the case text
    relevant_sections = extract_relevant_sections(case_text)

    # Map the acts/sections to the extracted text using OpenAI
    mapped_acts_sections = map_acts_sections(relevant_sections)
    all_mapped_acts_sections.extend(mapped_acts_sections)

# Step 6: Display the results
print(json.dumps(all_mapped_acts_sections, indent=2))


[]


In [72]:
import time
import openai

def retry_query_llm(prompt, retries=3, backoff=10):
    for attempt in range(retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300,
                temperature=0.7,
            )
            return response.choices[0].message.content
        except openai.error.RateLimitError:
            print(f"Rate limit hit. Retrying in {backoff} seconds... (Attempt {attempt + 1})")
            time.sleep(backoff)
    raise Exception("Exceeded maximum retries")

In [73]:
# Example: Map issues to IPC sections
issue_to_ipc = {
    "land dispute": ["Section 420 - Cheating", "Section 34 - Common Intent"],
    "contract dispute": ["Section 73 - Breach of Contract"]
}

ipc_sections = issue_to_ipc.get(issue, [])
print("Applicable IPC Sections:", ipc_sections)


Applicable IPC Sections: ['Section 420 - Cheating', 'Section 34 - Common Intent']


In [74]:
print(df.columns.tolist())


['Unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']


In [75]:
df.columns = df.columns.str.strip().str.lower()


In [76]:
if 'issues' not in df.columns:
    print("The 'Issues' column is missing. Check your dataset.")
else:
    print("The 'Issues' column is present.")


The 'Issues' column is present.


In [77]:
def retrieve_citations(issue, dataset):
    # Ensure column name is standardized
    if 'Issues' not in dataset.columns:
        raise ValueError("The 'Issues' column is missing. Check your dataset.")

    # Retrieve rows matching the issue
    relevant_cases = dataset[dataset['Issues'].str.contains(issue, case=False, na=False)]
    return relevant_cases[['Case Title', 'Cited Cases']]

# Example usage
issue = "land dispute"
try:
    citations = retrieve_citations(issue, df)
    print(citations)
except ValueError as e:
    print(e)


The 'Issues' column is missing. Check your dataset.


In [78]:
# Step 1: Print the available columns
print("Available columns in the dataset:", df.columns.tolist())

# Step 2: Check for leading/trailing spaces or inconsistencies
df.columns = df.columns.str.strip().str.lower()
print("Standardized columns:", df.columns.tolist())

# Step 3: Check if the 'Issues' column is present
if 'issues' not in df.columns:
    print("The 'Issues' column is missing. Available columns after standardization:", df.columns.tolist())
else:
    print("The 'Issues' column is present.")


Available columns in the dataset: ['unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']
Standardized columns: ['unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']
The 'Issues' column is present.


In [79]:
def retrieve_citations(issue, dataset):
    # Standardize column names
    dataset.columns = dataset.columns.str.strip().str.lower()  # Removes spaces and converts to lowercase
    print("Standardized columns:", dataset.columns.tolist())

    # Ensure the 'issues' column is present after standardization
    if 'issues' not in dataset.columns:
        raise ValueError("The 'issues' column is missing. Check your dataset.")

    # Retrieve rows matching the issue
    relevant_cases = dataset[dataset['issues'].str.contains(issue, case=False, na=False)]
    return relevant_cases[['case title', 'cited cases']]

# Example usage
issue = "land dispute"
try:
    citations = retrieve_citations(issue, df)
    print("Retrieved Citations:")
    print(citations)
except ValueError as e:
    print(e)


Standardized columns: ['unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']
Retrieved Citations:
Empty DataFrame
Columns: [case title, cited cases]
Index: []


In [80]:
def retrieve_citations(issue, dataset):
    # Standardize column names
    dataset.columns = dataset.columns.str.strip().str.lower()  # Removes spaces and converts to lowercase
    print("Standardized columns:", dataset.columns.tolist())

    # Ensure the 'issues' column is present after standardization
    if 'issues' not in dataset.columns:
        raise ValueError("The 'issues' column is missing. Check your dataset.")

    # Display a sample of the issues column for debugging
    print("Sample of 'issues' column:")
    print(dataset['issues'].head())

    # Retrieve rows matching the issue
    relevant_cases = dataset[dataset['issues'].str.contains(issue, case=False, na=False)]

    # Check if any cases are found
    if relevant_cases.empty:
        print(f"No matches found for issue: '{issue}'. Please verify the input.")
    else:
        print(f"Found {len(relevant_cases)} matching cases for issue: '{issue}'.")

    return relevant_cases[['case title', 'cited cases']]

# Example usage
issue = "murder"
try:
    citations = retrieve_citations(issue, df)
    print("Retrieved Citations:")
    print(citations)
except ValueError as e:
    print(e)


Standardized columns: ['unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']
Sample of 'issues' column:
0    Article 14 in The Constitution Of India 1949 ;...
1    Article 226 in The Constitution Of India 1949 ...
2       Section 4 in The Land Acquisition Act, 1894 ; 
3    Article 226 in The Constitution Of India 1949 ...
4                                                  NaN
Name: issues, dtype: object
No matches found for issue: 'murder'. Please verify the input.
Retrieved Citations:
Empty DataFrame
Columns: [case title, cited cases]
Index: []


In [81]:
!pip install faiss-cpu



In [82]:
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def setup_faiss_index(dataset, column_name):
    """
    Create a FAISS index for the specified column in the dataset.
    """
    # Standardize column names
    dataset.columns = dataset.columns.str.strip().str.lower()
    print("Standardized columns:", dataset.columns.tolist())

    if column_name not in dataset.columns:
        raise ValueError(f"The '{column_name}' column is missing. Check your dataset.")

    # Extract text from the column
    text_data = dataset[column_name].dropna().tolist()

    # Use TF-IDF to vectorize the text
    vectorizer = TfidfVectorizer(max_features=512)  # Limit the number of features for efficiency
    vectors = vectorizer.fit_transform(text_data).toarray()

    # Create a FAISS index
    dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(vectors).astype('float32'))

    return index, vectorizer, text_data

def faiss_search(index, vectorizer, query, text_data, top_k=5):
    """
    Perform a FAISS search to find the most similar items to the query.
    """
    query_vector = vectorizer.transform([query]).toarray().astype('float32')
    distances, indices = index.search(query_vector, top_k)
    results = [(text_data[idx], distances[0][i]) for i, idx in enumerate(indices[0])]
    return results

# Step 1: Set up FAISS index for the 'issues' column
try:
    index, vectorizer, text_data = setup_faiss_index(df, 'issues')
except ValueError as e:
    print(e)
    exit()

# Step 2: Perform a FAISS search for the query
query = "murder"
results = faiss_search(index, vectorizer, query, text_data, top_k=5)
print("\nFAISS Search Results:")
for result, distance in results:
    print(f"Issue: {result}, Distance: {distance}")

# Step 3: Filter dataset based on FAISS results and retrieve citations
def retrieve_citations_faiss(results, dataset):
    # Extract matching issues from FAISS results
    matching_issues = [result for result, _ in results]

    # Filter the dataset for rows containing matching issues
    relevant_cases = dataset[dataset['issues'].isin(matching_issues)]

    if relevant_cases.empty:
        print(f"No matches found for issues: {matching_issues}. Please verify the input.")
    else:
        print(f"Found {len(relevant_cases)} matching cases for issues: {matching_issues}.")

    return relevant_cases[['case title', 'cited cases']]

try:
    citations = retrieve_citations_faiss(results, df)
    print("\nRetrieved Citations:")
    print(citations)
except ValueError as e:
    print(e)


Standardized columns: ['unnamed: 0', 'case title', 'judges name(s)', 'date of judgment', 'citation', 'issues', 'decision', 'cited cases']

FAISS Search Results:
Issue: article (whether before or after the commencement of section 55 of the Constitution ; , Distance: 0.9999998807907104
Issue: rule 56 a fundamental rules ; , Distance: 0.9999998807907104
Issue: Section 15 in The Registration Act, 1908 ; Section 16 in The Registration Act, 1908 ; Section 21 in The General Clauses Act, 1897 ; , Distance: 0.9999998807907104
Issue: Section 4 in The Payment of Gratuity Act, 1972 ; , Distance: 0.9999998807907104
Issue: Section 6 in The General Clauses Act, 1897 ; Section 14 in The General Clauses Act, 1897 ; , Distance: 0.9999998807907104
Found 8 matching cases for issues: ['article (whether before or after the commencement of section 55 of the Constitution ; ', 'rule 56 a fundamental rules ; ', 'Section 15 in The Registration Act, 1908 ; Section 16 in The Registration Act, 1908 ; Section 21 in 

In [83]:
import openai
import re
import pandas as pd

# Step 1: Set up OpenAI API key
openai.api_key = "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"

# Step 2: Function to extract IPC sections using regex
def extract_ipc_sections(text):
    """
    Extract IPC sections from the provided text using regex.
    """
    pattern = r"Section\s\d+"
    matches = re.findall(pattern, text)
    return matches

# Step 3: Query OpenAI API to generate IPC descriptions
def get_ipc_details(section):
    """
    Use OpenAI API to generate IPC section details.
    """
    try:
        prompt = f"Provide the legal description and context for {section} of the Indian Penal Code (IPC)."
        response = openai.Completion.create(
            engine="text-davinci-003",  # Change engine based on your requirements
            prompt=prompt,
            max_tokens=150,
            temperature=0.5
        )
        description = response.choices[0].text.strip()
        return description
    except Exception as e:
        print(f"Error fetching details for {section}: {e}")
        return "Description unavailable."

# Step 4: Map IPC sections to descriptions
def map_ipc_sections_to_case(case_text):
    """
    Extract IPC sections from the case text and map them to their descriptions.
    """
    ipc_sections = extract_ipc_sections(case_text)
    ipc_mapping = {}
    for section in ipc_sections:
        if section not in ipc_mapping:  # Avoid duplicate API calls
            ipc_mapping[section] = get_ipc_details(section)
    return ipc_mapping

# Step 5: Apply the process to a dataset
def process_case_file(dataset, text_column='issues'):
    """
    Process the dataset to extract and map IPC sections for each case.
    """
    dataset['ipc_mappings'] = dataset[text_column].apply(map_ipc_sections_to_case)
    return dataset

# Step 6: Example case dataset
data = {
    "case_title": ["Case A", "Case B"],
    "issues": [
        "This case involves Section 302 and Section 307 for murder and attempted murder.",
        "Fraudulent activities under Section 420 are highlighted in this case."
    ]
}
df = pd.DataFrame(data)

# Process the dataset
df = process_case_file(df, text_column='issues')

# Step 7: Display results
print("\nProcessed Dataset with IPC Mappings:")
print(df[['case_title', 'ipc_mappings']])


Error fetching details for Section 302: The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations
Error fetching details for Section 307: The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations
Error fetching details for Section 420: The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations

Processed Dataset with IPC Mappings:
  case_title                                       ipc_mappings
0     Case A  {'Section 302': 'Description unavailable.', 'S...
1     Case B        {'Section 420': 'Description unavailable.'}


In [84]:
!pip install --upgrade openai




In [87]:
def draft_petition(case_title, summary, citations, ipc_sections, prayer, interim_relief=None):
    petition = f"""
    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA

    TITLE: {case_title}

    **Facts of the Case:**
    {summary}

    **Relevant Citations:**
    {citations.to_string(index=False)}

    **Applicable Legal Provisions:**
    {", ".join(ipc_sections)}

    **Prayer Clause:**
    {prayer}

    """
    if interim_relief:
        petition += f"\n**Interim Relief Sought:**\n{interim_relief}"

    return petition

# Example Usage
case_title = "Sample Case Title"
prayer = "The petitioner prays for appropriate relief, including compensation."
interim_relief = "Stay order on eviction."

petition = draft_petition(case_title, summary, citations, ipc_sections, prayer, interim_relief)
print(petition)



    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA
    
    TITLE: Sample Case Title

    **Facts of the Case:**
    the petitioner seeks relief regarding a land dispute . the lower court ruled in favor of the respondent despite contradictory evidence .

    **Relevant Citations:**
                                                                 case title                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

In [88]:
!pip install openai==0.28

Collecting openai==0.28
  Using cached openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.59.5
    Uninstalling openai-1.59.5:
      Successfully uninstalled openai-1.59.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
exa-py 1.7.1 requires openai>=1.10.0, but you have openai 0.28.0 which is incompatible.[0m[31m
[0mSuccessfully installed openai-0.28.0


In [92]:
import openai
import subprocess
import pandas as pd

# Step 1: Function to run `openai migrate`
def run_openai_migrate():
    """
    Runs the `openai migrate` command to upgrade the codebase.
    """
    try:
        result = subprocess.run(
            ["openai", "migrate"],
            capture_output=True,
            text=True,
            check=True
        )
        print("Migration Output:\n", result.stdout)
    except subprocess.CalledProcessError as e:
        print("Migration Failed:\n", e.stderr)
        return False
    return True

# Step 2: Function to fetch IPC section details
def fetch_ipc_section_details(section):
    """
    Fetch details for an IPC section using the OpenAI API.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a legal expert."},
                {"role": "user", "content": f"Provide details about IPC Section {section}."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error fetching details for Section {section}: {e}")
        return f"Description unavailable for Section {section}."

# Step 3: Map IPC sections to cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Map IPC sections to cases in the dataset.
    """
    def map_issues_to_ipc(issue_text):
        """
        Map issues to IPC sections and fetch details for each section.
        """
        if not isinstance(issue_text, str):
            return "No relevant IPC sections found."

        # Example IPC sections for mapping
        ipc_sections = ["302", "307", "420"]  # Add more as needed
        ipc_details = {}
        for section in ipc_sections:
            if section in issue_text:
                ipc_details[f"Section {section}"] = fetch_ipc_section_details(section)
        return ipc_details if ipc_details else "No relevant IPC sections found."

    # Apply the mapping to each row in the dataset
    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Step 4: Generate a petition from IPC mappings
def generate_petition(ipc_mappings):
    """
    Generate petitions based on IPC mappings.
    """
    petitions = []
    for _, row in ipc_mappings.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']

        # Generate petition text
        petition_text = f"Petition for {case_title}:\n\n"
        if isinstance(ipc_details, dict):
            for section, description in ipc_details.items():
                petition_text += f"{section}: {description}\n"
        else:
            petition_text += "No IPC details available for this case.\n"

        petitions.append({"case_title": case_title, "petition": petition_text})
    return petitions

# Step 5: Main Execution
def main():
    # Run `openai migrate` before proceeding
    migration_success = run_openai_migrate()
    if not migration_success:
        print("Migration failed. Exiting.")
        return

    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["Section 302, Section 307", "Section 420"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df = preprocess_dataset(df)

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate petitions
    petitions = generate_petition(processed_df)

    # Output the results
    print("\nProcessed Dataset with IPC Mappings:")
    print(processed_df)

    print("\nGenerated Petitions:")
    for petition in petitions:
        print(f"Case Title: {petition['case_title']}\n")
        print(f"Petition:\n{petition['petition']}\n")
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()


Migration Failed:
 usage: openai [-h] [-V] [-v] [-b API_BASE] [-k API_KEY] [-p PROXY [PROXY ...]] [-o ORGANIZATION]
              {api,tools,wandb} ...
openai: error: argument {api,tools,wandb}: invalid choice: 'migrate' (choose from 'api', 'tools', 'wandb')

Migration failed. Exiting.


In [93]:
import openai
import pandas as pd

# Step 1: Fetch IPC Section Details
def fetch_ipc_section_details(section):
    """
    Fetch details for an IPC section using the OpenAI API.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a legal expert."},
                {"role": "user", "content": f"Provide details about IPC Section {section}."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error fetching details for Section {section}: {e}")
        return f"Description unavailable for Section {section}."

# Step 2: Map IPC Sections to Cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Map IPC sections to cases in the dataset.
    """
    def map_issues_to_ipc(issue_text):
        """
        Map issues to IPC sections and fetch details for each section.
        """
        if not isinstance(issue_text, str):
            return "No relevant IPC sections found."

        # Example IPC sections for mapping
        ipc_sections = ["302", "307", "420"]  # Add more as needed
        ipc_details = {}
        for section in ipc_sections:
            if section in issue_text:
                ipc_details[f"Section {section}"] = fetch_ipc_section_details(section)
        return ipc_details if ipc_details else "No relevant IPC sections found."

    # Apply the mapping to each row in the dataset
    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Step 3: Generate a petition from IPC mappings
def generate_petition(ipc_mappings):
    """
    Generate petitions based on IPC mappings.
    """
    petitions = []
    for _, row in ipc_mappings.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']

        # Generate petition text
        petition_text = f"Petition for {case_title}:\n\n"
        if isinstance(ipc_details, dict):
            for section, description in ipc_details.items():
                petition_text += f"{section}: {description}\n"
        else:
            petition_text += "No IPC details available for this case.\n"

        petitions.append({"case_title": case_title, "petition": petition_text})
    return petitions

# Step 4: Main Execution
def main():
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["Section 302, Section 307", "Section 420"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()  # Standardize column names

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate petitions
    petitions = generate_petition(processed_df)

    # Output the results
    print("\nProcessed Dataset with IPC Mappings:")
    print(processed_df)

    print("\nGenerated Petitions:")
    for petition in petitions:
        print(f"Case Title: {petition['case_title']}\n")
        print(f"Petition:\n{petition['petition']}\n")
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()


Error fetching details for Section 302: The model `gpt-4` does not exist or you do not have access to it.
Error fetching details for Section 307: The model `gpt-4` does not exist or you do not have access to it.
Error fetching details for Section 420: The model `gpt-4` does not exist or you do not have access to it.

Processed Dataset with IPC Mappings:
  case_title                    issues  \
0     Case A  Section 302, Section 307   
1     Case B               Section 420   

                                        ipc_mappings  
0  {'Section 302': 'Description unavailable for S...  
1  {'Section 420': 'Description unavailable for S...  

Generated Petitions:
Case Title: Case A

Petition:
Petition for Case A:

Section 302: Description unavailable for Section 302.
Section 307: Description unavailable for Section 307.


Case Title: Case B

Petition:
Petition for Case B:

Section 420: Description unavailable for Section 420.




In [94]:
import openai
import pandas as pd

# Set your OpenAI API key
openai.api_key = "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"  # Replace <YOUR_API_KEY> with your actual OpenAI API key

# Step 1: Fetch IPC Section Details
def fetch_ipc_section_details(section):
    """
    Fetch details for an IPC section using the OpenAI API.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert."},
                {"role": "user", "content": f"Provide details about IPC Section {section}."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error fetching details for Section {section}: {e}")
        return f"Description unavailable for Section {section}."

# Step 2: Map IPC Sections to Cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Map IPC sections to cases in the dataset.
    """
    def map_issues_to_ipc(issue_text):
        """
        Map issues to IPC sections and fetch details for each section.
        """
        if not isinstance(issue_text, str):
            return "No relevant IPC sections found."

        # Example IPC sections for mapping
        ipc_sections = ["302", "307", "420"]  # Add more as needed
        ipc_details = {}
        for section in ipc_sections:
            if section in issue_text:
                ipc_details[f"Section {section}"] = fetch_ipc_section_details(section)
        return ipc_details if ipc_details else "No relevant IPC sections found."

    # Apply the mapping to each row in the dataset
    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Step 3: Generate a petition from IPC mappings
def generate_petition(ipc_mappings):
    """
    Generate petitions based on IPC mappings.
    """
    petitions = []
    for _, row in ipc_mappings.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']

        # Generate petition text
        petition_text = f"Petition for {case_title}:\n\n"
        if isinstance(ipc_details, dict):
            for section, description in ipc_details.items():
                petition_text += f"{section}: {description}\n"
        else:
            petition_text += "No IPC details available for this case.\n"

        petitions.append({"case_title": case_title, "petition": petition_text})
    return petitions

# Step 4: Main Execution
def main():
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["Section 302, Section 307", "Section 420"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()  # Standardize column names

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate petitions
    petitions = generate_petition(processed_df)

    # Output the results
    print("\nProcessed Dataset with IPC Mappings:")
    print(processed_df)

    print("\nGenerated Petitions:")
    for petition in petitions:
        print(f"Case Title: {petition['case_title']}\n")
        print(f"Petition:\n{petition['petition']}\n")
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()


Error fetching details for Section 302: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 307: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 420: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

Processed Dataset with IPC Mappings:
  case_title                    issues  \
0     Case A  Section 302, Section 307   
1     Case B               Section 420   

                                        ipc_mappings  
0  {'Section 302': 'Description unavailable for S...  
1  {'Section 420': 'Descripti

In [95]:
import openai
import pandas as pd

# Set your OpenAI API key
openai.api_key = "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"  # Replace with your actual OpenAI API key

# Function to fetch IPC section details
def fetch_ipc_section_details(section):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert."},
                {"role": "user", "content": f"Provide a detailed explanation and implications of IPC Section {section}."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error fetching details for Section {section}: {e}")
        return f"Description unavailable for Section {section}."

# Function to map IPC sections to cases
def map_ipc_sections(dataset, section_column='issues'):
    def map_issues_to_ipc(issue_text):
        if not isinstance(issue_text, str):
            return {}
        ipc_sections = [int(s.strip()) for s in issue_text.split(',') if s.strip().isdigit()]
        ipc_details = {f"Section {section}": fetch_ipc_section_details(section) for section in ipc_sections}
        return ipc_details

    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Function to draft a detailed petition
def draft_petition(case_title, summary, ipc_details, prayer, interim_relief=None):
    petition = f"""
    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA

    TITLE: {case_title}

    **Facts of the Case:**
    {summary}

    **Applicable Legal Provisions:**
    """
    for section, description in ipc_details.items():
        petition += f"\n- {section}: {description}\n"

    petition += f"\n**Prayer Clause:**\n{prayer}\n"

    if interim_relief:
        petition += f"\n**Interim Relief Sought:**\n{interim_relief}"

    return petition

# Function to generate petitions for all cases
def generate_petitions(ipc_mappings):
    petitions = []
    for _, row in ipc_mappings.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']
        summary = f"This petition concerns the issues arising from the application of IPC sections related to the case titled '{case_title}'."
        prayer = "The petitioner prays for appropriate relief, including justice and compensation."
        interim_relief = "An immediate stay on the proceedings impacting the petitioner."

        petition_text = draft_petition(case_title, summary, ipc_details, prayer, interim_relief)
        petitions.append({"case_title": case_title, "petition": petition_text})
    return petitions

# Main execution
def main():
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["302, 307", "420"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate detailed petitions
    petitions = generate_petitions(processed_df)

    # Output the results
    print("\nProcessed Dataset with IPC Mappings:")
    print(processed_df)

    print("\nGenerated Petitions:")
    for petition in petitions:
        print(f"Case Title: {petition['case_title']}\n")
        print(f"Petition:\n{petition['petition']}\n")
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()


Error fetching details for Section 302: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 307: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 420: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

Processed Dataset with IPC Mappings:
  case_title    issues                                       ipc_mappings
0     Case A  302, 307  {'Section 302': 'Description unavailable for S...
1     Case B       420  {'Section 420': 'Description unavailable for S...

Generated Petitions:
Case Title: Case A

Pe

In [96]:
import openai
import pandas as pd

# Set your OpenAI API key
openai.api_key = "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"  # Replace with your actual OpenAI API key

# Function to fetch IPC section details
def fetch_ipc_section_details(section):
    """
    Fetch detailed information about an IPC section using the OpenAI API.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a legal expert."},
                {"role": "user", "content": f"Provide a comprehensive explanation, historical context, and implications of IPC Section {section}."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error fetching details for Section {section}: {e}")
        return f"Description unavailable for Section {section}."

# Function to map IPC sections to cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Map IPC sections mentioned in issues to detailed descriptions using OpenAI.
    """
    def map_issues_to_ipc(issue_text):
        if not isinstance(issue_text, str):
            return {}
        ipc_sections = [int(s.strip()) for s in issue_text.split(',') if s.strip().isdigit()]
        ipc_details = {f"Section {section}": fetch_ipc_section_details(section) for section in ipc_sections}
        return ipc_details

    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Function to draft a detailed petition
def draft_petition(case_title, summary, ipc_details, prayer, interim_relief=None):
    """
    Generate a detailed, court-ready petition.
    """
    petition = f"""
    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA

    **CASE TITLE:** {case_title}

    **FACTS OF THE CASE:**
    1. The petitioner is aggrieved by the actions/inactions that have led to this legal recourse.
    2. The primary issues revolve around the following legal and factual matters:
       - {summary}
    3. The actions of the respondent(s) are in direct violation of the fundamental rights guaranteed under the Constitution of India.

    **APPLICABLE LEGAL PROVISIONS:**
    The following sections of the Indian Penal Code (IPC) are directly relevant to this case:
    """
    for section, description in ipc_details.items():
        petition += f"\n- **{section}:**\n  {description}\n"

    petition += f"""
    **PRAYER CLAUSE:**
    The petitioner respectfully prays for the following reliefs:
    1. Issue appropriate writ(s) under Article 32/226 of the Constitution directing the respondent(s) to take remedial actions as per the law.
    2. Grant monetary compensation or reparations to the petitioner for the irreparable damage caused.
    3. Pass any other order(s) as deemed fit and appropriate by this Hon'ble Court.

    **INTERIM RELIEF (IF ANY):**
    """
    if interim_relief:
        petition += f"  - {interim_relief}\n"
    else:
        petition += "  - No interim relief sought at this stage.\n"

    petition += """
    **CITATIONS AND REFERENCES:**
    1. Previous judicial precedents and relevant case laws will be submitted during the hearing as per the court's directions.
    2. Supporting legal interpretations and documentation are annexed herewith.
    """

    return petition

# Function to generate petitions for all cases
def generate_petitions(ipc_mappings):
    """
    Generate petitions for each case based on IPC mappings.
    """
    petitions = []
    for _, row in ipc_mappings.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']
        summary = f"This case pertains to violations involving IPC sections and legal provisions relevant to the petitioner's grievances."
        prayer = "Grant relief as deemed appropriate, including compensation and necessary directives to the respondents."
        interim_relief = "Grant a stay on the disputed actions or proceedings impacting the petitioner."

        petition_text = draft_petition(case_title, summary, ipc_details, prayer, interim_relief)
        petitions.append({"case_title": case_title, "petition": petition_text})
    return petitions

# Main execution
def main():
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["302, 307", "420"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate detailed petitions
    petitions = generate_petitions(processed_df)

    # Output the results
    print("\nProcessed Dataset with IPC Mappings:")
    print(processed_df)

    print("\nGenerated Petitions:")
    for petition in petitions:
        print(f"Case Title: {petition['case_title']}\n")
        print(f"Petition:\n{petition['petition']}\n")
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()


Error fetching details for Section 302: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 307: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error fetching details for Section 420: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

Processed Dataset with IPC Mappings:
  case_title    issues                                       ipc_mappings
0     Case A  302, 307  {'Section 302': 'Description unavailable for S...
1     Case B       420  {'Section 420': 'Description unavailable for S...

Generated Petitions:
Case Title: Case A

Pe

In [97]:
import pandas as pd

# Step 1: Fetch IPC Section Details Locally
ipc_section_dict = {
    140: {
        "description": "If someone who is not a military member wears a uniform or carries something resembling a military uniform to deceive others into believing they are a soldier, sailor, or airman, they can be punished with up to three months in jail, a fine of up to five hundred rupees, or both.",
        "offense": "Wearing the dress or carrying any token used by a soldier, sailor or airman with intent that it may be believed that he is such a soldier, sailor or airman.",
        "punishment": "3 Months or Fine or Both."
    },
    127: {
        "description": "If someone receives property knowing it was taken during the commission of certain offenses (mentioned in sections 125 and 126), they can be punished with imprisonment of up to seven years, fined, and the property can be forfeited.",
        "offense": "Receiving property taken by war or depredation mentioned in sections 125 and 126.",
        "punishment": "7 Years + Fine + forfeiture of property."
    },
    # Add more sections here...
}

# Step 2: Map IPC Sections to Cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Map IPC sections to cases in the dataset using local data.
    """
    def map_issues_to_ipc(issue_text):
        """
        Map issues to IPC sections and fetch details for each section.
        """
        if not isinstance(issue_text, str):
            return "No relevant IPC sections found."

        # Example IPC sections for mapping
        ipc_sections = [int(s.strip()) for s in issue_text.split(',') if s.strip().isdigit()]
        ipc_details = {}
        for section in ipc_sections:
            if section in ipc_section_dict:
                ipc_details[f"Section {section}"] = ipc_section_dict[section]
        return ipc_details if ipc_details else "No relevant IPC sections found."

    # Apply the mapping to each row in the dataset
    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Step 3: Draft a Petition
def draft_petition(case_title, summary, ipc_details, prayer, interim_relief=None):
    petition = f"""
    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA

    TITLE: {case_title}

    **Facts of the Case:**
    {summary}

    **Applicable Legal Provisions:**
    """

    # Add IPC sections and their details to the petition
    for section, details in ipc_details.items():
        petition += f"\n{section} - {details.get('offense', 'Offense details not available')}"
        petition += f"\nPunishment: {details.get('punishment', 'Punishment details not available')}"
        petition += f"\nCategory: {details.get('category', 'Category details not available')}\n"

    petition += f"\n**Prayer Clause:**\n{prayer}"

    if interim_relief:
        petition += f"\n\n**Interim Relief Sought:**\n{interim_relief}"

    return petition  # Ensure this is within the function


# Step 4: Main Execution
def main():
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["140, 127", "127"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()  # Standardize column names

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate petitions
    for _, row in processed_df.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']

        # Draft a petition for the case
        petition = draft_petition(
            case_title=case_title,
            summary="Detailed summary of the case goes here.",
            ipc_details=ipc_details,
            prayer="The petitioner prays for appropriate relief, including compensation.",
            interim_relief="Stay order on eviction."
        )
        print(petition)
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()



    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA
    
    TITLE: Case A

    **Facts of the Case:**
    Detailed summary of the case goes here.

    **Applicable Legal Provisions:**
    
Section 140 - Wearing the dress or carrying any token used by a soldier, sailor or airman with intent that it may be believed that he is such a soldier, sailor or airman.
Punishment: 3 Months or Fine or Both.
Category: Category details not available

Section 127 - Receiving property taken by war or depredation mentioned in sections 125 and 126.
Punishment: 7 Years + Fine + forfeiture of property.
Category: Category details not available

**Prayer Clause:**
The petitioner prays for appropriate relief, including compensation.

**Interim Relief Sought:**
Stay order on eviction.

    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONS

In [98]:
import pandas as pd

# Step 1: Local IPC Section Dictionary with Details
ipc_section_dict = {
    140: {
        "description": "If someone who is not a military member wears a uniform or carries something resembling a military uniform to deceive others into believing they are a soldier, sailor, or airman, they can be punished with up to three months in jail, a fine of up to five hundred rupees, or both.",
        "offense": "Wearing the dress or carrying any token used by a soldier, sailor or airman with intent that it may be believed that he is such a soldier, sailor or airman.",
        "punishment": "3 Months or Fine or Both."
    },
    127: {
        "description": "If someone receives property knowing it was taken during the commission of certain offenses (mentioned in sections 125 and 126), they can be punished with imprisonment of up to seven years, fined, and the property can be forfeited.",
        "offense": "Receiving property taken by war or depredation mentioned in sections 125 and 126.",
        "punishment": "7 Years + Fine + forfeiture of property."
    },
    # Add more sections here with structured details
}

# Step 2: Map IPC Sections to Cases
def map_ipc_sections(dataset, section_column='issues'):
    """
    Process the dataset to map IPC sections to corresponding legal details.

    1. **Inputs**:
        - `dataset`: A pandas DataFrame containing case titles and IPC sections.
        - `section_column`: The column in the dataset where IPC sections are listed.

    2. **Processing**:
        - Split the IPC section numbers in the `issues` column (e.g., "140, 127").
        - Match each section with its corresponding details from `ipc_section_dict`.

    3. **Outputs**:
        - Adds a new column `ipc_mappings` to the dataset, containing detailed IPC mappings.
    """
    def map_issues_to_ipc(issue_text):
        """
        Map issues (text) to detailed IPC section details.

        **Details**:
        - Extract IPC section numbers from text.
        - Retrieve matching details from `ipc_section_dict`.
        """
        if not isinstance(issue_text, str):
            return "No relevant IPC sections found."

        # Extract numeric section IDs and map them to the IPC dictionary
        ipc_sections = [int(s.strip()) for s in issue_text.split(',') if s.strip().isdigit()]
        ipc_details = {}
        for section in ipc_sections:
            if section in ipc_section_dict:
                ipc_details[f"Section {section}"] = ipc_section_dict[section]
        return ipc_details if ipc_details else "No relevant IPC sections found."

    # Apply mapping to each row in the dataset
    dataset['ipc_mappings'] = dataset[section_column].apply(map_issues_to_ipc)
    return dataset

# Step 3: Draft a Petition
def draft_petition(case_title, summary, ipc_details, prayer, interim_relief=None):
    """
    Generate a detailed legal petition for court submission.

    **Parameters**:
    1. `case_title`: The title of the case.
    2. `summary`: A detailed summary of the facts of the case.
    3. `ipc_details`: A dictionary containing IPC section details for the case.
    4. `prayer`: The relief sought by the petitioner.
    5. `interim_relief`: Optional interim relief requested by the petitioner.

    **Structure**:
    - **Header**: Supreme Court jurisdiction and petition type.
    - **Case Facts**: Detailed description of the case.
    - **IPC Details**: Point-by-point breakdown of offenses, punishments, and descriptions.
    - **Prayer Clause**: The relief sought.
    - **Interim Relief (Optional)**: Any temporary relief required.
    """
    petition = f"""
    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA

    TITLE: {case_title}

    **Facts of the Case:**
    {summary}

    **Applicable Legal Provisions:**
    """

    # Add IPC sections and their details to the petition
    for section, details in ipc_details.items():
        petition += f"\n- {section}:"
        petition += f"\n  - **Offense**: {details.get('offense', 'Offense details not available')}"
        petition += f"\n  - **Punishment**: {details.get('punishment', 'Punishment details not available')}"
        petition += f"\n  - **Description**: {details.get('description', 'Description not available')}"

    petition += f"\n\n**Prayer Clause:**\n- {prayer}"

    if interim_relief:
        petition += f"\n\n**Interim Relief Sought:**\n- {interim_relief}"

    return petition

# Step 4: Main Execution
def main():
    """
    Main function to process cases and generate detailed legal petitions.

    **Workflow**:
    1. Create a sample dataset with case titles and associated IPC sections.
    2. Map IPC sections to their legal details using `map_ipc_sections`.
    3. Generate detailed petitions for each case using `draft_petition`.
    """
    # Sample dataset (replace with actual dataset loading)
    data = {
        "case_title": ["Case A", "Case B"],
        "issues": ["140, 127", "127"]
    }
    df = pd.DataFrame(data)

    # Preprocess dataset
    df.columns = df.columns.str.strip().str.lower()  # Standardize column names

    # Map IPC sections to cases
    processed_df = map_ipc_sections(df)

    # Generate petitions for each case
    for _, row in processed_df.iterrows():
        case_title = row['case_title']
        ipc_details = row['ipc_mappings']

        # Draft a petition for the case
        petition = draft_petition(
            case_title=case_title,
            summary="Detailed summary of the case goes here.",
            ipc_details=ipc_details,
            prayer="The petitioner prays for appropriate relief, including compensation.",
            interim_relief="Stay order on eviction."
        )
        print(petition)
        print("=" * 80)

# Execute the script
if __name__ == "__main__":
    main()



    IN THE HON'BLE SUPREME COURT OF INDIA
    CIVIL/APPELLATE/CRIMINAL JURISDICTION

    PETITION UNDER ARTICLE 32/226 OF THE CONSTITUTION OF INDIA
    
    TITLE: Case A

    **Facts of the Case:**
    Detailed summary of the case goes here.

    **Applicable Legal Provisions:**
    
- Section 140:
  - **Offense**: Wearing the dress or carrying any token used by a soldier, sailor or airman with intent that it may be believed that he is such a soldier, sailor or airman.
  - **Punishment**: 3 Months or Fine or Both.
  - **Description**: If someone who is not a military member wears a uniform or carries something resembling a military uniform to deceive others into believing they are a soldier, sailor, or airman, they can be punished with up to three months in jail, a fine of up to five hundred rupees, or both.
- Section 127:
  - **Offense**: Receiving property taken by war or depredation mentioned in sections 125 and 126.
  - **Punishment**: 7 Years + Fine + forfeiture of property.
  -

In [100]:
import pandas as pd

# Define the dataset
data = {
    "Case Title": [
        "S.C Jain v. Union of India",
        "Aandi v. Superintendent of Police",
        "Pushpa v. Maharashtra",
        "Panchabhai v. Maharashtra",
        "Prabodh Verma v. State of Uttar Pradesh"
    ],
    "Legal Context": [
        "Payment Arrears",
        "Further Investigation",
        "Pension & Salary",
        "Criminal Liability",
        "Employee Rights"
    ],
    "Facts": [
        "Arrears due to delayed payment",
        "Request for detailed investigation",
        "Denial of salary and pension",
        "Joint actions in furtherance of intention",
        "Violation of employee rights"
    ],
    "Prayer Clause": [
        "Direct the respondent to release the arrears of payment with interest.",
        "Order a detailed investigation under Section 173(8) of CrPC.",
        "Direct the respondent to pay the salary and pension due, along with applicable interest.",
        "Grant an injunction against the respondent and hold them liable under Section 34 of IPC.",
        "Declare the suspension of the petitioner unlawful and direct reinstatement with full pay arrears."
    ]
}

# Create the DataFrame
prayer_clauses_df = pd.DataFrame(data)

# Save to CSV
output_file = "/mnt/data/prayer_clauses_judgments.csv"
prayer_clauses_df.to_csv(output_file, index=False)
output_file

'/mnt/data/prayer_clauses_judgments.csv'

In [101]:
import pandas as pd

# Load the CSV file
file_path = '/mnt/data/prayer_clauses_judgments.csv'
data = pd.read_csv(file_path)

# Preview the data
print(data.head())


                                Case Title          Legal Context  \
0               S.C Jain v. Union of India        Payment Arrears   
1        Aandi v. Superintendent of Police  Further Investigation   
2                    Pushpa v. Maharashtra       Pension & Salary   
3                Panchabhai v. Maharashtra     Criminal Liability   
4  Prabodh Verma v. State of Uttar Pradesh        Employee Rights   

                                       Facts  \
0             Arrears due to delayed payment   
1         Request for detailed investigation   
2               Denial of salary and pension   
3  Joint actions in furtherance of intention   
4               Violation of employee rights   

                                       Prayer Clause  
0  Direct the respondent to release the arrears o...  
1  Order a detailed investigation under Section 1...  
2  Direct the respondent to pay the salary and pe...  
3  Grant an injunction against the respondent and...  
4  Declare the suspen

In [102]:
import pandas as pd

# Define an expanded dataset with 100 cases
cases_data = {
    "Case Title": [
        f"Case {i} v. Respondent {i}" for i in range(1, 101)
    ],
    "Legal Context": [
        "Property Dispute" if i % 5 == 0 else
        "Criminal Appeal" if i % 5 == 1 else
        "Labor Dispute" if i % 5 == 2 else
        "Civil Rights" if i % 5 == 3 else
        "Constitutional Challenge"
        for i in range(1, 101)
    ],
    "Facts": [
        "Dispute over ownership and possession of property." if i % 5 == 0 else
        "Appeal against conviction and sentence passed by lower court." if i % 5 == 1 else
        "Non-payment of wages and unfair termination." if i % 5 == 2 else
        "Violation of fundamental rights under Article 14 and 21." if i % 5 == 3 else
        "Challenge to the constitutional validity of a legislative act."
        for i in range(1, 101)
    ],
    "Prayer Clause": [
        "Direct the respondent to transfer ownership and possession of the disputed property to the petitioner." if i % 5 == 0 else
        "Set aside the conviction and sentence and acquit the petitioner of all charges." if i % 5 == 1 else
        "Order the respondent to reinstate the petitioner and release all due wages with interest." if i % 5 == 2 else
        "Declare the actions of the respondent as unconstitutional and grant appropriate relief to the petitioner." if i % 5 == 3 else
        "Strike down the impugned legislative act as violative of the Constitution."
        for i in range(1, 101)
    ]
}

# Create a DataFrame
expanded_prayer_clauses_df = pd.DataFrame(cases_data)

# Save to CSV
output_file = "/mnt/data/expanded_prayer_clauses_judgments.csv"
expanded_prayer_clauses_df.to_csv(output_file, index=False)

print(f"Dataset with 100 cases saved to: {output_file}")


Dataset with 100 cases saved to: /mnt/data/expanded_prayer_clauses_judgments.csv


In [103]:
from torch.utils.data import Dataset

class PrayerClauseDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        """
        Initializes the dataset.

        Args:
            data (pd.DataFrame): A DataFrame containing 'Input' and 'Prayer Clause' columns.
            tokenizer: The tokenizer to process the text data.
            max_length (int): The maximum sequence length for tokenization.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Returns a single sample at the specified index.

        Args:
            idx (int): The index of the sample.

        Returns:
            dict: A dictionary containing input IDs, attention masks, and labels.
        """
        input_text = self.data.iloc[idx]["Input"]
        label_text = self.data.iloc[idx]["Prayer Clause"]

        # Tokenize the input text
        inputs = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        # Tokenize the label text
        labels = self.tokenizer(
            label_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "labels": labels["input_ids"].squeeze(0),
        }


In [105]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer

# Load Dataset (Replace with actual file path or dataset)
data = {
    "Case Title": [
        "S.C Jain v. Union of India",
        "Aandi v. Superintendent of Police",
        "Pushpa v. Maharashtra",
        "Panchabhai v. Maharashtra",
        "Prabodh Verma v. State of Uttar Pradesh"
    ],
    "Facts": [
        "Arrears due to delayed payment",
        "Request for detailed investigation",
        "Denial of salary and pension",
        "Joint actions in furtherance of intention",
        "Violation of employee rights"
    ],
    "Prayer Clause": [
        "Direct the respondent to release the arrears of payment with interest.",
        "Order a detailed investigation under Section 173(8) of CrPC.",
        "Direct the respondent to pay the salary and pension due, along with applicable interest.",
        "Grant an injunction against the respondent and hold them liable under Section 34 of IPC.",
        "Declare the suspension of the petitioner unlawful and direct reinstatement with full pay arrears."
    ]
}

prayer_clauses_df = pd.DataFrame(data)

# Step 1: Standardize Column Names
prayer_clauses_df.columns = prayer_clauses_df.columns.str.strip().str.title()

# Step 2: Ensure Required Columns Exist
if "Input" not in prayer_clauses_df.columns:
    if "Case Title" in prayer_clauses_df.columns and "Facts" in prayer_clauses_df.columns:
        prayer_clauses_df["Input"] = prayer_clauses_df["Case Title"] + ": " + prayer_clauses_df["Facts"]
    else:
        raise ValueError("Cannot create 'Input' column: Missing 'Case Title' or 'Facts' columns.")

if "Prayer Clause" not in prayer_clauses_df.columns:
    raise ValueError("The dataset must contain a 'Prayer Clause' column.")

# Step 3: Split the Dataset
train_data, val_data = train_test_split(prayer_clauses_df, test_size=0.2, random_state=42)

# Step 4: Define a Custom Dataset Class
class PrayerClauseDataset:
    def __init__(self, dataframe, tokenizer, max_length=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        input_text = row["Input"]
        target_text = row["Prayer Clause"]

        encoding = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        target_encoding = self.tokenizer(
            target_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "labels": target_encoding["input_ids"].squeeze()
        }

# Step 5: Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Step 6: Create Dataset Instances
train_dataset = PrayerClauseDataset(train_data, tokenizer)
val_dataset = PrayerClauseDataset(val_data, tokenizer)

# Step 7: Print Dataset Information
print(f"Train data size: {len(train_dataset)}")
print(f"Validation data size: {len(val_dataset)}")

# Example of fetching a single data point
example = train_dataset[0]
print("Example data point:")
print(f"Input IDs: {example['input_ids']}")
print(f"Attention Mask: {example['attention_mask']}")
print(f"Labels: {example['labels']}")


Train data size: 4
Validation data size: 1
Example data point:
Input IDs: tensor([  101, 10975,  7875,  7716,  2232,  2310, 17830,  1058,  1012,  2110,
         1997, 14940,  7970,  1024, 11371,  1997,  7904,  2916,   102,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     

In [106]:
!pip install transformers datasets




In [108]:
!pip install --upgrade transformers




In [109]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments

# Load T5 model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Define the TrainingArguments
training_args = TrainingArguments(
    output_dir="./prayer_clause_model",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
)

# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer.train()




Epoch,Training Loss,Validation Loss
1,No log,7.20547
2,No log,6.684239
3,No log,6.419907


TrainOutput(global_step=3, training_loss=9.878597259521484, metrics={'train_runtime': 58.7935, 'train_samples_per_second': 0.204, 'train_steps_per_second': 0.051, 'total_flos': 406025404416.0, 'train_loss': 9.878597259521484, 'epoch': 3.0})

In [110]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainer, Seq2SeqTrainingArguments

# Load T5 model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Define Seq2SeqTrainingArguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./prayer_clause_model",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    predict_with_generate=True,  # Enable generation for predictions
    logging_dir="./logs",        # Logging directory
)

# Define Seq2SeqTrainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,  # Pass the tokenizer for sequence generation tasks
)

# Train the model
trainer.train()



  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss
1,No log,7.20547
2,No log,6.684239
3,No log,6.419907


TrainOutput(global_step=3, training_loss=9.878597259521484, metrics={'train_runtime': 44.2925, 'train_samples_per_second': 0.271, 'train_steps_per_second': 0.068, 'total_flos': 406025404416.0, 'train_loss': 9.878597259521484, 'epoch': 3.0})

In [111]:
from transformers import Seq2SeqTrainer

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)



  trainer = Seq2SeqTrainer(


In [112]:
import transformers
print(transformers.__version__)


4.47.1
