## Loading Judgement Texts

In [None]:
# prompt: read and load each file in a directory of text files into a list called files

# directory_path="/content/extracted_text"

# import os
# files=[]
# for filename in os.listdir(directory_path):
#   with open(os.path.join(directory_path, filename), 'r') as f:
#     files.append(f.read())


In [6]:
# With BigQuery DataFrames, you can use many familiar Pandas methods, but the
# processing happens in BigQuery rather than the runtime, allowing you to work with larger
# DataFrames that would otherwise not fit in the runtime memory.
# Learn more here: https://cloud.google.com/python/docs/reference/bigframes/latest

import bigframes.pandas as bf

# BigQuery DataFrames settings; both values must match the dataset that is queried below.
bf.options.bigquery.location = "asia-south2" # location of the dataset being queried
bf.options.bigquery.project = "extreme-course-459207-v5" # project that owns the dataset being queried

In [7]:
# Read the judgements table through BigQuery DataFrames.
df = bf.read_gbq("extreme-course-459207-v5.legal_judgement_texts.judgements") # fully qualified table ID (project.dataset.table)

incompatibilies with previous reads of this table. To read the latest
version, set `use_cache=False` or close the current session with
Session.close() or bigframes.pandas.close_session().
  exec(code_obj, self.user_global_ns, self.user_ns)


In [8]:
# Preview the first five rows to check the columns that were loaded.
df.head(5)

Unnamed: 0,filename,text
0,&name=MMH03102024LPA9882024_193659.txt,LPA No.988/2024 Page 1 of 3$~27 * IN THE HIGH ...
1,&name=CHS11072024OMPMISCCOMM5182024_193006.txt,O.M.P.(MISC.)(COMM.) 518/2024 Page 1of2$~49 * ...
2,&name=ABL04122024SC5982023_151341.txt,CS(COMM) 598/2023 Page 1 of 9$~33 * IN THE HIG...
3,&name=62712092024CW20622023_175803.txt,W.P. (C) 2062/2023 Page 1 of 7 $~ * IN THE H...
4,&name=MKO28052024CRLMM35412023_180926.txt,CRL.M.C.3541/2023 ...


In [9]:
# Display the raw text of the first judgement to see what the model will receive.
df.iloc[0]['text']

"LPA No.988/2024\nPage 1 of 3$~27\n* IN THE HIGH COURT OF DELHI AT NEW DELHI\n+ LPA 988/2024 & C.M.Nos.58144-58146/2024\nMASTER EKLAVYA KOCHHAR & ORS. .....Appellants\nThrough: Mr.Nitin Bhardwaj with\nMr.M.P.Bhargava and Dr.Anurag\nBhardwaj, Advocates.\nversus\nBAL BHARATI PUBLIC SCHOOL & ORS. .....Respondents\nThrough: Mr.Kamal Gupta with Mr.Sparsh\nAggarwal and Ms.Yosha Dutt,\nAdvocates for School.\nMr.Karn Bhardwaj, ASC for GNCTD\nwith Mr.Shubham Singh, Mr.Rajat\nGaba and Mr.Saurabh Dahiya,\nAdvocates for DOE.\n% Date of Decision: 03rdOctober, 2024\nCORAM:\nHON'BLE THE CHIEF JUSTICE\nHON'BLE MR. JUSTICE TUSHAR RAO GEDELA\nJUDGMENT\nMANMOHAN, ACJ : (ORAL)\n1. Present appeal has been filed challenging the order dated 24thJuly,\n2024 passed by learned Single Judge in W.P.(C) 6112/2020 whereby learned\nSingle Judge dismissed the application being C.M.No.41586/2024 filed by\nthe Appellants herein to restrain Respondent no.1/school from charging fees\nas per the rejected fee structure pro

## Legal Entities Extraction

In [10]:
# Install the Google Gen AI SDK with the %pip magic so that the package lands in
# the environment of the running kernel rather than whichever pip3 is on PATH.
%pip install --upgrade --user google-genai



Use the following code to request a model response from Gemini on Vertex AI.

In [11]:
from google import genai
from google.genai import types
import base64

def generate(input_text):
  """Ask Gemini on Vertex AI to extract the key components of a judgement.

  The extraction prompt is prepended to ``input_text`` and sent as a single
  user message; the streamed response chunks are concatenated and returned.

  Args:
      input_text: Raw text of one legal judgement.

  Returns:
      The model's reply as one string. The prompt asks for JSON only, but the
      reply is returned unparsed.
  """

  client = genai.Client(
      vertexai=True,
      project="extreme-course-459207-v5",
      location="us-central1",
  )

  # NOTE(review): the bullet list below mentions "Issues" and "Legal
  # Dependencies", while the requested JSON structure has no "issues" key and
  # names the last field "legal_citations". The prompt text is left untouched
  # here because changing it changes the model output - confirm the intent.
  prompt="""
  You will be provided with a raw legal judgement:

  Your task is to extract the following key components from the judgement:
.

  *  **Parties:** The entities (individuals, organizations, etc.) involved in the case.
  *  **Facts:** A summary of the key events and circumstances that led to the legal dispute.
  *  **Issues:** The specific legal questions or points of contention that the court must resolve.
  *  **Rulings:** The court's decisions or conclusions on each of the issues presented.
  *  **Legal Dependencies:** Any statutes, precedents, or other legal authorities that the court relied upon in making its rulings.

  Output the extracted information in a structured JSON format. The JSON object should have the following structure:
  {
     \"parties\": [
      {
       \"name\": \"\",
       \"role\": \"\"
      }
     ],
     \"facts\": \"\",
     \"rulings\":\"\",
     \"legal_citations\": [
      \"\",
      \"\"
   ]
  }


  Ensure that the extracted information is accurate and comprehensive. Ensure the citations are correct and DO NOT HALLUCINATE".
  Return only the json output and no other text
  """

  text=prompt+input_text

  text1 = types.Part.from_text(text=text)

  model = "publishers/google/models/gemini-2.0-flash-001"
  contents = [
    types.Content(
      role="user",
      parts=[
        text1
      ]
    )
  ]
  generate_content_config = types.GenerateContentConfig(
    temperature = 0.30,
    top_p = 0.95,
    seed = 0,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [types.SafetySetting(
      category="HARM_CATEGORY_HATE_SPEECH",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_DANGEROUS_CONTENT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_HARASSMENT",
      threshold="OFF"
    )],
  )

  pieces = []

  for chunk in client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    # A streamed chunk may carry no text (its .text is then None); adding None
    # to a str would raise TypeError, so such chunks are skipped.
    piece = chunk.text
    if piece:
      pieces.append(piece)

  return "".join(pieces)

## Parsing the Model's JSON Output and Writing It to BigQuery

In [12]:
import json

def str_to_dict(input_text: str) -> dict:
  """Parse the JSON object embedded in a model reply.

  Decoding starts at the first "{" and stops at the end of that JSON object,
  so anything before it (such as an opening Markdown code fence) and anything
  after it (a closing fence, newlines, trailing prose) is ignored.

  Args:
      input_text: Raw model reply expected to contain one JSON object.

  Returns:
      The decoded JSON object as a dict.

  Raises:
      ValueError: If the reply contains no "{" at all.
      json.JSONDecodeError: If the text from the first "{" onwards is not valid
          JSON (a subclass of ValueError).
  """
  start = input_text.find("{")
  if start == -1:
    # str.find returns -1 here; slicing with it would silently parse only the
    # last character of the reply, so fail explicitly instead.
    raise ValueError("No JSON object found in the input text")
  # raw_decode parses a single JSON document beginning at `start` and reports
  # where it ended, which tolerates trailing content after the object.
  parsed, _end = json.JSONDecoder().raw_decode(input_text, start)
  return parsed

In [13]:
# Try the extraction on a single judgement (the second row) before running it over many rows.
sample_judgement=df.iloc[1]['text']
# generate() returns the raw model reply; str_to_dict() turns it into a dict.
sample_op=str_to_dict(generate(sample_judgement))

In [14]:
# Inspect the parsed result for the sample judgement.
sample_op

{'parties': [{'name': 'GIRISH REHANI', 'role': 'Petitioner'},
  {'name': 'KELTECH INFRATRUCTURE LIMITED', 'role': 'Respondent'}],
 'facts': "The petitioner, Girish Rehani, filed a petition under Section 29A(4) of the Arbitration and Conciliation Act 1996 seeking an extension of the mandate of the arbitrator currently handling the dispute between the parties. The arbitrator's mandate was set to expire on July 16, 2024, and a six-month extension was requested. The respondent, Keltech Infrastructure Limited, did not object to the extension.",
 'rulings': 'The court granted the petition and extended the mandate of the learned Arbitrator by six months, effective from July 16, 2024. The petition was disposed of accordingly.',
 'legal_citations': ['Section 29A(4) of the Arbitration and Conciliation Act 1996']}

In [19]:
import pandas_gbq
from google.cloud import bigquery
import pandas as pd
import uuid  # Optional, if you prefer UUIDs instead of index as ID

def process_and_store_judgements(df):
    """
    Extract structured data from every judgement in ``df``.

    Each row's 'text' value is passed through ``generate`` and ``str_to_dict``,
    and the resulting dict is tagged with a random UUID string under 'id'.
    A row that raises at any point is reported with ``print`` and left out of
    the result. Nothing is written to BigQuery here; the collected records are
    only returned.

    Args:
        df: DataFrame that provides ``iterrows()`` and a 'text' column.

    Returns:
        A pandas DataFrame with one row per successfully processed judgement.
    """
    extracted = []
    for row_label, record in df.iterrows():
        try:
            parsed = str_to_dict(generate(record['text']))
            parsed["id"] = str(uuid.uuid4())
        except Exception as err:
            print(f"Error processing row {row_label}: {err}")
        else:
            # Only reached when parsing and tagging both succeeded.
            extracted.append(parsed)

    return pd.DataFrame(extracted)


In [27]:
def df_to_table(
    results_df,
    table_id="extreme-course-459207-v5.legal_judgement_texts.extracted_judgements",
):
    """
    Load the extracted judgements into a BigQuery table and wait for completion.

    The destination table is overwritten (WRITE_TRUNCATE) on every call.

    Args:
        results_df: DataFrame with the columns 'id', 'parties', 'facts',
            'rulings' and 'legal_citations' described by the schema below.
        table_id: Fully qualified destination table ("project.dataset.table").
            Defaults to the table this notebook has always written to.

    Raises:
        Whatever the load job raises from ``result()`` when the load fails.
    """
    # Schema of the destination table: 'parties' is a repeated record of
    # name/role pairs and 'legal_citations' is a repeated string.
    schema = [
        bigquery.SchemaField("id", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("parties", bigquery.enums.SqlTypeNames.RECORD, mode="REPEATED", fields=[
            bigquery.SchemaField("name", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("role", bigquery.enums.SqlTypeNames.STRING)
        ]),
        bigquery.SchemaField("facts", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("rulings", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("legal_citations", bigquery.enums.SqlTypeNames.STRING, mode="REPEATED")
    ]
    job_config = bigquery.LoadJobConfig(
        schema=schema,
        write_disposition="WRITE_TRUNCATE",
    )

    client = bigquery.Client()

    load_job = client.load_table_from_dataframe(
        results_df,
        table_id,
        job_config=job_config,
    )
    # Starting the job does not wait for it; result() blocks until the load has
    # finished and surfaces a failed load as an exception instead of silently
    # dropping it.
    load_job.result()

In [21]:
# prompt: create a sample df with first 100 records

# Take the first 100 rows and convert them with to_pandas() so the batch
# function below can iterate over them row by row.
sample_df = df.head(100).to_pandas()

In [22]:
# Run the extraction over the sampled rows; rows that raise are printed and skipped.
result=process_and_store_judgements(sample_df)

In [23]:
# Preview the extracted records.
result.head()

Unnamed: 0,parties,facts,rulings,legal_citations,id
0,"[{'name': 'MASTER EKLAVYA KOCHHAR & ORS.', 'ro...",The Appellants filed an appeal challenging the...,"The Court dismissed the appeal, upholding the ...","[W.P.(C) 6112/2020, C.M.No.41586/2024, W.P. (C...",0bf2294f-3f5c-4739-8abe-bb13f66d2583
1,"[{'name': 'GIRISH REHANI', 'role': 'Petitioner...","The petitioner, Girish Rehani, filed a petitio...",The court granted the petition and extended th...,[Section 29A(4) of the Arbitration and Concili...,f00eddea-a05e-4ea2-aef2-715375bacf00
2,"[{'name': 'TATA SONS PRIVATE LIMITED', 'role':...",Tata Sons Private Limited filed a suit against...,"The court ruled in favor of the plaintiff, Tat...","[Indian Companies Act, 1913, Trade Marks Act, ...",777903aa-8d70-4800-8e6e-936cf688e023
3,"[{'name': 'Jaswant Singh Juneja', 'role': 'Pet...","The petitioner, Jaswant Singh Juneja, propriet...",The court allowed the writ petition and quashe...,"[Income Tax Act, 1961, Section 148, Income Tax...",a4b5fb8b-309b-415b-868d-c75d72582bc9
4,"[{'name': 'SHRI CHANDRAJIT SAHA', 'role': 'Pet...",The respondent/complainant placed an order wit...,The court allowed the petition and quashed the...,"[Section 482 Cr.P.C., Sections 138/142 read wi...",e00af299-4030-4fde-a93c-40ebcf1162a7


In [28]:
# Write the extracted records to BigQuery; df_to_table uses WRITE_TRUNCATE, so the destination table is replaced.
df_to_table(result)