# Installing packages

In [37]:
!pip install fuzzywuzzy
!pip install python-Levenshtein
!pip install networkx
!pip install matplotlib
!pip1 install pandas
!pip install tqdm
!pip install google-generativeai
!pip install python-dotenv

/bin/bash: line 1: pip1: command not found
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [1]:
# prompt: mount drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Load Extracted Data

In [2]:
import pandas as pd
citations_df = pd.read_json('./combined_citations_with_casenumber_updated_2.jsonl', lines=True)

In [3]:
citations_df.head()

Unnamed: 0,source,target,target_case_number,year,target_case_number_from_data
0,sc-spl-la-no-52/2003,gamini dissanayake v m.c.m. kaleel,1993/2-slr-135,,
1,sc-spl-la-no-52/2003,thilak karunaratne v bandaranayake,1993/1-slr-91,,
2,sc-spl-la-no-52/2003,galappati v bulegoda,1997/1-slr-393,,
3,sc-fr-252/2007,bandhua mukti morcha v union of india,a.i.r. 1984 s.c. 802,,
4,sc-fr-252/2007,maharaj singh v uttara pradesh,a.i.r. 1976 s.c. 2602,,


In [6]:
citations_df.shape

(76185, 5)

# Network Analysis

In [7]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

graph = nx.DiGraph()

# Parse the data and add nodes and edges
for _, row in citations_df.iterrows():
    case_id = row["source"]
    citations = row["target"]

    # Add the case as a node
    graph.add_node(case_id)

    # If citations exist and are not "no cases cited"
    if citations != "no cases cited":
        graph.add_edge(case_id, citations)

# # Visualize the graph
# plt.figure(figsize=(50, 40))

# # Position nodes
# pos = nx.spring_layout(graph)

# # Draw the network
# nx.draw_networkx_nodes(graph, pos, node_size=70, node_color="lightblue")
# nx.draw_networkx_edges(graph, pos, arrowstyle="->", arrowsize=2)
# # nx.draw_networkx_labels(graph, pos, font_size=10, font_color="black")

# # Title for the graph
# plt.title("Case Citations Network", fontsize=16)

# # Display the graph
# plt.show()

In [17]:
in_degree = graph.in_degree()
out_degree = graph.out_degree()
centrality = nx.degree_centrality(graph)

In [29]:
highest_in_degree = sorted(in_degree, key=lambda x: x[1], reverse=True)[:100]
print("Highest In-Degree Nodes:")
for node, degree in highest_in_degree:
    print(f"{node} -> {degree}")

Highest In-Degree Nodes:
fernando v fernando -> 219
silva v silva -> 188
perera v perera -> 111
alwis v piyasena fernando -> 98
council of civil service unions v minister for the civil service -> 97
ridge v baldwin -> 73
perera v fernando -> 72
ramalingam v thangarajah -> 66
sumanasena v attorney general -> 55
rasheed ali v mohamed ali -> 52
sarwan singh v state of punjab -> 52
de silva v de silva -> 49
bank of ceylon v kaleel and others -> 45
rustom v hapangama -> 45
thajudeen v sri lanka tea board and another -> 45
tillekeratne v bastian -> 44
queen v kularatne -> 44
juliana hamine v don thomas -> 43
corea v appuhamy -> 43
fernando v perera -> 42
wanigaratne v juwanis appuhamy -> 41
chelliah v wijenathan -> 41
perera v attorney general -> 40
silva v fernando -> 40
perera v silva -> 39
punchi nona v padumasena and others -> 38
king v appuhamy -> 38
mariam beebee v seyed mohamed -> 36
biso menika v cyril de alwis -> 35
dharmadasa v jayasena -> 35
associated provincial picture houses lt

# Create Dataframe for top citations

In [32]:
import pandas as pd

# Initialize an empty DataFrame
in_data_df = pd.DataFrame(columns=['nodes', 'as_citations','no of cases appeared'])

# Find the nodes with the highest centrality
highest_in_degree = sorted(in_degree, key=lambda x: x[1], reverse=True)[:100]

# Iterate over the highest centrality nodes and populate the DataFrame
for node, centrality_value in highest_in_degree:
    as_citations = citations_df[citations_df['target'] == node]['source'].tolist()  # Filter sources where target matches the node
    as_citations_no = len(as_citations)
    # Append the data as a new row in the DataFrame
    # as_cases = citations_df[citations_df['source'] == node]['target'].tolist()
    # as_cases_no = len(as_cases)
    in_data_df = pd.concat([in_data_df, pd.DataFrame({'nodes': [node], 'as_citations': [as_citations], 'no of cases appeared' : [as_citations_no]})], ignore_index=True)

In [33]:
in_data_df.head()

Unnamed: 0,nodes,as_citations,no of cases appeared
0,fernando v fernando,"[sc-spl-la-no-7/2004, sc-spl-la-no-83/2008, ca...",233
1,silva v silva,"[ca-dc-1005/1997, ca-dc-913/1997, ca-la-399/20...",193
2,perera v perera,"[ca-dc-839/1998, ca-dc-400-2000400/2000, ca-wr...",113
3,alwis v piyasena fernando,"[sc-spl-la-no-79/2008, ca-dc-776/1999, ca-dc-3...",98
4,council of civil service unions v minister for...,"[sc-fr-589/2009, sc-spl-la-no-108/2008, ca-wri...",98


# Find Case number of citations

In [25]:
main_data = pd.read_json('./Copy of merged_sc_ca_sd_sslr_nlr_collection_oct_24.jsonl', lines=True)

In [34]:
from fuzzywuzzy import fuzz

def find_matching_case_number(word, cutoff=70):
    # Convert the word to lowercase for case-insensitive matching
    word_lower = word.lower()

    # Initialize variables to store the best match and highest score
    best_match = None
    highest_score = 0

    # Iterate through the rows of main_data
    for index, row in main_data.iterrows():
        # Get the nameofparties and convert to lowercase
        nameofparties_lower = row['nameofparties'].lower()

        # Compute the fuzzy match score
        score = fuzz.UQRatio(word_lower, nameofparties_lower)

        # Update the best match if the current score is higher than the highest score
        if score > highest_score and score >= cutoff:
            highest_score = score
            best_match = row['standard_casenumber']
        elif score > highest_score:
            highest_score = score
            best_match = f">>{row['standard_casenumber']}"


    return best_match



In [35]:
from tqdm import tqdm

# Apply the function with progress tracking
tqdm.pandas()
in_data_df['node_casenumber'] = in_data_df['nodes'].progress_apply(find_matching_case_number)

100%|██████████| 100/100 [03:14<00:00,  1.95s/it]


In [36]:
in_data_df.head()

Unnamed: 0,nodes,as_citations,no of cases appeared,node_casenumber
0,fernando v fernando,"[sc-spl-la-no-7/2004, sc-spl-la-no-83/2008, ca...",233,3-nlr-99
1,silva v silva,"[ca-dc-1005/1997, ca-dc-913/1997, ca-la-399/20...",193,2006/v2-sri-lr-80
2,perera v perera,"[ca-dc-839/1998, ca-dc-400-2000400/2000, ca-wr...",113,1978/v2-sri-lr-191
3,alwis v piyasena fernando,"[sc-spl-la-no-79/2008, ca-dc-776/1999, ca-dc-3...",98,1993/v1-sri-lr-119
4,council of civil service unions v minister for...,"[sc-fr-589/2009, sc-spl-la-no-108/2008, ca-wri...",98,>>ca-writ-548/2010


# Categorizing

In [40]:
import os
import dotenv
import google.generativeai as genai


In [41]:
dotenv.load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API'))

model = genai.GenerativeModel("gemini-1.5-flash")

In [42]:
def find_case_type(case_summary):
    response = model.generate_content(
        f"""
        Input: {case_summary}

        Using the given input Analyze and find domain , category of the case
        - Use the Give info

        1. Civil Cases: These involve disputes between individuals or entities regarding rights, property, or obligations. Common categories include:

        Contract disputes: Breaches of agreements or contracts.
        Property disputes: Issues involving land, ownership, and real estate.
        Tort cases: Claims for personal injury or damage caused by negligence or wrongdoing.
        Family law cases: Including divorce, child custody, and inheritance issues.

        2. Criminal Cases: These involve violations of the law that are prosecuted by the state. Categories include:

        Serious crimes: Such as murder, sexual offenses, and drug trafficking.
        Minor crimes: Like theft, assault, or vandalism.
        Offenses against national security: These cases are often handled under special laws like the Prevention of Terrorism Act.

        3. Administrative and Constitutional Law: Cases related to the enforcement of fundamental rights, governance, or disputes involving state action. This includes:

        Infringement of fundamental rights: Cases concerning rights like equality, freedom of speech, and protection from torture.
        Public law disputes: Involving the conduct of government agencies or officials, often resolved through the Supreme Court.

        4. Family Law and Personal Law: Sri Lanka has several personal law systems, particularly:

        Muslim Personal Law: Governs matters like marriage, divorce, and inheritance for Muslims.
        Kandyan and Thesavalamai Law: Customary laws for specific ethnic communities, like the Kandyan Sinhalese or Tamil families.
        General Law: A combination of Roman-Dutch and English law governing the general population unless otherwise specified by personal laws

        - catergorize only as one domain and category

        - give the output as "domain, category"


        Output:
        """
    )

    return response.text

In [45]:
import ast
import pandas as pd

# Ensure 'standard_casenumber', 'Domain', and 'Category' columns exist in in_data_df
for col in ['standard_casenumber', 'Domain', 'Category']:
    if col not in in_data_df.columns:
        in_data_df[col] = None

for i in range(len(in_data_df)):
    try:
        # Extract and evaluate the case name
        case_name = in_data_df.iloc[i]['as_citations'][0]
        main_case_name = in_data_df.iloc[i]['nodes']

        # Find the case number and summary
        matching_case = main_data[main_data['standard_casenumber'] == case_name]
        if matching_case.empty:
            raise ValueError(f"No matching case found for case name: {case_name}")

        case_number = matching_case['standard_casenumber'].iloc[0]
        case_summary = matching_case['decision_text'].iloc[0]

        # Find the main case number
        main_case_match = main_data[main_data['normalized_names'] == main_case_name]
        if not main_case_match.empty:
            main_case_number = main_case_match['standard_casenumber'].iloc[0]
        else:
            main_case_number = 'not found in main_data'

        print(f"case number is {case_number}")
        # print(f"case summary is {case_summary}")

        # Find domain and category
        d_and_t = find_case_type(case_summary)  # Ensure `find_case_type` is defined
        in_data_df.at[i, 'standard_casenumber'] = main_case_number
        in_data_df.at[i, 'Domain'] = d_and_t.split(",")[0].strip()
        in_data_df.at[i, 'Category'] = d_and_t.split(",")[1].strip()
        print(d_and_t)
    except Exception as e:
        print(f"Error processing case {i}: {str(e)}")


case number is sc-spl-la-no-7/2004
Civil Cases, Property disputes

case number is ca-dc-1005/1997
Civil, Property disputes

case number is ca-dc-839/1998
Civil Cases, Contract disputes (or Property disputes, depending on the underlying nature of the original District Court case)

case number is sc-spl-la-no-79/2008
Criminal, Serious crimes

case number is sc-fr-589/2009
Administrative and Constitutional Law, Infringement of fundamental rights

case number is sc-spl-la-no-71/2007
Civil Cases, Property disputes

case number is ca-rv-08/2014
Civil, Property disputes

case number is ca-tax-06/2016
Administrative and Constitutional Law, Tax Dispute

case number is ca-hcc-220/2010
Criminal, Serious crimes

case number is sc-spl-la-no-09/2002
Civil Cases, Property disputes

case number is ca-hcc-0422/2017
Criminal, Serious crimes

case number is sc-spl-la-no-49/2003
Civil Cases, Property disputes

case number is ca-phc-24/2015
Criminal, Minor crimes

case number is ca-dc-mt-lavinia-1963/2005


In [46]:
in_data_df.head()

Unnamed: 0,nodes,as_citations,no of cases appeared,node_casenumber,standard_casenumber,Domain,Category
0,fernando v fernando,"[sc-spl-la-no-7/2004, sc-spl-la-no-83/2008, ca...",233,3-nlr-99,3-nlr-99,Civil Cases,Property disputes
1,silva v silva,"[ca-dc-1005/1997, ca-dc-913/1997, ca-la-399/20...",193,2006/v2-sri-lr-80,2006/v2-sri-lr-80,Civil,Property disputes
2,perera v perera,"[ca-dc-839/1998, ca-dc-400-2000400/2000, ca-wr...",113,1978/v2-sri-lr-191,1978/v2-sri-lr-191,Civil Cases,Contract disputes (or Property disputes
3,alwis v piyasena fernando,"[sc-spl-la-no-79/2008, ca-dc-776/1999, ca-dc-3...",98,1993/v1-sri-lr-119,1993/v1-sri-lr-119,Criminal,Serious crimes
4,council of civil service unions v minister for...,"[sc-fr-589/2009, sc-spl-la-no-108/2008, ca-wri...",98,>>ca-writ-548/2010,not found in main_data,Administrative and Constitutional Law,Infringement of fundamental rights
