In [2]:
%pip install neo4j


Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/312.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.4/312.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.28.1


In [None]:
# #cases, acts reference , but no all the relevant acts displaying for a case
# # 📦 Required imports
# import json
# import re
# import pandas as pd
# from neo4j import GraphDatabase

# # 🔹 Step 1: Load JSON file
# file_path = r"C:\Users\User\Desktop\Legal-Research-Platform-Core\resources\cases_2024.json"
# with open(file_path, 'r', encoding='utf-8') as file:
#     case_data = json.load(file)

# # 🔹 Step 2: Regular expression patterns for act references
# act_patterns = [
#     r"Bail\s+Act\s+No\.\s*\d+\s*of\s*\d+",
#     r"Penal\s+Code\s*\(Amendment\)\s*Act\s+No\.\s*\d+\s*of\s*\d+",
#     r"Evidence\s*\(Special\s*Provisions\)\s*Act\s+No\.\s*\d+",
#     r"Code\s+of\s+Criminal\s+Procedure\s*Act\s+No\.\s*\d+",
#     r"Constitution\s+of\s+the\s+Democratic\s+Socialist\s+Republic\s+of\s+Sri\s+Lanka",
#     r"Criminal\s+Procedure\s+Code\s+Act\s+No\.\s*\d+",
#     r"Children\s+and\s+Young\s+Persons\s+Ordinance\s*Cap\.\s*\d+"
# ]
# compiled_patterns = [re.compile(p, re.IGNORECASE) for p in act_patterns]

# # 🔹 Step 3: Extract acts per case
# case_act_map = []
# for case in case_data:
#     case_id = case.get("id", "Unknown")
#     content = case.get("text", "")

#     acts_found = set()
#     for pattern in compiled_patterns:
#         matches = pattern.findall(content)
#         for match in matches:
#             acts_found.add(match.strip())

#     case_act_map.append({
#         "case_id": case_id,
#         "acts": list(acts_found)
#     })

# # 🔹 Step 4: Save & Preview
# df = pd.DataFrame(case_act_map)
# pd.set_option('display.max_colwidth', None)
# from IPython.display import display
# display(df)
# df.to_csv("case_to_acts_array.csv", index=False, encoding="utf-8")

# # 🔹 Step 5: Neo4j connector class
# class Neo4jConnector:
#     def __init__(self, uri, user, password):
#         self.driver = GraphDatabase.driver(uri, auth=(user, password))

#     def close(self):
#         self.driver.close()

#     def push_case_with_acts(self, case_id, acts):
#         with self.driver.session() as session:
#             session.execute_write(self._create_graph_tx, case_id, acts)

#     @staticmethod
#     def _create_graph_tx(tx, case_id, acts):
#         # Create only case_id property — no filename
#         tx.run(
#             """
#             MERGE (c:Case {id: $case_id})
#             SET c.case_id = $case_id
#             """, case_id=case_id
#         )

#         for act in acts:
#             tx.run(
#                 """
#                 MERGE (a:Act {name: $act})
#                 WITH a
#                 MATCH (c:Case {id: $case_id})
#                 MERGE (c)-[:REFERS_TO]->(a)
#                 """, case_id=case_id, act=act
#             )

# # 🔹 Step 6: Connect to Neo4j
# neo4j_conn = Neo4jConnector(uri="bolt://localhost:7687", user="neo4j", password="neo4gS@123")

# # 🔄 Step 7: Insert all cases + acts
# for entry in case_act_map:
#     if entry["acts"]:
#         neo4j_conn.push_case_with_acts(entry["case_id"], entry["acts"])

# # ✅ Step 8: Close connection
# neo4j_conn.close()


In [11]:
# 📦 Required imports
import json
import re
import pandas as pd
from neo4j import GraphDatabase
from IPython.display import display

# 📁 🔹 Step 1: Load JSON file
# This path assumes you are running in an environment like Google Colab with your file in Google Drive.
# Make sure to mount your drive first.
file_path = r"/content/drive/MyDrive/FYP/cases_2024.json"

try:
    with open(file_path, 'r', encoding='utf-8') as file:
        case_data = json.load(file)
    print("Successfully loaded case data.")
except FileNotFoundError:
    print(f"Error: The file was not found at {file_path}")
    print("Please ensure the file path is correct and that your Google Drive is mounted if you're using Colab.")
    # Exit or handle the error appropriately
    case_data = []


# 🔍 🔹 Step 2: Enhanced regex pattern for Act references
act_pattern = re.compile(
    # Catches patterns like 'The Evidence Act No 14 of 2015'
    r'\b((?:[A-Z][a-z]+\s+){0,7}(?:Act|Code|Ordinance|Law|Regulation|Rules))\s*(?:No\.?|Number)\s*(\d+)\s*of\s*(\d{4})',
    re.IGNORECASE
)

# ✨ 🔹 Step 3: Function to Standardize Act Names
def standardize_act_name(raw_act_text):
    """
    Parses the raw act text and returns it in a standardized format.
    Example: "the penal code act no 15 of 1979" -> "Penal Code Act No. 15 of 1979"
    """
    match = act_pattern.search(raw_act_text)
    if match:
        # Reconstruct from captured groups for consistency
        name = match.group(1).strip().title()
        number = match.group(2)
        year = match.group(3)
        # Ensure title case for the act name itself, e.g., "The Penal Code Act" -> "The Penal Code Act"
        name_parts = name.split()
        if name_parts[-1].lower() in ["act", "code", "ordinance", "law", "regulation", "rules"]:
             name = ' '.join(p.capitalize() for p in name_parts)

        return f"{name} No. {number} of {year}"

    # Fallback for any partial matches (though the regex is quite specific)
    return ' '.join(raw_act_text.split()).title()


# 📊 🔹 Step 4: Extract and Standardize Acts per Case
case_act_map = []

if case_data:
    for case in case_data:
        case_id = case.get("id", "Unknown")
        content = case.get("text", "")

        raw_acts_found = set()
        for match in act_pattern.finditer(content):
            raw_acts_found.add(match.group(0).strip())

        # Standardize the acts that were found
        standardized_acts = {standardize_act_name(act) for act in raw_acts_found}

        case_act_map.append({
            "case_id": case_id,
            "raw_acts": sorted(list(raw_acts_found)),
            "standardized_acts": sorted(list(standardized_acts))
        })

# 📁 🔹 Step 5: Save & Preview
df = pd.DataFrame(case_act_map)
pd.set_option('display.max_colwidth', None)
print("\nPreview of extracted and standardized acts:")
display(df)

# Save to CSV for review
df.to_csv("case_to_acts_extracted_standardized.csv", index=False, encoding="utf-8")
print("\nSaved extracted data to case_to_acts_extracted_standardized.csv")


# 🧠 🔹 Step 6: Neo4j connector class
class Neo4jConnector:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def verify_connection(self):
        """Verifies the connection to the database is alive."""
        try:
            self.driver.verify_connectivity()
            print("Connection to Neo4j AuraDB verified successfully.")
        except Exception as e:
            print(f"Failed to connect to Neo4j: {e}")
            raise

    def push_case_with_acts(self, case_id, acts):
        with self.driver.session() as session:
            session.execute_write(self._create_graph_tx, case_id, acts)

    @staticmethod
    def _create_graph_tx(tx, case_id, acts):
        # Create Case node
        tx.run(
            """
            MERGE (c:Case {id: $case_id})
            SET c.case_id = $case_id
            """, case_id=case_id
        )

        # Create Act nodes with standardized names + relationships
        for act in acts:
            tx.run(
                """
                MERGE (a:Act {name: $act})
                WITH a
                MATCH (c:Case {id: $case_id})
                MERGE (c)-[:REFERS_TO]->(a)
                """, case_id=case_id, act=act
            )

# ✅ 🔹 Step 7: Connect and push to Neo4j
print("\nAttempting to connect to Neo4j AuraDB...")

# --- IMPORTANT ---
# The URI must start with "neo4j+s://" for AuraDB
# The password is the one generated by Aura when you created the database
NEO4J_URI = "neo4j+s://66d16355.databases.neo4j.io"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "G4UXZ6KLGd1dzo57rp6ITypJHZ37aM1fn-exAWdw3p8"

try:
    neo4j_conn = Neo4jConnector(
        uri=NEO4J_URI,
        user=NEO4J_USER,
        password=NEO4J_PASSWORD
    )
    # Check if the connection is valid before proceeding
    neo4j_conn.verify_connection()

    # Insert all case-acts into the graph using the STANDARDIZED act names
    print("Pushing data to Neo4j...")
    count = 0
    for entry in case_act_map:
        # Only push entries that have associated acts
        if entry["standardized_acts"]:
            neo4j_conn.push_case_with_acts(entry["case_id"], entry["standardized_acts"])
            count += 1

    print(f"Successfully processed and pushed {count} cases with act relationships to Neo4j.")

except Exception as e:
    print(f"An error occurred during the Neo4j operation: {e}")

finally:
    # 🔚 🔹 Step 8: Close connection
    if 'neo4j_conn' in locals() and neo4j_conn.driver:
        neo4j_conn.close()
        print("Neo4j connection closed.")

Successfully loaded case data.

Preview of extracted and standardized acts:


Unnamed: 0,case_id,raw_acts,standardized_acts
0,d66a6895-c339-4bd0-9992-790b7b5f4a17,"[ACT NO. 2 of 1995, CONSIDER THE RELEVANT \nprovistions of the bail ACT NO. 30 of 1997]","[Act No. 2 of 1995, Consider The Relevant Provistions Of The Bail Act No. 30 of 1997]"
1,4aaafdf5-8ac9-4086-b62e-485d250b02bb,"[Act \nNo.32 of 1999, Act No 32 of 1999, Act No. 32 of 1999, Act No.32 of 1999, of the Code of Criminal \nProcedure Act No.15 of 1979, of the Penal Code as \namended Act No.22 of 0995]","[Act No. 32 of 1999, Of The Code Of Criminal Procedure Act No. 15 of 1979, Of The Penal Code As Amended Act No. 22 of 0995]"
2,f81236b6-7c88-4337-9701-772651a56abe,[],[]
3,0fe6fe07-fd7d-4b4c-a5d3-3644e9c56b56,[],[]
4,b655451f-cad0-4cc4-b5ce-6bc81dbbee30,[],[]
...,...,...,...
524,ac7fca87-465f-4a22-b064-a4fa1ffcefc1,[of the Code of Criminal \nProcedure Act No.15 of 1979],[Of The Code Of Criminal Procedure Act No. 15 of 1979]
525,4c4ebba0-1876-43ba-adf6-79afca98fceb,"[and Flora Protection Ordinance as \namended by Act No.22 of 2009, and Flora \nProtection Ordinance as amended by Act No. 22 of 2009]",[And Flora Protection Ordinance As Amended By Act No. 22 of 2009]
526,a0079550-0b33-46e2-a360-f01edecb103c,[of the Code of Criminal \nProcedure Act No.15 of 1979],[Of The Code Of Criminal Procedure Act No. 15 of 1979]
527,313e5fae-828a-40d0-ac21-489a6e6d05d4,"[in terms \nof the Companies Act No. 117 of 1982, ratuity Act No. 12 of \n1983, ratuity Act No. 12 of 1983, registered in terms of Companies Act No. 7 of 2007, requirement under \npayment of Gratuity Act No.12 of 1983]","[In Terms Of The Companies Act No. 117 of 1982, Ratuity Act No. 12 of 1983, Registered In Terms Of Companies Act No. 7 of 2007, Requirement Under Payment Of Gratuity Act No. 12 of 1983]"



Saved extracted data to case_to_acts_extracted_standardized.csv

Attempting to connect to Neo4j AuraDB...
Connection to Neo4j AuraDB verified successfully.
Pushing data to Neo4j...
Successfully processed and pushed 385 cases with act relationships to Neo4j.
Neo4j connection closed.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
