In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from langchain.agents import Tool
from langchain.chat_models import ChatOpenAI
import os
import dotenv
import time
from tqdm import tqdm
from utils import load_json, save_json
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from LegalPrincipleExtractorTool import LegalPrincipleExtractor
from CitationExtractorTool import CitationExtractor
from legalHelperTool import LegalHelper
from FactsReaderTool import CaseFactsReader
from LawReaderTool import CaseLawReader
from prompts import get_prompt

In [3]:
# Load variables from a .env file into the process environment, then read the
# OpenAI key from it. os.getenv returns None when the variable is not set.
dotenv.load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [4]:
# Chat model handed to the ReAct agent built further down.
# temperature=0 requests the least random sampling the API offers.
llm = ChatOpenAI(
        temperature=0,
        model_name="gpt-4o-mini",
        openai_api_key=openai_api_key
    )

  llm = ChatOpenAI(


In [5]:
# Tools offered to the agent. Each entry wraps the `_run` method of a freshly
# constructed project tool object in a langchain `Tool`.
# NOTE(review): the `description` strings are presumably what the agent sees
# when choosing a tool, so treat them as prompt text, not as comments.
tools = [
        # Reads the facts of a case.
        Tool(
            name='facts_reader',
            func=CaseFactsReader()._run,
            description="Load and return the case facts."
        ),
        # Reads the law part of a case.
        Tool(
            name='law_reader',
            func=CaseLawReader()._run,
            description="Load and return the case law."
        ),
        # Extracts legal principles from a case file.
        Tool(
            name='legal_principle_extractor',
            func=LegalPrincipleExtractor()._run,
            description="Extract key legal principles from the case file."
        ),
        # Extracts citations from a case file.
        Tool(
            name='citation_extractor',
            func=CitationExtractor()._run,
            description="Extract citations from the case file."
        ),
        # Question-answering helper; per its description it has no access to
        # the case file and expects [file_path, question] as input.
        Tool(
            name='legal_helper',
            func=LegalHelper()._run,
            description="""A tool to answer specific legal questions about ECHR jurisprudence and legal principles. 
    Use this tool to ask focused questions about legal concepts, doctrines, or interpretations that will help  analyze the case. 
    The tool do not have access to the case file.
    Do NOT ask this tool to determine if a case is key - instead ask specific questions  about legal elements you identify from the principles and citations.
    To use this tool you must provide [file_path, question] as input.
    """
        )
    ]

In [6]:
import pandas as pd
# NOTE(review): absolute paths tied to one machine; the notebook will not run
# elsewhere until these are pointed at a local copy of the data.
data_path = '/Users/ahmed/Desktop/msc-24/ECHR/echr-processed/'
df1_path = '/Users/ahmed/Desktop/msc-24/TND/kc_classification_data/pre_cutoff_data/df_1.csv'
df4_path = '/Users/ahmed/Desktop/msc-24/TND/kc_classification_data/pre_cutoff_data/df_4.csv'
# Despite the df* names these are plain Python lists holding the values of the
# 'file_path' column of each CSV.
df1 = pd.read_csv(df1_path)['file_path'].to_list()
df4 = pd.read_csv(df4_path)['file_path'].to_list()

# Pick the first entry of df_4 as the case to work on and load its JSON.
case_path = data_path + df4[0]
case = load_json(case_path)

In [7]:
from typing import Dict, Any, List
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult
import json
from datetime import datetime

class AgentLoggingCallback(BaseCallbackHandler):
    """Callback handler that records an agent run and mirrors it to a JSON file.

    Every hook appends to ``self.logs`` and then rewrites the whole log file,
    so the file on disk always reflects the events received so far.

    ``self.logs`` keys:
        thoughts     -- one entry per agent action: log text, tool, tool input
        actions      -- one entry per agent action: tool and tool input only
        tool_outputs -- one entry per finished tool call
        final_output -- return values and log text of the finish event, or None
        errors       -- one entry per reported error (message and class name)
    """

    def __init__(self, log_file_path: str = None):
        """Initialize the callback handler with an optional log file path.

        Args:
            log_file_path: Destination JSON file. When omitted (or falsy), a
                name of the form ``agent_logs_YYYYmmdd_HHMMSS.json`` is
                generated from the current local time.
        """
        self.log_file_path = log_file_path or f"agent_logs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        self.logs = {
            "thoughts": [],
            "actions": [],
            "tool_outputs": [],
            "final_output": None,
            "errors": []
        }

    def on_agent_action(self, action: AgentAction, **kwargs) -> Any:
        """Log agent actions and thoughts."""
        # One timestamp for both records so they can be matched up later.
        timestamp = datetime.now().isoformat()
        self.logs["thoughts"].append({
            "timestamp": timestamp,
            "thought": action.log,
            "action": action.tool,
            "action_input": action.tool_input,
        })
        self.logs["actions"].append({
            "timestamp": timestamp,
            "tool": action.tool,
            "input": action.tool_input
        })
        self._save_logs()

    def on_tool_end(self, output: str, **kwargs) -> Any:
        """Log tool outputs."""
        self.logs["tool_outputs"].append({
            "timestamp": datetime.now().isoformat(),
            "output": output
        })
        self._save_logs()

    def on_agent_finish(self, finish: AgentFinish, **kwargs) -> Any:
        """Log the final output."""
        self.logs["final_output"] = {
            "timestamp": datetime.now().isoformat(),
            "output": finish.return_values,
            "log": finish.log
        }
        self._save_logs()

    def on_error(self, error: Exception, **kwargs) -> Any:
        """Log any errors that occur."""
        self.logs["errors"].append({
            "timestamp": datetime.now().isoformat(),
            "error": str(error),
            "error_type": error.__class__.__name__
        })
        self._save_logs()

    # LangChain reports failures through the component-specific hooks below and
    # never calls a generic `on_error`, so without these forwards the "errors"
    # list would stay empty. `on_error` is kept for any direct callers.
    def on_chain_error(self, error: BaseException, **kwargs) -> Any:
        """Log an error raised while running a chain (including the agent)."""
        return self.on_error(error, **kwargs)

    def on_tool_error(self, error: BaseException, **kwargs) -> Any:
        """Log an error raised while running a tool."""
        return self.on_error(error, **kwargs)

    def on_llm_error(self, error: BaseException, **kwargs) -> Any:
        """Log an error raised while calling the language model."""
        return self.on_error(error, **kwargs)

    def _save_logs(self):
        """Save logs to file, overwriting whatever was written before."""
        with open(self.log_file_path, 'w', encoding='utf-8') as f:
            # default=str: tool inputs/outputs are not guaranteed to be JSON
            # types; stringify them instead of failing after the file has
            # already been truncated.
            json.dump(self.logs, f, indent=2, default=str)

In [9]:
# Pull the stock ReAct prompt from the LangChain hub and build the agent over
# the tool list defined earlier in the notebook.
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm, tools, prompt)

# Create the callback handler
logger = AgentLoggingCallback()

# `tools` is deliberately NOT rebound here. The executor must be given the same
# tools the agent was created with, and this cell must not rely on names that
# are only defined in later cells.

# Create agent executor with the callback
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    callbacks=[logger]  # Add the logging callback
)



In [14]:
from langchain.callbacks.manager import CallbackManager

# Wrap the logging handler in a CallbackManager so it can be attached to tools.
callback_manager = CallbackManager([logger])

# Attach the manager to every tool that exposes a `callback_manager` attribute;
# objects without that attribute are left untouched.
# NOTE(review): presumably this is what gets tool-level events into the
# logger — confirm against the installed langchain version.
for tool in tools:
    if hasattr(tool, "callback_manager"):
        tool.callback_manager = callback_manager


In [15]:
# Build the prompt for the selected case file and run the agent on it.
# `result` holds whatever the executor returns for this single invocation.
legal_prompt = get_prompt(case_path)
result = agent_executor.invoke({"input": legal_prompt})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to begin by extracting the case facts and law from the provided file to understand the context and background of the case. This will help me identify the core legal issues and the relevant ECHR articles involved.

Action: facts_reader  
Action Input: /Users/ahmed/Desktop/msc-24/ECHR/echr-processed/001-229412.json  [0m[36;1m[1;3m3. The list of applicants and the relevant details of the applications are set out in the appended table.
4. The applicants complained of the unlawful detention (deprivation of liberty). Some applicants also raised other complaints under the provisions of the Convention or Protocols to it.[0m[32;1m[1;3mI have extracted the case facts, which indicate that the applicants are complaining about unlawful detention and possibly other complaints under the ECHR provisions. This gives me a foundational understanding of the case context. Next, I need to extract the legal law to see how the court has

In [16]:
result

{'input': '\nYou are a specialized ECHR legal expert tasked with determining if a case is a key case in ECHR jurisprudence. Your role is to conduct an exhaustive legal analysis, akin to writing a detailed case commentary.\nYour task is to classify the case at /Users/ahmed/Desktop/msc-24/ECHR/echr-processed/001-229412.json into one of the following categories:\n\nKEY CASE:\n- Makes a significant contribution to the development, clarification, or modification of case law.\n- Establishes new legal principles or substantially modifies existing ones.\n- Has broad implications beyond the immediate case.\n\nNOT KEY CASE:\n- Applies existing case law without significant contributions to legal development.\n- Demonstrates limited implications beyond the immediate dispute.\n\n\nANALYSIS WORKFLOW:\n\n-Extract Case Facts and Law\n    - Load the case facts and law from the file to understand the context and background.\n\n- Identify Core Legal Issues\n    - Determine the key legal questions and ass

In [17]:
logger.logs

{'thoughts': [{'timestamp': '2024-12-16T23:57:50.936159',
   'thought': 'I need to begin by extracting the case facts and law from the provided file to understand the context and background of the case. This will help me identify the core legal issues and the relevant ECHR articles involved.\n\nAction: facts_reader  \nAction Input: /Users/ahmed/Desktop/msc-24/ECHR/echr-processed/001-229412.json  ',
   'action': 'facts_reader',
   'action_input': '/Users/ahmed/Desktop/msc-24/ECHR/echr-processed/001-229412.json'},
  {'timestamp': '2024-12-16T23:57:52.591860',
   'thought': 'I have extracted the case facts, which indicate that the applicants are complaining about unlawful detention and possibly other complaints under the ECHR provisions. This gives me a foundational understanding of the case context. Next, I need to extract the legal law to see how the court has approached similar issues in the past.\n\nAction: law_reader  \nAction Input: /Users/ahmed/Desktop/msc-24/ECHR/echr-processed/00

In [8]:
from utils import load_json, save_json, from_input_path_to_output_path
# Directory whose entries are listed by check_processed_files.
output_dir = "./results_openai_gpt4-mini/"
# Prefix prepended to each bare file name before it is mapped to its log path.
# NOTE(review): relative path, so it depends on the notebook's working directory.
input_data_root = "../../../../ECHR/echr-processed/"

def check_processed_files(output_dir):
    """Return the names of processed files whose run did not hit the agent limit.

    Every entry of ``output_dir`` has the ``i_<1-4>_case_`` markers removed. The
    resulting name is joined onto the module-level ``input_data_root``, mapped
    through ``from_input_path_to_output_path`` and loaded with ``load_json``.
    A file counts as an error when its ``['result']['output']`` text contains
    the agent's iteration/time-limit message.

    Args:
        output_dir: Directory whose entries are listed.

    Returns:
        List of de-duplicated names (in no particular order) that are not
        error files. The number of error files is printed as a side effect.
    """
    run_prefixes = ("i_1_case_", "i_2_case_", "i_3_case_", "i_4_case_")
    limit_message = "Agent stopped due to iteration limit or time limit"

    # Strip every known run prefix from each directory entry.
    stripped_names = []
    for entry in os.listdir(output_dir):
        for prefix in run_prefixes:
            entry = entry.replace(prefix, "")
        stripped_names.append(entry)

    def hit_agent_limit(name):
        """True when the stored result for ``name`` carries the limit message."""
        log_path = from_input_path_to_output_path(input_data_root + name)
        stored_run = load_json(log_path)
        return limit_message in stored_run['result']['output']

    error_files = [name for name in stripped_names if hit_agent_limit(name)]

    print(f"Number of error files: {len(error_files)}")
    # Drop the error files; going through sets also removes duplicate names.
    return list(set(stripped_names) - set(error_files))


# NOTE(review): `result` is also the name bound by the agent invocation cell;
# whichever of the two cells ran last decides what `result` holds.
result = check_processed_files(output_dir)


Number of error files: 20


['001-187605.json',
 '001-177082.json',
 '001-214330.json',
 '001-207757.json',
 '001-189641.json',
 '001-181273.json',
 '001-187188.json',
 '001-211125.json',
 '001-178753.json',
 '001-207953.json',
 '001-211592.json',
 '001-191276.json',
 '001-197098.json',
 '001-213908.json',
 '001-210077.json',
 '001-209069.json',
 '001-200657.json',
 '001-210363.json',
 '001-203169.json',
 '001-203503.json',
 '001-210874.json',
 '001-204603.json',
 '001-175659.json',
 '001-194523.json',
 '001-203840.json',
 '001-180486.json',
 '001-200344.json',
 '001-207173.json',
 '001-187930.json',
 '001-209373.json',
 '001-183395.json',
 '001-180442.json',
 '001-208877.json',
 '001-218424.json',
 '001-187802.json',
 '001-177406.json',
 '001-177349.json',
 '001-205536.json',
 '001-186828.json',
 '001-176769.json',
 '001-179219.json',
 '001-192210.json',
 '001-177299.json',
 '001-198760.json',
 '001-210463.json',
 '001-194618.json',
 '001-187540.json',
 '001-220484.json',
 '001-189426.json',
 '001-220960.json',
