In [1]:
import os
import autogen
from autogen import AssistantAgent, UserProxyAgent, ConversableAgent
from IPython import get_ipython

In [2]:
# Single-endpoint OpenAI settings (model name + API key).
# The key is read from the OPENAI_API_KEY environment variable so a real
# secret never has to be written into (and saved with) the notebook. The
# original "sk-" placeholder is kept as the fallback when the variable is unset.
# NOTE(review): this name is not referenced by any other visible cell;
# the agents are configured from `config_list` instead.
OAI_CONFIG_LIST = {
    'model': 'gpt-4o',
    'api_key': os.environ.get('OPENAI_API_KEY', 'sk-'),
}

In [3]:
# Model used by every agent and by the group-chat manager.
OPENAI_MODEL = "gpt-4o"

# Endpoint list handed to AutoGen through `llm_config` / `gpt_config`.
# The API key comes from the OPENAI_API_KEY environment variable instead of
# being hard-coded in the notebook; the original "sk-" placeholder remains the
# fallback when the variable is not set.
config_list = [{
    "model": OPENAI_MODEL,
    "api_key": os.environ.get("OPENAI_API_KEY", "sk-"),
}]

In [4]:
# LLM settings shared by the agents, including the declaration of the
# "python" function the model may call (bound to `exec_python` via
# `register_function` in a later cell).
#
# The original literal listed "timeout" (600, then 120) and "config_list"
# twice. In a dict literal the last duplicate wins, so the effective timeout
# was always 120 and the 600 was dead. Each key now appears exactly once with
# the value that was actually in effect, so behaviour is unchanged.
llm_config = {
    "timeout": 120,
    "cache_seed": 42,
    "config_list": config_list,
    "temperature": 0,
    "functions": [
        {
            "name": "python",
            "description": "run cell in ipython and return the execution result.",
            "parameters": {
                "type": "object",
                "properties": {
                    "cell": {
                        "type": "string",
                        "description": "Valid Python cell to execute.",
                    }
                },
                "required": ["cell"],
            },
        }
    ],
}


In [5]:
# Run-wide settings that are interpolated into the agent prompts below.
PATH = os.getcwd()  # folder the agents are told to load the court orders from
# Documents per schema update, and the sample size the closeness checker's
# prompt quotes as a "percent" of each batch.
batch_size, evaluation_sample_size = 5, 20

In [6]:
# LLM settings passed to the GroupChatManager: same endpoint list, cache seed
# and temperature as the agents' config, but without a "functions" entry.
gpt_config = dict(
    cache_seed=42,
    temperature=0,
    config_list=config_list,
    timeout=600,
)

In [7]:
# Schema updater: reads the documents `batch_size` at a time and revises the
# schema after every batch (the batch size is interpolated into its prompt).
schema_gen = AssistantAgent(
    system_message=f"Specializes in reading court order documents iteratively and generating comprehensive schemas with all important information. Reads and updates schema for only {batch_size} documents at a time, updating the schema after each batch. Schemas should be strictly based on the information in the documents. No dummy schema or hallucination allowed. Assign correct type to the keys based on the information in the documents, not your intuition. Ensure correct variable type for the keys. Use bools for yes/no questions, strings for text, and numbers for numerical values.",
    llm_config=llm_config,
    name="schema_gen",
)

# Code generator: writes (and, per its prompt, runs) the Python needed to
# process documents and manage schemas.
code_generator = AssistantAgent(
    system_message="Generates necessary code for processing documents and managing schemas, returning it in the required format. Also executes the code. Strictly, only use python code generation.",
    llm_config=llm_config,
    name="code_generator",
)

# Human-facing proxy: always asks for human input, and is configured to
# execute code locally (no Docker) with "coding" as its working directory.
user_interface = UserProxyAgent(
    name="user_interface",
    human_input_mode="ALWAYS",
    max_consecutive_auto_reply=3,
    llm_config=llm_config,
    code_execution_config={"work_dir": "coding", "use_docker": False},
    system_message="Admin interface for user input. Supports user queries and termination signals. Invoke human feedback after each batch of schema generation and evaluation",
)

# Critic: regenerates a sample of documents from the current schema, compares
# them with the originals and reports what the schema is still missing.
# The prompt is a triple-quoted f-string, so the indentation of its
# continuation lines is part of the text sent to the model.
closeness_checker = AssistantAgent(
    system_message=f"""Regenerates a random sample of {evaluation_sample_size} percent of original documents in a batch by just using the schema generated till then. It then compares the generated regenerated with the original documents. Evaluates regenerated court order documents to ensure all information from the original document is incorporated.
            By this comparison, the keys/questions not incorprated in the schema are identified and reported.
        Provides detailed feedback on the generated schema of the given batch to optimize the completeness of the schemas. It is strict critic of generated schema and flawlessly ensures that not even minute bit of information is lost. """,
    llm_config=llm_config,
    name="closeness_checker",
)

In [8]:
def exec_python(cell):
    """Run `cell` in the current IPython shell and return a text log.

    The log starts with ``str()`` of the execution result's ``result``
    attribute. If ``error_before_exec`` and/or ``error_in_exec`` are set on
    the execution result, each is appended on its own line, in that order.
    """
    outcome = get_ipython().run_cell(cell)
    pieces = [str(outcome.result)]
    for failure in (outcome.error_before_exec, outcome.error_in_exec):
        if failure is not None:
            pieces.append(f"{failure}")
    return "\n".join(pieces)

def exec_sh(script):
    """Run `script` as a single "sh" code block through `user_interface`.

    Returns whatever `user_interface.execute_code_blocks` returns.
    """
    shell_block = ("sh", script)
    return user_interface.execute_code_blocks([shell_block])

In [9]:
# Bind the "python" function declared in `llm_config` to `exec_python` on
# every agent, in the same order as before; each agent gets its own
# function_map dict.
for _agent in (schema_gen, code_generator, user_interface, closeness_checker):
    _agent.register_function(function_map={"python": exec_python})

In [10]:
# One shared conversation for all four agents, starting with an empty history
# and capped at 100000 rounds.
groupchat = autogen.GroupChat(
    agents=[user_interface, schema_gen, code_generator, closeness_checker],
    messages=[],
    max_round=100000,
)
# The manager runs the group chat using `gpt_config`, which has no
# "functions" entry.
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=gpt_config,
)

In [11]:
# Start the group chat with `user_interface` as the sender and `manager` as
# the recipient. The message is an f-string: `PATH` (the current working
# directory captured earlier) is interpolated as the folder to read from, and
# everything else, including the leading spaces on each line, is sent as-is.
# NOTE(review): "strcuture" in the prompt is a typo for "structure"; it is left
# untouched here because the text is runtime input to the model.
user_interface.initiate_chat(
    manager,
    message=f""" Use Python only. Return the schema in standard JSON format. Load the text court orders from the provided folder path {PATH}. The files are inside the folder. First and foremost, read the text files for first batch. Calculate number of batches before processing first batch.
    Read the court judgments iteratively, updating and improving the schema each time after reading a batch. Strictly perform document regeneration and closeness checking at the end updating schema for each batch by randomly selecting documents to regenerate and compare. Do not skip this step.
    Focus on generating a standardized schema with keys and their datatypes only, not values. Make it clear, I do not want to store any data from these documents but want to study the strcuture of these documents; that is, what are the questions that are needed to be answered to create document like these.
    """
)


[33muser_interface[0m (to chat_manager):

 Use Python only. Return the schema in standard JSON format. Load the text court orders from the provided folder path e:\schemaGenerator. The files are inside the folder. First and foremost, read the text files for first batch. Calculate number of batches before processing first batch.
    Read the court judgments iteratively, updating and improving the schema each time after reading a batch. Strictly perform document regeneration and closeness checking at the end updating schema for each batch by randomly selecting documents to regenerate and compare. Do not skip this step.
    Focus on generating a standardized schema with keys and their datatypes only, not values. Make it clear, I do not want to store any data from these documents but want to study the strcuture of these documents; that is, what are the questions that are needed to be answered to create document like these.
    

------------------------------------------------------------

5

[33mcode_generator[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
5
[32m***************************************************[0m

--------------------------------------------------------------------------------
[33mschema_gen[0m (to chat_manager):

There are 5 batches of documents to process. Let's start with the first batch of 5 documents and generate the initial schema.

I'll read the first 5 documents and create a schema based on their structure.
[32m***** Suggested function Call: python *****[0m
Arguments: 
{"cell":"import os\n\n# Define the folder path\nfolder_path = \"e:/schemaGenerator\"\n\n# List all text files in the folder\ntext_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]\n\n# Read the first batch of 5 documents\nbatch_size = 5\nfirst_batch_files = text_files[:batch_size]\n\n# Read the content of the first batch of files\nfirst_batch_content = []\nfor file_name in first_batch_files:\n    with open(os.path.join(

['Court Tehsildar-Sadar, Varanasi. Case number-J202414700104931 year-2024 under section-34/35 UP Revenue Code, Mauza-Bhagwanpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Madhuri Devi vs. Smt. Sita Devi\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1133 dated-16/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of the area of â€‹â€‹0.3020 hectare of Arazi number-529mi, area of â€‹â€‹690 square feet i.e. area-64.12 square meters situated in Mauza-Bhagwanpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Smt. Sita Devi to Mrs. Madhuri Devi. As per rules, the suit was registered and a proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\nThe buyer has stated in his evidence that even after purchasing the said land, as per the original sale deed, extract Kha

[33muser_interface[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court Tehsildar-Sadar, Varanasi. Case number-J202414700104931 year-2024 under section-34/35 UP Revenue Code, Mauza-Bhagwanpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Madhuri Devi vs. Smt. Sita Devi\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1133 dated-16/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of the area of â€‹â€‹0.3020 hectare of Arazi number-529mi, area of â€‹â€‹690 square feet i.e. area-64.12 square meters situated in Mauza-Bhagwanpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Smt. Sita Devi to Mrs. Madhuri Devi. As per rules, the suit was registered and a proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\nThe buyer has st

['Court of Tehsildar-Sadar, Varanasi. Case number-202414700101771 year-2024 under section-34/35 UP Revenue Code, Mauza-Phulwaria Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Sushila Devi vs. Shri Vijay Kumar Pal\n\nDecision\nThis suit of transfer is mentioned on the registered donation letter number-57 dated-04/01/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Mr. Vijay Kumar Pal has donated only 680 sq. ft. i.e. 63.19 sq. m. area out of his purchased area of â€‹â€‹1360 sq. ft. in Arazi number-Mr.456/1 area 0.6070 hectare to Mrs. Sushila Devi of Mauza-Phulwaria Pargana-Dehat Amanat, Tehsil and District-Varanasi. As per rules, the suit was registered and promulgation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\n\nThe donee has stated in his evidence that even after purchasing the said land, the donee will have less than 

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court of Tehsildar-Sadar, Varanasi. Case number-202414700101771 year-2024 under section-34/35 UP Revenue Code, Mauza-Phulwaria Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Sushila Devi vs. Shri Vijay Kumar Pal\n\nDecision\nThis suit of transfer is mentioned on the registered donation letter number-57 dated-04/01/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Mr. Vijay Kumar Pal has donated only 680 sq. ft. i.e. 63.19 sq. m. area out of his purchased area of â€‹â€‹1360 sq. ft. in Arazi number-Mr.456/1 area 0.6070 hectare to Mrs. Sushila Devi of Mauza-Phulwaria Pargana-Dehat Amanat, Tehsil and District-Varanasi. As per rules, the suit was registered and promulgation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\n\nT

['Court of Tehsildar-Sadar, Varanasi. Case number-202414700105301 year-2024 under section-34/35 UP Revenue Code, Mauza-Nasirpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Tanya Pal vs. Mrs. Sushma Yadav\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1277 dated-20/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Arazi number-Mi141/1 area 0.0490 hectare, out of the total purchased area of â€‹â€‹1700 sq. ft., the area sold of 1360 sq. ft. i.e. area-126.394 sq. m. situated in Mauza-Nasirpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Mrs. Sushma Yadav to Mrs. Tanya Pal. As per the rules, the suit was registered and proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\nThe purchaser has stated in his evidence that even after purchasing the said land, the purch

[33mcode_generator[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court of Tehsildar-Sadar, Varanasi. Case number-202414700105301 year-2024 under section-34/35 UP Revenue Code, Mauza-Nasirpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nSmt. Tanya Pal vs. Mrs. Sushma Yadav\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1277 dated-20/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Arazi number-Mi141/1 area 0.0490 hectare, out of the total purchased area of â€‹â€‹1700 sq. ft., the area sold of 1360 sq. ft. i.e. area-126.394 sq. m. situated in Mauza-Nasirpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Mrs. Sushma Yadav to Mrs. Tanya Pal. As per the rules, the suit was registered and proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is 

['Court of Tehsildar-Sadar, Varanasi. Case number-202414700103039 year-2024 under section-34/35 of UP Revenue Code, Mauza-Aude Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Afroz Ahmed vs Shri Dr. Rana Gopal Singh\n\nDecision\nThis suit of transfer is recorded on the registered sale deed received from the registration office Varanasi\n\nNo.-605 dated-30/01/2024. Which was instituted on the basis of the transfer report of the Lekhpal. Out of the area of â€‹â€‹0.2900 hectare, land number-1114, area of â€‹â€‹1320 square feet i.e. area-122.67 square meters situated in Mauza-Aude Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Dr. Rana Gopal Singh to Shri Afroz Ahmed. As per rules, the suit was registered and a proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\nThe buyer has stated in his evidence that even after purchasing the said land, as per the copy of the sale deed,

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court of Tehsildar-Sadar, Varanasi. Case number-202414700103039 year-2024 under section-34/35 of UP Revenue Code, Mauza-Aude Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Afroz Ahmed vs Shri Dr. Rana Gopal Singh\n\nDecision\nThis suit of transfer is recorded on the registered sale deed received from the registration office Varanasi\n\nNo.-605 dated-30/01/2024. Which was instituted on the basis of the transfer report of the Lekhpal. Out of the area of â€‹â€‹0.2900 hectare, land number-1114, area of â€‹â€‹1320 square feet i.e. area-122.67 square meters situated in Mauza-Aude Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Dr. Rana Gopal Singh to Shri Afroz Ahmed. As per rules, the suit was registered and a proclamation letter was issued which was served and the file is attached. In the absence of any objection, the suit is undisputed.\nThe 

['\nCourt Tehsildar-Sadar, Varanasi. Case number-J202414700104026 year-2024 under section-34/35 UP Revenue Code, Mauza-Susuwahi Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nVaishnavi Construction vs Shri Sanjeev Kumar\n\nDecision\nThis suit of transfer is mentioned on the registered sale deed number-1003 dated-07/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of Arazi number-435 area of â€‹â€‹0.4530 hectare, area of â€‹â€‹4342 sq. ft. i.e. area-403.53 sq. m. situated in Mauza-Susuwahi Pargana-Dehat Amanat, Tehsil and District-Varanasi, Shri Sanjeev Kumar has sold it to Bahk Vaishnavi Construction. As per rules, the suit was registered and promulgation letter was issued which was served and the file is attached. In absence of any objection, the suit is undisputed.\n\nThe buyer has stated in his evidence that even after purchasing the said land, as per the original sale deed, extract Khatauni

[33mcode_generator[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['\nCourt Tehsildar-Sadar, Varanasi. Case number-J202414700104026 year-2024 under section-34/35 UP Revenue Code, Mauza-Susuwahi Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nVaishnavi Construction vs Shri Sanjeev Kumar\n\nDecision\nThis suit of transfer is mentioned on the registered sale deed number-1003 dated-07/02/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of Arazi number-435 area of â€‹â€‹0.4530 hectare, area of â€‹â€‹4342 sq. ft. i.e. area-403.53 sq. m. situated in Mauza-Susuwahi Pargana-Dehat Amanat, Tehsil and District-Varanasi, Shri Sanjeev Kumar has sold it to Bahk Vaishnavi Construction. As per rules, the suit was registered and promulgation letter was issued which was served and the file is attached. In absence of any objection, the suit is undisputed.\n\nThe buyer has stated 

['Court Tehsildar-Sadar, Varanasi. Case number-J202314700108547 year-2023 under section-34/35 UP Revenue Code, Mauza-Sirgovardhanpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Sunit Kumar Giri vs Shri Krishna Kumar Singh\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1782\ndated-20/03/2023 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Arazi number-1009m. Out of the area of â€‹â€‹0.1540 hectare, area of â€‹â€‹903 square feet i.e. area-83.92 square meters situated in Mauza-Sirgovardhanpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Krishna Kumar Singh to the impostor Shri Sunit Kumar Giri. As per rules, the suit was registered and a proclamation letter was issued, which was served and the attached file is attached. The suit has been dismissed on 17-05-2023 in the absence of copy of deed, affidavit and other evidence. The applicant ha

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court Tehsildar-Sadar, Varanasi. Case number-J202314700108547 year-2023 under section-34/35 UP Revenue Code, Mauza-Sirgovardhanpur Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Sunit Kumar Giri vs Shri Krishna Kumar Singh\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-1782\ndated-20/03/2023 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Arazi number-1009m. Out of the area of â€‹â€‹0.1540 hectare, area of â€‹â€‹903 square feet i.e. area-83.92 square meters situated in Mauza-Sirgovardhanpur Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Krishna Kumar Singh to the impostor Shri Sunit Kumar Giri. As per rules, the suit was registered and a proclamation letter was issued, which was served and the attached file is attached. The suit has

['Court of Tehsildar-Sadar, Varanasi. Case number-J202414700102318 year-2024 under section-34/35 UP Revenue Code, Mauza-Lohta Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Jalaluddin vs Shri Usman\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-181\ndated-10/01/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of Arazi number-170 area of â€‹â€‹0.0970 hectare, area of â€‹â€‹652.6 square feet i.e. area-60.64 square meters situated in Mauza-Lohta Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Usman to Shri Jalaluddin. As per rules, the suit was registered and a proclamation letter was issued, which was served and the attached file is attached. The suit has been dismissed on 18-03-2024 in the absence of original deed, affidavit and other evidence. The applicant has given a request for settlement dated 04-04-2024. Which is accepted. The e

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['Court of Tehsildar-Sadar, Varanasi. Case number-J202414700102318 year-2024 under section-34/35 UP Revenue Code, Mauza-Lohta Pargana-Dehat Amanat, Tehsil-Sadar District-Varanasi\n\nShri Jalaluddin vs Shri Usman\n\nDecision\nThis suit of transfer is recorded on the registered sale deed number-181\ndated-10/01/2024 received from the registration office Varanasi. Which was instituted on the basis of the transfer report of the Lekhpal. Out of Arazi number-170 area of â€‹â€‹0.0970 hectare, area of â€‹â€‹652.6 square feet i.e. area-60.64 square meters situated in Mauza-Lohta Pargana-Dehat Amanat, Tehsil and District-Varanasi has been sold by Shri Usman to Shri Jalaluddin. As per rules, the suit was registered and a proclamation letter was issued, which was served and the attached file is attached. The suit has been dismissed on 18-03-2024 in the absence of original deed, affidavit and

['{\n    "court_name": {\n        "type": "string",\n        "examples": []\n    },\n    "case_number": {\n        "type": "string",\n        "examples": []\n    },\n    "year": {\n        "type": "number",\n        "examples": []\n    },\n    "section": {\n        "type": "string",\n        "examples": []\n    },\n    "mauza": {\n        "type": "string",\n        "examples": []\n    },\n    "pargana": {\n        "type": "string",\n        "examples": []\n    },\n    "tehsil": {\n        "type": "string",\n        "examples": []\n    },\n    "district": {\n        "type": "string",\n        "examples": []\n    },\n    "plaintiff": {\n        "type": "string",\n        "examples": []\n    },\n    "defendant": {\n        "type": "string",\n        "examples": []\n    },\n    "decision": {\n        "sale_deed_number": {\n            "type": "string",\n            "examples": []\n        },\n        "sale_deed_date": {\n            "type": "string",\n            "examples": []\n        },

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['{\n    "court_name": {\n        "type": "string",\n        "examples": []\n    },\n    "case_number": {\n        "type": "string",\n        "examples": []\n    },\n    "year": {\n        "type": "number",\n        "examples": []\n    },\n    "section": {\n        "type": "string",\n        "examples": []\n    },\n    "mauza": {\n        "type": "string",\n        "examples": []\n    },\n    "pargana": {\n        "type": "string",\n        "examples": []\n    },\n    "tehsil": {\n        "type": "string",\n        "examples": []\n    },\n    "district": {\n        "type": "string",\n        "examples": []\n    },\n    "plaintiff": {\n        "type": "string",\n        "examples": []\n    },\n    "defendant": {\n        "type": "string",\n        "examples": []\n    },\n    "decision": {\n        "sale_deed_number": {\n            "type": "string",\n            "examples": []\n  

['{\n    "court_name": {\n        "type": "string",\n        "examples": []\n    },\n    "case_number": {\n        "type": "string",\n        "examples": []\n    },\n    "year": {\n        "type": "number",\n        "examples": []\n    },\n    "section": {\n        "type": "string",\n        "examples": []\n    },\n    "mauza": {\n        "type": "string",\n        "examples": []\n    },\n    "pargana": {\n        "type": "string",\n        "examples": []\n    },\n    "tehsil": {\n        "type": "string",\n        "examples": []\n    },\n    "district": {\n        "type": "string",\n        "examples": []\n    },\n    "plaintiff": {\n        "type": "string",\n        "examples": []\n    },\n    "defendant": {\n        "type": "string",\n        "examples": []\n    },\n    "decision": {\n        "sale_deed_number": {\n            "type": "string",\n            "examples": []\n        },\n        "sale_deed_date": {\n            "type": "string",\n            "examples": []\n        },

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
['{\n    "court_name": {\n        "type": "string",\n        "examples": []\n    },\n    "case_number": {\n        "type": "string",\n        "examples": []\n    },\n    "year": {\n        "type": "number",\n        "examples": []\n    },\n    "section": {\n        "type": "string",\n        "examples": []\n    },\n    "mauza": {\n        "type": "string",\n        "examples": []\n    },\n    "pargana": {\n        "type": "string",\n        "examples": []\n    },\n    "tehsil": {\n        "type": "string",\n        "examples": []\n    },\n    "district": {\n        "type": "string",\n        "examples": []\n    },\n    "plaintiff": {\n        "type": "string",\n        "examples": []\n    },\n    "defendant": {\n        "type": "string",\n        "examples": []\n    },\n    "decision": {\n        "sale_deed_number": {\n            "type": "string",\n            "examples": []\n  

KeyboardInterrupt: 