In [12]:
import os
import autogen
from autogen import AssistantAgent, UserProxyAgent, ConversableAgent
from IPython import get_ipython

In [13]:
OAI_CONFIG_LIST = {
    'model': 'gpt-4o',
    'api_key': 'sk-'
}

In [14]:
OPENAI_MODEL = "gpt-4o"

config_list = [{
    "model": OPENAI_MODEL,
    "api_key": "sk-"
}]

In [15]:
llm_config = {
    "timeout": 600,
    "cache_seed": 42,
    "config_list": config_list,
    "temperature": 0,
    "functions": [
        {
            "name": "python",
            "description": "run cell in ipython and return the execution result.",
            "parameters": {
                "type": "object",
                "properties": {
                    "cell": {
                        "type": "string",
                        "description": "Valid Python cell to execute.",
                    }
                },
                "required": ["cell"],
            },
        }
    ],
    "config_list": config_list,
    "timeout": 120,
}


In [16]:
PATH=os.getcwd()
batch_size=5
evaluation_sample_size=20

In [17]:
gpt_config = {
    "cache_seed": 42,
    "temperature": 0,
    "config_list": config_list,
    "timeout": 600,
}

In [18]:
# Schema Updater Agent
schema_gen = autogen.AssistantAgent(
    name="schema_gen",
    llm_config=llm_config,
    system_message=f"Specializes in reading court order documents iteratively and generating comprehensive schemas with all important information. Reads and updates schema for only {batch_size} documents at a time, updating the schema after each batch. Schemas should be strictly based on the information in the documents. No dummy schema or hallucination allowed. Assign correct type to the keys based on the information in the documents, not your intuition. Ensure correct variable type for the keys. Use bools for yes/no questions, strings for text, and numbers for numerical values.",
)

# Code Generator Agent
code_generator = autogen.AssistantAgent(
    name="code_generator",
    llm_config=llm_config,
    system_message="Generates necessary code for processing documents and managing schemas, returning it in the required format. Also executes the code. Strictly, only use python code generation."
)

# User Interface Agent
user_interface = autogen.UserProxyAgent(
   name="user_interface",
   system_message="Admin interface for user input. Supports user queries and termination signals. Invoke human feedback after each batch of schema generation and evaluation",
   code_execution_config={"work_dir":"coding", "use_docker": False},
   max_consecutive_auto_reply=3,
   llm_config=llm_config,
   human_input_mode="ALWAYS"
)

closeness_checker = autogen.AssistantAgent(
    name="closeness_checker",
    llm_config=llm_config,
    system_message=(
         f"""Regenerates a random sample of {evaluation_sample_size} percent of original documents in a batch by just using the schema generated till then. It then compares the generated regenerated with the original documents. Evaluates regenerated court order documents to ensure all information from the original document is incorporated.
            By this comparison, the keys/questions not incorprated in the schema are identified and reported.
        Provides detailed feedback on the generated schema of the given batch to optimize the completeness of the schemas. It is strict critic of generated schema and flawlessly ensures that not even minute bit of information is lost. """
    )
)

In [19]:
def exec_python(cell):
    ipython = get_ipython()
    result = ipython.run_cell(cell)
    log = str(result.result)
    if result.error_before_exec is not None:
        log += f"\n{result.error_before_exec}"
    if result.error_in_exec is not None:
        log += f"\n{result.error_in_exec}"
    return log

def exec_sh(script):
    return user_interface.execute_code_blocks([("sh", script)])

In [20]:
schema_gen.register_function(function_map={
        "python": exec_python
})

code_generator.register_function(function_map={
        "python": exec_python
})
user_interface.register_function(function_map={
        "python": exec_python
})

closeness_checker.register_function(function_map={
        "python": exec_python
})

In [21]:
groupchat = autogen.GroupChat(agents=[user_interface, schema_gen,code_generator,closeness_checker], messages=[], max_round=100000)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=gpt_config)

In [22]:
user_interface.initiate_chat(
    manager,
    message=f""" Use Python only. Return the schema in standard JSON format. Load the text court orders from the provided folder path {PATH}. The files are inside the folder. First and foremost, read the text files for first batch. Calculate number of batches before processing first batch.
    Read the court judgments iteratively, updating and improving the schema each time after reading a batch. Strictly perform document regeneration and closeness checking at the end updating schema for each batch by randomly selecting documents to regenerate and compare. Do not skip this step.
    Focus on generating a standardized schema with keys and their datatypes only, not values. Make it clear, I do not want to store any data from these documents but want to study the strcuture of these documents; that is, what are the questions that are needed to be answered to create document like these.
    """
)


[33muser_interface[0m (to chat_manager):

 Use Python only. Return the schema in standard JSON format. Load the text court orders from the provided folder path e:\schemaGenerator\pdf_processing_with_translation. The files are inside the folder. First and foremost, read the text files for first batch. Calculate number of batches before processing first batch.
    Read the court judgments iteratively, updating and improving the schema each time after reading a batch. Strictly perform document regeneration and closeness checking at the end updating schema for each batch by randomly selecting documents to regenerate and compare. Do not skip this step.
    Focus on generating a standardized schema with keys and their datatypes only, not values. Make it clear, I do not want to store any data from these documents but want to study the strcuture of these documents; that is, what are the questions that are needed to be answered to create document like these.
    

----------------------------

1

[33mcode_generator[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
1
[32m***************************************************[0m

--------------------------------------------------------------------------------
[33mschema_gen[0m (to chat_manager):

There is only 1 batch to process. Let's proceed with reading the first batch of documents and generating the initial schema.

I'll read the first 5 documents (or fewer if there are not enough) and generate the initial schema.
[32m***** Suggested function Call: python *****[0m
Arguments: 
{"cell":"# Read the first batch of text files\nbatch_files = text_files[:5]\n\n# Read the content of each file\nbatch_contents = []\nfor file in batch_files:\n    with open(os.path.join(folder_path, file), 'r', encoding='utf-8') as f:\n        batch_contents.append(f.read())\n\nbatch_contents"}
[32m*******************************************[0m

---------------------------------------------------------------------

["ITEM NO.41               COURT NO.13               SECTION II-B\n               S U P R E M E  C O U R T  O F  I N D I A\n                       RECORD OF PROCEEDINGS\nPetition(s) for Special Leave to Appeal (Crl.)  No(s).  8057/2024\n(Arising out of impugned final judgment and order dated  09-05-2024\nin SCRLA No. 7347/2021 passed by the High Court Of Gujarat At\nAhmedabad)\nGEETA KARSANDAS SENMA                              Petitioner(s)\n                                VERSUS\nSTATE OF GUJARAT & ANR.                            Respondent(s)\n(IA  No.131560/2024-EXEMPTION  FROM  FILING  C/C  OF  THE  IMPUGNED\nJUDGMENT and IA No.131561/2024-EXEMPTION FROM FILING O.T. )\n \nDate : 14-06-2024 This petition was called on for hearing today.\nCORAM : \n         HON'BLE MR. JUSTICE SANJAY KUMAR\n         HON'BLE MR. JUSTICE AUGUSTINE GEORGE MASIH\n[VACATION BENCH]\nFor Petitioner(s) Mr. Virat Popat, Adv.\nMs. Shweta Lodha, Adv.\n                   Mr. Lokesh Kumar Choudhary, AOR\n       

[33muser_interface[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
["ITEM NO.41               COURT NO.13               SECTION II-B\n               S U P R E M E  C O U R T  O F  I N D I A\n                       RECORD OF PROCEEDINGS\nPetition(s) for Special Leave to Appeal (Crl.)  No(s).  8057/2024\n(Arising out of impugned final judgment and order dated  09-05-2024\nin SCRLA No. 7347/2021 passed by the High Court Of Gujarat At\nAhmedabad)\nGEETA KARSANDAS SENMA                              Petitioner(s)\n                                VERSUS\nSTATE OF GUJARAT & ANR.                            Respondent(s)\n(IA  No.131560/2024-EXEMPTION  FROM  FILING  C/C  OF  THE  IMPUGNED\nJUDGMENT and IA No.131561/2024-EXEMPTION FROM FILING O.T. )\n \nDate : 14-06-2024 This petition was called on for hearing today.\nCORAM : \n         HON'BLE MR. JUSTICE SANJAY KUMAR\n         HON'BLE MR. JUSTICE AUGUSTINE GEORGE MASIH\n[VACATION BENCH]\nFor Petitioner(s) M

NameError: name 'false' is not defined

[33mcloseness_checker[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
None
name 'false' is not defined
[32m***************************************************[0m

--------------------------------------------------------------------------------
[33mcode_generator[0m (to chat_manager):

[32m***** Suggested function Call: python *****[0m
Arguments: 
{"cell":"import random\n\n# Randomly select a document to regenerate and compare\nselected_doc = random.choice(batch_contents)\n\n# Regenerate the document based on the schema\nregenerated_doc = {\n    \"case_number\": \"8057/2024\",\n    \"court_name\": \"SUPREME COURT OF INDIA\",\n    \"section\": \"II-B\",\n    \"jurisdiction\": \"Special Leave to Appeal (Crl.)\",\n    \"petitioner\": \"GEETA KARSANDAS SENMA\",\n    \"respondent\": \"STATE OF GUJARAT & ANR.\",\n    \"date_of_judgment\": \"14-06-2024\",\n    \"judges\": [\n        {\"name\": \"SANJAY KUMAR\", \"designation\": \"HON'BLE MR. JUSTICE\

('in the supreme court of india\ncriminal appellate jurisdiction\nCriminal Appeal No. 1232/2024\n\nRameshar Appellant(s)\nVs\nState of Chhattisgarh Defendant(s)\nOrder\n1. On 26.02.2024, this Court took note of the submission presented by the learned counsel for the Appellant that the Appellant has been in jail for the last 18 years including remissions and thereafter the respondent/State has been imprisoned as per the prevailing policy of the State Government. Accordingly, directed to consider the case of premature release of the appellant.\n2. Today, Shri Apoorva Shukla, learned standing counsel appearing for the State of Chhattisgarh informed that the request for premature release of the Appellant has been considered and he has been released on 07.03.2024. 3. In view of the above, learned counsel for the appellant submitted that the appeal has become infructuous. Accordingly, the appeal is dismissed as infructuous. 4. Pending applications, if any, will be closed. ................J.\

[33mcode_generator[0m (to chat_manager):

[32m***** Response from calling function "python" *****[0m
('in the supreme court of india\ncriminal appellate jurisdiction\nCriminal Appeal No. 1232/2024\n\nRameshar Appellant(s)\nVs\nState of Chhattisgarh Defendant(s)\nOrder\n1. On 26.02.2024, this Court took note of the submission presented by the learned counsel for the Appellant that the Appellant has been in jail for the last 18 years including remissions and thereafter the respondent/State has been imprisoned as per the prevailing policy of the State Government. Accordingly, directed to consider the case of premature release of the appellant.\n2. Today, Shri Apoorva Shukla, learned standing counsel appearing for the State of Chhattisgarh informed that the request for premature release of the Appellant has been considered and he has been released on 07.03.2024. 3. In view of the above, learned counsel for the appellant submitted that the appeal has become infructuous. Accordingly, the

ChatResult(chat_history=[{'content': ' Use Python only. Return the schema in standard JSON format. Load the text court orders from the provided folder path e:\\schemaGenerator\\pdf_processing_with_translation. The files are inside the folder. First and foremost, read the text files for first batch. Calculate number of batches before processing first batch.\n    Read the court judgments iteratively, updating and improving the schema each time after reading a batch. Strictly perform document regeneration and closeness checking at the end updating schema for each batch by randomly selecting documents to regenerate and compare. Do not skip this step.\n    Focus on generating a standardized schema with keys and their datatypes only, not values. Make it clear, I do not want to store any data from these documents but want to study the strcuture of these documents; that is, what are the questions that are needed to be answered to create document like these.\n    ', 'role': 'assistant'}, {'cont