In [1]:
#!pip install gymnasium

In [2]:


# # Add the project root directory to Python path
# project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# sys.path.append(project_root)

#--------------------------------------------------------------------------------------------#


In [1]:
import sys
import os
import json
import gymnasium as gym
from langchain.output_parsers import RegexParser
from langchain.schema import (
    HumanMessage,
    SystemMessage,
)
import numpy as np
from langchain_openai import ChatOpenAI
from utils.LLM_utils import get_completion_gpt4
from utils.jsonparser_utils import clean_llm_output, json_to_dataframe
from actor_agents.schema_builder import schema_building_with_llm
from src.action_space.meta_prompting_agent import adjust_prompt
from evaluation.scoring import calculate_similarity





### Iterative Best Score Environment

In [2]:
class SchemaBuilderEnv(gym.Env):
    """Environment that iteratively refines a schema-building prompt.

    Every step asks ``adjust_prompt`` to rewrite the current prompt for the
    chosen discrete action, regenerates the schema with
    ``schema_building_with_llm`` and scores the result by the perplexity
    value that call returns (lower is treated as better). The episode ends
    after ``max_non_improvements`` consecutive steps that fail to beat the
    best perplexity seen so far.

    Note:
        ``step`` returns the 4-tuple ``(state, reward, done, info)`` rather
        than Gymnasium's 5-tuple, because ``GymnasiumAgent.interact`` in this
        notebook unpacks exactly four values.
    """

    def __init__(self, baseprompt, document_text, groundtruth):
        """Store the task inputs and initialise the tracking state.

        Args:
            baseprompt: Prompt the episode starts from (restored on reset).
            document_text: Document passed to ``schema_building_with_llm``.
            groundtruth: Reference object forwarded to ``adjust_prompt``.
        """
        super().__init__()
        self.action_space = gym.spaces.Discrete(5)  # 5 possible prompt adjustments
        self.observation_space = gym.spaces.Box(
            low=0,
            high=float('inf'),  # Perplexity can be any positive number
            shape=(1,),  # [Perplexity]
            dtype=np.float32
        )

        # Store schema building related data
        self.baseprompt = baseprompt
        self.document_text = document_text
        self.groundtruth = groundtruth
        self.current_prompt = baseprompt
        self.task_type = "json schema-building"

        # Track best scores and results
        self.best_perplexity = float('inf')
        self.best_schema = None
        self.best_prompt = None

        # Track consecutive non-improvements
        self.non_improvement_count = 0
        self.max_non_improvements = 2
        self.current_step = 0

        self.state = None
        # Most recent generated schema; None until reset()/step() produce one.
        self.last_output = None

    def step(self, action):
        """Apply one prompt adjustment and score the regenerated schema.

        Args:
            action: Discrete action forwarded to ``adjust_prompt``.

        Returns:
            tuple: ``(state, reward, done, info)`` where ``state`` is a
            one-element float32 array holding the current perplexity,
            ``reward`` is the previous best perplexity minus the current one
            (positive when this step improved on the best), ``done`` is True
            once ``max_non_improvements`` consecutive steps failed to
            improve, and ``info`` is a dict of diagnostics.
        """
        self.current_step += 1

        print(f"\n............................. ITERATION {self.current_step} BEGINS.....................................")

        # Get updated prompt using meta-prompting agent
        updated_prompt = adjust_prompt(
            actor_prompt=self.current_prompt,
            task_type=self.task_type,
            state=self.state,
            action=action,
            groundtruth=self.groundtruth,
            generated_output=self.last_output
        )
        self.current_prompt = updated_prompt

        # Generate new schema and get scores. The first returned value is
        # printed under the "Updated Prompt" label below.
        returned_prompt, self.last_output, perplexity_score = schema_building_with_llm(
            updated_prompt, self.document_text
        )

        print(f"\nStep {self.current_step}")
        print(f"\nUpdated Prompt: {returned_prompt}")
        print("Updated Schema:\n", self.last_output)

        # Update state (lower perplexity is better)
        self.state = np.array([perplexity_score], dtype=np.float32)

        # Remember the best score *before* this step so the reward measures
        # the change produced by this step. Computing it after the update
        # below would make every improving step score exactly 0.
        previous_best = self.best_perplexity

        # Check if current score is better than best score
        improved = perplexity_score < previous_best
        if improved:
            self.best_perplexity = perplexity_score
            self.best_schema = self.last_output
            self.best_prompt = self.current_prompt
            self.non_improvement_count = 0
        else:
            self.non_improvement_count += 1

        # Reward is the perplexity reduction relative to the previous best:
        # positive on improvement, negative on regression. If no baseline
        # exists yet (step() called before reset()), fall back to 0.
        if np.isfinite(previous_best):
            reward = previous_best - perplexity_score
        else:
            reward = 0.0

        # Determine if we should terminate
        done = self.non_improvement_count >= self.max_non_improvements

        # Create info dictionary
        info = {
            'perplexity': perplexity_score,
            'best_perplexity': self.best_perplexity,
            'improved': improved,
            'non_improvement_count': self.non_improvement_count,
            'steps': self.current_step
        }

        print(f"Current Perplexity: {perplexity_score:.4f}")
        print(f"Best Perplexity: {self.best_perplexity:.4f}")
        print(f"Improved: {improved}")
        print(f"Non-improvement count: {self.non_improvement_count}")

        if done:
            print("\nTerminating due to no improvements in last two updates")
            print("Best Results Achieved:")
            print(f"Perplexity: {self.best_perplexity:.4f}")
            print("\nBest Schema:")
            print(self.best_schema)

        return self.state, reward, done, info

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Restores the base prompt, generates a baseline schema with it and
        uses that baseline as the initial best result.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            tuple: ``(state, info)`` with the baseline perplexity as a
            one-element float32 array and an empty info dict.
        """
        super().reset(seed=seed)

        self.current_step = 0
        self.non_improvement_count = 0
        self.current_prompt = self.baseprompt

        # Generate initial schema and calculate scores
        returned_prompt, self.last_output, perplexity_score = schema_building_with_llm(
            self.current_prompt, self.document_text
        )

        print(f"\nInitial Prompt: {returned_prompt}")
        print("Initial Schema:\n", self.last_output)
        print(f"Initial Perplexity: {perplexity_score:.4f}")

        # Initialize best scores
        self.best_perplexity = perplexity_score
        self.best_schema = self.last_output
        self.best_prompt = self.current_prompt

        self.state = np.array([perplexity_score], dtype=np.float32)
        return self.state, {}

    def get_best_results(self):
        """Return the best perplexity, schema and prompt seen this episode."""
        return {
            'best_perplexity': self.best_perplexity,
            'best_schema': self.best_schema,
            'best_prompt': self.best_prompt
        }

In [None]:
class SchemaBuilderEnv(gym.Env):
    """Environment that refines a schema-building prompt on two metrics.

    Every step asks ``adjust_prompt`` to rewrite the current prompt for the
    chosen discrete action, regenerates the schema with
    ``schema_building_with_llm`` and scores it by the perplexity that call
    returns (lower is treated as better) and by
    ``calculate_similarity(output, groundtruth)`` (higher is treated as
    better). The episode ends after ``max_non_improvements`` consecutive
    steps that fail to improve on the best result.

    Note:
        ``step`` returns the 4-tuple ``(state, reward, done, info)`` rather
        than Gymnasium's 5-tuple, because ``GymnasiumAgent.interact`` in this
        notebook unpacks exactly four values.
    """

    def __init__(self, baseprompt, document_text, groundtruth):
        """Store the task inputs and initialise the tracking state.

        Args:
            baseprompt: Prompt the episode starts from (restored on reset).
            document_text: Document passed to ``schema_building_with_llm``.
            groundtruth: Reference passed to ``adjust_prompt`` and
                ``calculate_similarity``.
        """
        super().__init__()
        self.action_space = gym.spaces.Discrete(5)  # 5 possible prompt adjustments
        self.observation_space = gym.spaces.Box(
            # [Perplexity, Similarity]; bounds built as float32 to match dtype
            low=np.array([0.0, 0.0], dtype=np.float32),
            # Max perplexity is inf, max similarity is 1
            high=np.array([np.inf, 1.0], dtype=np.float32),
            shape=(2,),
            dtype=np.float32
        )

        # Store schema building related data
        self.baseprompt = baseprompt
        self.document_text = document_text
        self.groundtruth = groundtruth
        self.current_prompt = baseprompt
        self.task_type = "json schema-building"

        # Track best scores and results
        self.best_perplexity = float('inf')
        self.best_similarity = 0.0
        self.best_schema = None
        self.best_prompt = None

        # Track consecutive non-improvements
        self.non_improvement_count = 0
        self.max_non_improvements = 2
        self.current_step = 0

        self.state = None
        # Most recent generated schema; None until reset()/step() produce one.
        self.last_output = None

    def step(self, action):
        """Apply one prompt adjustment and score the regenerated schema.

        Args:
            action: Discrete action forwarded to ``adjust_prompt``.

        Returns:
            tuple: ``(state, reward, done, info)`` where ``state`` is the
            float32 array ``[perplexity, similarity]``, ``reward`` is the
            perplexity reduction plus the similarity gain relative to the
            best values held before this step, ``done`` is True once
            ``max_non_improvements`` consecutive steps failed to improve,
            and ``info`` is a dict of diagnostics.
        """
        self.current_step += 1

        print(f"\n............................. ITERATION {self.current_step} BEGINS.....................................")

        # Get updated prompt using meta-prompting agent
        updated_prompt = adjust_prompt(
            actor_prompt=self.current_prompt,
            task_type=self.task_type,
            state=self.state,
            action=action,
            groundtruth=self.groundtruth,
            generated_output=self.last_output
        )
        self.current_prompt = updated_prompt

        # Generate new schema and get scores. The first returned value is
        # printed under the "Updated Prompt" label below.
        returned_prompt, self.last_output, perplexity_score = schema_building_with_llm(
            updated_prompt, self.document_text
        )
        similarity_score = calculate_similarity(self.last_output, self.groundtruth)

        print(f"\nStep {self.current_step}")
        print(f"\nUpdated Prompt: {returned_prompt}")
        print("Updated Schema:\n", self.last_output)

        # Update state with both metrics
        self.state = np.array([perplexity_score, similarity_score], dtype=np.float32)

        # Snapshot the bests before updating so the reward below measures
        # the change produced by this step.
        previous_best_perplexity = self.best_perplexity
        previous_best_similarity = self.best_similarity

        # Pareto improvement: at least one metric strictly better and the
        # other not worse. Joining these with `or` would accept steps that
        # regress one metric and then overwrite both "best" values with it.
        improved = (
            (perplexity_score < previous_best_perplexity
             and similarity_score >= previous_best_similarity)
            or (perplexity_score <= previous_best_perplexity
                and similarity_score > previous_best_similarity)
        )
        if improved:
            self.best_perplexity = perplexity_score
            self.best_similarity = similarity_score
            self.best_schema = self.last_output
            self.best_prompt = self.current_prompt
            self.non_improvement_count = 0
        else:
            self.non_improvement_count += 1

        # Calculate combined reward against the previous bests. With no
        # finite perplexity baseline yet (step() before reset()), that term
        # contributes 0 instead of infinity.
        if np.isfinite(previous_best_perplexity):
            perplexity_reward = previous_best_perplexity - perplexity_score
        else:
            perplexity_reward = 0.0
        similarity_reward = similarity_score - previous_best_similarity
        reward = perplexity_reward + similarity_reward

        # Determine if we should terminate
        done = self.non_improvement_count >= self.max_non_improvements

        # Create info dictionary
        info = {
            'perplexity': perplexity_score,
            'similarity': similarity_score,
            'best_perplexity': self.best_perplexity,
            'best_similarity': self.best_similarity,
            'improved': improved,
            'non_improvement_count': self.non_improvement_count,
            'steps': self.current_step
        }

        print(f"Current Perplexity: {perplexity_score:.4f}")
        print(f"Current Similarity: {similarity_score:.4f}")
        print(f"Best Perplexity: {self.best_perplexity:.4f}")
        print(f"Best Similarity: {self.best_similarity:.4f}")
        print(f"Improved: {improved}")
        print(f"Non-improvement count: {self.non_improvement_count}")

        if done:
            print("\nTerminating due to no improvements in last two updates")
            print("Best Results Achieved:")
            print(f"Perplexity: {self.best_perplexity:.4f}")
            print(f"Similarity: {self.best_similarity:.4f}")
            print("\nBest Schema:")
            print(self.best_schema)

        return self.state, reward, done, info

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Restores the base prompt, generates a baseline schema with it and
        uses that baseline as the initial best result.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            tuple: ``(state, info)`` with the baseline
            ``[perplexity, similarity]`` float32 array and an empty info dict.
        """
        super().reset(seed=seed)

        self.current_step = 0
        self.non_improvement_count = 0
        self.current_prompt = self.baseprompt

        # Generate initial schema and calculate scores
        returned_prompt, self.last_output, perplexity_score = schema_building_with_llm(
            self.current_prompt, self.document_text
        )
        similarity_score = calculate_similarity(self.last_output, self.groundtruth)

        print(f"\nStart Prompt: {returned_prompt}")
        print("Start Schema:\n", self.last_output)

        # Initialize best scores
        self.best_perplexity = perplexity_score
        self.best_similarity = similarity_score
        self.best_schema = self.last_output
        self.best_prompt = self.current_prompt

        self.state = np.array([perplexity_score, similarity_score], dtype=np.float32)
        return self.state, {}

    def get_best_results(self):
        """Return the best scores, schema and prompt seen this episode."""
        return {
            'best_perplexity': self.best_perplexity,
            'best_similarity': self.best_similarity,
            'best_schema': self.best_schema,
            'best_prompt': self.best_prompt
        }

### Gymnasium Agent

In [3]:
class GymnasiumAgent:
    """LLM-driven agent that picks discrete actions for an environment.

    On every turn the agent shows the chat model the current observation and
    the return so far, parses an ``Action: <n>`` reply and applies that
    action through ``env.step`` until the environment reports termination.
    """

    @classmethod
    def get_docs(cls, env):
        """Return the docstring of the unwrapped environment (may be None)."""
        return env.unwrapped.__doc__

    def __init__(self, model, env):
        """Bind the chat model and environment and build the action parser.

        Args:
            model: Callable chat model; invoked with a list of messages and
                expected to return an object with a ``content`` attribute.
            env: Environment exposing ``reset()`` -> ``(observation, info)``
                and ``step(action)`` ->
                ``(observation, reward, terminated, info)``.
        """
        self.model = model
        self.env = env
        self.docs = self.get_docs(env)

        self.instructions = """
Your goal is to maximize your return, i.e., the sum of the rewards you receive.
I will give you an observation, reward, termination flag, truncation flag, and the return so far, formatted as:

Observation: <observation>
Reward: <reward>
Termination: <termination>
Truncation: <truncation>
Return: <sum_of_rewards>

You will respond with an action, formatted as:

Action: <action>

where you replace <action> with your actual action.
"""
        self.action_parser = RegexParser(
            regex=r"Action: (.*)", output_keys=["action"],
        )

    def interact(self):
        """Run one episode, letting the model choose an action every turn.

        A reply that cannot be parsed as an integer in ``0..4`` falls back
        to action 0 instead of aborting the episode.
        """
        observation, _ = self.env.reset()
        terminated = False
        total_reward = 0

        while not terminated:
            # Format observation for better readability
            obs_dict = {
                'Perplexity': observation[0]
            }
            print("\nCurrent State:")
            for metric, value in obs_dict.items():
                print(f"{metric}: {value:.4f}")

            # Generate a response (action) using the model
            response = self.model([
                SystemMessage(content=self.instructions),
                HumanMessage(content=f"""
Current observation: {obs_dict}
Current total reward: {total_reward:.4f}
Task completed: {terminated}

Based on these metrics, what action (0-4) would you take to improve the extraction?
Remember to respond ONLY with "Action: <number>"
""")
            ])

            # Parse exactly once. Re-parsing after this block would discard
            # the validated/default action and re-raise on the malformed
            # replies the fallback is meant to absorb.
            try:
                action = int(self.action_parser.parse(response.content)['action'])
                if not (0 <= action <= 4):
                    raise ValueError("Action must be between 0 and 4")
            except (ValueError, KeyError):
                print(f"Invalid response from model: {response.content}")
                print("Defaulting to action 0")
                action = 0

            # Perform action in the environment
            observation, reward, terminated, info = self.env.step(action)
            total_reward += reward

            print(f"\nAction taken: {action}")
            print(f"Reward: {reward:.4f}")
            print(f"\nTerminated: {terminated}")
            print(f"Total Return: {total_reward:.4f}")
            print("Metrics:", info)

        print("\nTask completed successfully!")

In [4]:
def load_prompt_from_file(filename):
    """Read a prompt template and return its text.

    The file is looked up under ``<parent of cwd>/src/actor_agents/Prompts``.

    Args:
        filename: Name of the prompt file inside the Prompts directory.

    Returns:
        str: Full contents of the file, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the file is missing; the message contains the
            full path that was tried.
    """
    # The project root is taken to be the parent of the working directory.
    parent_of_cwd = os.path.join(os.getcwd(), '..')
    root_dir = os.path.abspath(parent_of_cwd)
    location = os.path.join(root_dir, 'src', 'actor_agents', 'Prompts', filename)

    try:
        with open(location, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except FileNotFoundError:
        raise FileNotFoundError(f"Prompt file not found at: {location}")
    return contents

# Read the schema-builder prompt template used as the episode's base prompt.
# A missing file is only reported here; `base_prompt` then stays undefined.
document_type = 'invoice'
try:
    base_prompt = load_prompt_from_file('schema_builder_prompt.txt')
except FileNotFoundError as err:
    print(f"Error loading prompt: {err}")
else:
    print("Successfully loaded prompt template")
    print(base_prompt)

Successfully loaded prompt template
You are a JSON schema builder. You will receive an unstructured form-like document and you will perform following tasks:

1. Extract only the key phrases in key-value pairs, and header names in tables from the document. 

2. Arrange all keys phrases with data types in the parent region of the json and table header names in the child region.

3. Ensure the order of key names and table headers aligns with the original text.

4. ONLY include unique column names as they appear in the document, without duplicates or associated data. 

5. ONLY Respond with the JSON schema.

Here is the content of the form-like document:



In [5]:
## INPUT PARAMS TEST

if __name__ == "__main__":
    # Sample invoice text used as the document for schema building.
    # Plain string literal: there are no placeholders to interpolate.
    invoice = """
-----------------Invoice------------------
                              Page 1 of 3

Invoice Number: INV-12345
Customer: XYZ Corp
Invoice Date: 2024-06-01


Item    Quantity    Price     Total
item_1     5         $100      500
item_2     10        $50       500
item_3     6         $10       60

					Subtotal: 1060
					Total GST: 500
					Total Amount: $1560
--------------------------------------------
"""
    # Reference schema for the invoice above: field names mapped to types,
    # with the table headers nested under "Line_Items".
    schema = {
        "invoice_number": "string",
        "customer": "string",
        "invoice_date": "yyyy-mm-dd",
        "sub_total": "number",
        "total_GST": "number",
        "total_amount": "number",
        "Line_Items": [
            {
                "item": "string",
                "quantity": "number",
                "price": "number",
                "total": "number"
            }
        ]
    }

    # Example extraction output; compared with `groundtruth` below it lacks
    # some fields and one line item, and keeps prices as "$"-prefixed strings.
    generated_output = {
        "invoice_number": "INV-12345",
        "invoice_date": "2024-06-01",
        "sub_total": 1060,
        "total_amount": 1560,
        "Line_Items": [
            {
                "item": "item_1",
                "quantity": 5,
                "price": "$100",
                "total": 500
            },
            {
                "item": "item_2",
                "quantity": 10,
                "price": "$50",
                "total": 500
            }
        ]
    }

    # Complete expected extraction for the invoice above.
    groundtruth = {
        "invoice_number": "INV-12345",
        "customer": "XYZ Corp",
        "invoice_date": "2024-06-01",
        "sub_total": 1060,
        "total_GST": 500,
        "total_amount": 1560,
        "Line_Items": [
            {
                "item": "item_1",
                "quantity": 5,
                "price": 100,
                "total": 500
            },
            {
                "item": "item_2",
                "quantity": 10,
                "price": 50,
                "total": 500
            },
            {
                "item": "item_3",
                "quantity": 6,
                "price": 10,
                "total": 60
            }
        ]
    }
    
    # print(document_extractor_agent(base_prompt, document_type, invoice, schema))
    # print(calculate_exact_match(generated_output,groundtruth))

In [7]:
# Build the environment around the sample invoice; the reference schema
# serves as the ground truth handed to the environment.
env = SchemaBuilderEnv(baseprompt=base_prompt, document_text=invoice, groundtruth=schema)

# Drive the environment with a low-temperature chat model.
agent = GymnasiumAgent(env=env, model=ChatOpenAI(temperature=0.2))

# Run one full episode.
agent.interact()

# # Get the best results
# best_results = env.get_best_results()
# print("\nBest Results:")
# print(f"Perplexity: {best_results['best_perplexity']:.4f}")
# print("\nBest Schema:")
# print(best_results['best_schema'])

TypeError: GymnasiumAgent.interact() got an unexpected keyword argument 'verbose'