In [2]:
import gymnasium as gym
from langchain.output_parsers import RegexParser
from langchain.schema import (
    HumanMessage,
    SystemMessage,
)
import numpy as np
from tenacity import retry, stop_after_attempt, wait_fixed
from langchain_openai import ChatOpenAI

In [3]:

class DataExtractionEnv(gym.Env):
    """
    Custom Gymnasium environment for the Agentic Data Extraction process.
    The agent interacts with the environment to improve data extraction accuracy.
    Observations include Exact Match Score and Similarity Score.
    Actions represent adjustments to prompt engineering strategies.
    """
    def __init__(self):
        super(DataExtractionEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(5)  # 5 possible prompt adjustments
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)  # [Exact Match, Similarity]
        self.state = None
        self.reset()

    def step(self, action):
        # Simulate feedback based on action
        exact_match_score = self.state[0] + np.random.uniform(-0.05, 0.1)
        similarity_score = self.state[1] + np.random.uniform(-0.05, 0.1)

        # Reward: product of scores minus a penalty for suboptimal actions
        reward = exact_match_score * similarity_score - abs(action - 2) * 0.05

        # Update state
        self.state = np.clip([exact_match_score, similarity_score], 0, 1)

        # Check if task is complete
        done = bool(self.state[0] >= 0.95 and self.state[1] >= 0.95)

        # Additional info
        info = {}
        return np.array(self.state), reward, done, info

    def reset(self):
        self.state = np.random.uniform(0.2, 0.4, size=(2,))
        return np.array(self.state)


In [4]:
class GymnasiumAgent:
    @classmethod
    def get_docs(cls, env):
        return env.unwrapped.__doc__

    def __init__(self, model, env):
        self.model = model
        self.env = env
        self.docs = self.get_docs(env)

        self.instructions = """
Your goal is to maximize your return, i.e., the sum of the rewards you receive.
I will give you an observation, reward, termination flag, truncation flag, and the return so far, formatted as:

Observation: <observation>
Reward: <reward>
Termination: <termination>
Truncation: <truncation>
Return: <sum_of_rewards>

You will respond with an action, formatted as:

Action: <action>

where you replace <action> with your actual action.
"""
        self.action_parser = RegexParser(
            regex=r"Action: (.*)", output_keys=["action"],
        )

    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
    def get_action(self, observation, total_reward, terminated):
        """Retry logic for getting action from the model."""
        response = self.model([
            SystemMessage(content=self.instructions),
            HumanMessage(content=f"Observation: {observation}\nReward: {total_reward}\nTermination: {terminated}\nTruncation: False\nReturn: {total_reward}")
        ])

        return int(self.action_parser.parse(response.content)['action'])

    def interact(self):
        observation, _ = self.env.reset()
        terminated = False
        total_reward = 0

        while not terminated:
            print(f"Observation: {observation}")

            try:
                action = self.get_action(observation, total_reward, terminated)
            except Exception as e:
                print(f"Action retrieval failed after retries: {e}")
                break

            # Perform action in the environment
            observation, reward, terminated, _ = self.env.step(action)
            total_reward += reward

            print(f"Action: {action}, Reward: {reward}, Total Return: {total_reward}")

        print("Task completed successfully!")


In [5]:
env = DataExtractionEnv()
agent = GymnasiumAgent(model=ChatOpenAI(temperature=0.2), env=env)
agent.interact()

Observation: 0.3875411494014725


  response = self.model([


Action: 0, Reward: 0.07086501457090794, Total Return: 0.07086501457090794
Observation: [0.44220968 0.38638913]
Action retrieval failed after retries: RetryError[<Future at 0x16b35f59110 state=finished raised ValueError>]
Task completed successfully!
