In [None]:
import numpy as np
import gymnasium as gym

In [None]:
class mystate():
    """Plain record describing one special cell of the grid world.

    ``GridWorldEnv.step`` compares the agent location against ``pos1``;
    on a match it pays out ``reward`` and moves the agent to ``pos2``.

    Attributes:
        pos1: Grid coordinates of the cell that triggers the special move.
        pos2: Grid coordinates the agent is moved to.
        reward: Reward returned for the special move.
    """

    def __init__(self, pos1, pos2, reward):
        self.pos1 = pos1
        self.pos2 = pos2
        self.reward = reward

    def __repr__(self):
        # Readable representation for debugging and notebook display.
        return (
            f"{type(self).__name__}(pos1={self.pos1!r}, "
            f"pos2={self.pos2!r}, reward={self.reward!r})"
        )

class GridWorldEnv(gym.Env):
    """Square grid world with "special" cells that move the agent.

    The agent occupies one cell of a ``size`` x ``size`` grid and picks one
    of four moves per step. If the agent starts a step on the ``pos1`` cell
    of a special state, the chosen move is ignored: the agent receives that
    state's reward and is placed on its ``pos2`` cell. Otherwise the move is
    applied and clipped to the grid; a move that leaves the agent in place
    (i.e. it bumped into the border) costs -1, any other move yields 0.

    NOTE(review): ``terminal_states`` is stored but never consulted by
    ``step``, which always reports ``terminated=False``.
    """

    # Character used by ``render`` to mark the agent's cell.
    _AGENT_SYMBOL = "X"

    def __init__(
        self,
        size: int = 5,
        special_states: "dict | None" = None,
        terminal_states: "dict | None" = None,
    ):
        """Create the environment.

        Args:
            size: Side length of the square grid.
            special_states: Mapping from a display name to an object with
                ``pos1``, ``pos2`` and ``reward`` attributes.
            terminal_states: Mapping of terminal states (currently unused by
                the dynamics).
        """
        super().__init__()
        self.size = size
        # Fresh dicts per instance instead of shared mutable defaults.
        self.special_states = {} if special_states is None else special_states
        self.terminal_states = {} if terminal_states is None else terminal_states
        self._agent_location = self.np_random.integers(
            0, self.size, size=2, dtype=int
        )

        self.observation_space = gym.spaces.Dict(
            {
                "agent": gym.spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )

        self.action_space = gym.spaces.Discrete(4)
        # Direction labels refer to the coordinate pair itself; ``render``
        # prints the first coordinate as the row index.
        self._action_to_direction = {
            0: np.array([1, 0]),  # right
            1: np.array([0, 1]),  # up
            2: np.array([-1, 0]),  # left
            3: np.array([0, -1]),  # down
        }

    def _get_obs(self):
        """Return the observation dict holding the agent's coordinates."""
        return {"agent": self._agent_location}

    def _get_info(self):
        """Return the L1 distance from the agent to each special state's ``pos1``."""
        return {
            name: np.linalg.norm(self._agent_location - state.pos1, ord=1)
            for name, state in self.special_states.items()
        }

    def reset(self, seed=None, options=None):
        """Place the agent on a uniformly random cell.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` so that the
                environment's random generator is (re)seeded.
            options: Unused.

        Returns:
            The ``(observation, info)`` pair for the new start state.
        """
        # Seeds ``self.np_random``; without this call ``seed`` has no effect.
        super().reset(seed=seed)

        self._agent_location = self.np_random.integers(
            0, self.size, size=2, dtype=int
        )

        return self._get_obs(), self._get_info()

    def step(self, action):
        """Advance the environment by one action.

        Args:
            action: Index into the four moves (0-3).

        Returns:
            ``(observation, reward, terminated, truncated, info)``;
            ``terminated`` and ``truncated`` are always ``False``.
        """
        # Looked up first so an invalid action fails even on a special cell.
        direction = self._action_to_direction[int(action)]

        for state in self.special_states.values():
            if np.array_equal(self._agent_location, state.pos1):
                reward = state.reward
                # Copy so the returned observation never aliases the special
                # state's own array.
                self._agent_location = np.array(state.pos2, dtype=int)
                break
        else:
            # Ordinary move: clip to the grid and penalise bumping the border.
            new_location = np.clip(
                self._agent_location + direction, 0, self.size - 1
            )
            stayed_put = np.array_equal(self._agent_location, new_location)
            reward = -1 if stayed_put else 0
            self._agent_location = new_location

        terminated = False
        truncated = False
        return self._get_obs(), reward, terminated, truncated, self._get_info()

    def render(self, mode='human'):
        """Print the current grid to the console.

        Empty cells are shown as ``O``, special states by their name (at
        their ``pos1`` cell) and the agent by ``_AGENT_SYMBOL``.

        Args:
            mode: Accepted for compatibility; the grid is always printed as
                text regardless of its value.
        """
        # Start from an empty grid of 'O' cells.
        grid = [["O" for _ in range(self.size)] for _ in range(self.size)]

        for name, state in self.special_states.items():
            row, col = state.pos1
            grid[row][col] = name

        # Draw the agent last so it stays visible on a special cell.
        x, y = self._agent_location
        grid[x][y] = self._AGENT_SYMBOL

        # Emit the grid followed by a blank separator for readability.
        print("\n".join([" ".join(row) for row in grid]))
        print("\n")

In [None]:
# Register the environment class so it can be built with ``gym.make``.
gym.register(id="gymnasium_env/GridWorld-v10", entry_point=GridWorldEnv)

In [None]:
# Two special cells: standing on pos1 yields the reward and moves the agent
# to pos2 on the next step.
special_states = {
    "A": mystate(pos1=np.array([0, 1]), pos2=np.array([4, 1]), reward=10),
    "B": mystate(pos1=np.array([0, 3]), pos2=np.array([2, 3]), reward=5),
}
env = gym.make(
    "gymnasium_env/GridWorld-v10",
    size=5,
    special_states=special_states,
)

In [None]:
def policy_eval(pi):
    """Evaluate the policy ``pi`` (placeholder, not implemented yet).

    Args:
        pi: The policy to evaluate. NOTE(review): its expected format is not
            defined anywhere in this file — confirm before implementing.

    Raises:
        NotImplementedError: Always, until the evaluation is written.
    """
    raise NotImplementedError("policy_eval is not implemented yet")

In [None]:
# Roll out a uniformly random policy and print the grid after every step.
eps_length = 1000  # number of environment steps to simulate
env.reset()

for _step in range(eps_length):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    env.render()
    # An environment must be reset before stepping again once an episode
    # has ended.
    if terminated or truncated:
        env.reset()
    