<a href="https://colab.research.google.com/github/GOPIKA-S-S/RL/blob/main/2348518_lab6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy gym




Create the Custom Environment

In [4]:
import gym
from gym import spaces
import numpy as np

class GridWorldEnv(gym.Env):
    """Square grid world where an agent walks from the top-left to the bottom-right cell.

    Positions are ``(row, col)`` tuples. The agent starts at ``(0, 0)`` after
    ``reset()`` and the episode ends when it reaches
    ``(grid_size - 1, grid_size - 1)``.

    Actions:
        0 -- up    (row - 1)
        1 -- down  (row + 1)
        2 -- left  (col - 1)
        3 -- right (col + 1)

    Moves that would leave the grid keep the agent on the border cell.
    """

    def __init__(self, grid_size=5):
        """Build the action/observation spaces for a ``grid_size`` x ``grid_size`` grid.

        Args:
            grid_size: Number of rows and columns; must be at least 1.

        Raises:
            ValueError: If ``grid_size`` is smaller than 1.
        """
        if grid_size < 1:
            raise ValueError(f"grid_size must be at least 1, got {grid_size}")

        super().__init__()

        self.grid_size = grid_size
        self.action_space = spaces.Discrete(4)  # Four possible actions: up, down, left, right
        self.observation_space = spaces.Box(low=0, high=grid_size - 1, shape=(2,), dtype=np.int32)

        # The agent has no position until reset() is called.
        self.state = None
        self.goal = (grid_size - 1, grid_size - 1)  # bottom-right corner

    def reset(self):
        """Place the agent in the top-left corner and return that observation.

        Returns:
            ``np.ndarray`` of dtype ``int32`` holding ``[row, col]``.
        """
        self.state = (0, 0)
        return np.array(self.state, dtype=np.int32)

    def step(self, action):
        """Apply one move and report the outcome.

        ``reset()`` must be called first: until then ``self.state`` is ``None``
        and no move can be computed. An action outside 0-3 leaves the position
        unchanged but still counts as a step.

        Args:
            action: One of 0 (up), 1 (down), 2 (left), 3 (right).

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is the new ``[row, col]`` as an ``int32`` array, ``reward`` is 1
            on reaching the goal and -0.01 otherwise, ``done`` tells whether
            the goal was reached, and ``info`` is an empty dict.
        """
        row, col = self.state
        last_index = self.grid_size - 1

        # Clamp each move so the agent never leaves the grid.
        if action == 0:    # up
            row = max(row - 1, 0)
        elif action == 1:  # down
            row = min(row + 1, last_index)
        elif action == 2:  # left
            col = max(col - 1, 0)
        elif action == 3:  # right
            col = min(col + 1, last_index)
        self.state = (row, col)

        # Check if the goal has been reached
        done = self.state == self.goal
        reward = 1 if done else -0.01  # Reward for reaching goal, small penalty for each step taken

        return np.array(self.state, dtype=np.int32), reward, done, {}

    def render(self, mode='human'):
        """Print the grid: 1 marks the agent, 2 marks the goal, 0 is empty.

        When the agent stands on the goal the cell shows 2, because the goal
        marker is written last. Before ``reset()`` only the goal is drawn.

        Args:
            mode: Accepted for gym compatibility; not used.
        """
        grid = np.zeros((self.grid_size, self.grid_size))
        # Indexing with None means np.newaxis, so an un-reset state would
        # overwrite every cell with the agent marker; skip it instead.
        if self.state is not None:
            grid[self.state] = 1  # Agent position
        grid[self.goal] = 2  # Goal position
        print(grid)

    def close(self):
        """Release resources; this environment holds none."""
        pass


Using the Environment

In [5]:
if __name__ == "__main__":
    # Demo: run a handful of episodes with a uniformly random policy,
    # printing the grid after every move and the return of each episode.
    env = GridWorldEnv()
    total_episodes = 5

    for episode_number in range(1, total_episodes + 1):
        observation = env.reset()
        episode_return = 0
        finished = False

        while not finished:
            # Random action (replace with your model's action)
            chosen_action = env.action_space.sample()
            observation, step_reward, finished, _ = env.step(chosen_action)
            episode_return += step_reward
            env.render()  # Visualize the grid

        print(f"Episode {episode_number} Total Reward: {episode_return}")

    env.close()


[[0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 2.]]