<a href="https://colab.research.google.com/github/Charish53/RL_lab/blob/main/LAB01/CS22B1095_LAB01_Cliff_walker_env.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cliff Walker Environment

In [1]:
import numpy as np
import pandas as pd
import gymnasium as gym
from gym import spaces,Env

In [2]:
import numpy as np
from gym import Env, spaces

class CliffWalkingEnv(Env):
    """Grid-world "cliff walking" environment.

    The agent starts in the bottom-left cell and has to reach the
    bottom-right cell. Every cell between them on the bottom row is a cliff:
    stepping onto one yields the cliff reward and moves the agent back to the
    start cell without ending the episode.

    Positions are stored as ``(row, col)`` tuples; the observations handed to
    the caller are flat indices ``row * n_cols + col``.

    Args:
        grid_size: ``(rows, cols)`` of the grid. Defaults to ``(4, 12)``.
    """

    def __init__(self, grid_size=(4, 12)):
        super().__init__()

        # Define grid size
        self.grid_size = grid_size
        n_rows, n_cols = grid_size[0], grid_size[1]

        # Action space: 0 = Left, 1 = Down, 2 = Right, 3 = Up
        self.action_space = spaces.Discrete(4)

        # Observation space: each state corresponds to one grid cell
        self.observation_space = spaces.Discrete(n_rows * n_cols)

        # Start, cliff and goal all live on the bottom row. The row index is
        # derived from grid_size (not fixed at 3) so that grids with a
        # different number of rows are laid out correctly.
        bottom_row = n_rows - 1
        last_col = n_cols - 1
        self.cliff = [(bottom_row, col) for col in range(1, last_col)]
        self.goal = (bottom_row, last_col)  # Goal at the bottom-right
        self.start = (bottom_row, 0)  # Starting position at the bottom-left

        # Cell layout: 0 = safe, 1 = cliff, 2 = goal
        self.layout = np.zeros(grid_size, dtype=int)
        self.layout[self.start] = 0  # Start is safe
        self.layout[self.goal] = 2  # Goal
        for pos in self.cliff:
            self.layout[pos] = 1  # Cliff cells

        # Rewards: Goal = 1.0, Cliff = -100.0, Safe = -1.0
        self.rewards = np.full(grid_size, -1.0, dtype=float)
        self.rewards[self.goal] = 1.0
        for pos in self.cliff:
            self.rewards[pos] = -100.0

        self.state = self.start  # Initialize the agent's position

    def step(self, action):
        """Execute one step in the environment.

        Args:
            action: 0 = Left, 1 = Down, 2 = Right, 3 = Up. Moves that would
                leave the grid are clamped to the border; any other value
                leaves the position unchanged.

        Returns:
            A tuple ``(state_index, reward, done, info)``. ``reward`` is the
            reward of the cell that was entered, ``done`` is True only when
            that cell is the goal, and ``info`` is an empty dict. After a
            cliff fall the returned ``state_index`` is that of the start
            cell, because the agent has already been moved back there.
        """
        x, y = self.state

        # Update position based on action, clamping at the grid borders
        if action == 0:  # Left
            y = max(0, y - 1)
        elif action == 1:  # Down
            x = min(self.grid_size[0] - 1, x + 1)
        elif action == 2:  # Right
            y = min(self.grid_size[1] - 1, y + 1)
        elif action == 3:  # Up
            x = max(0, x - 1)

        # Update state
        self.state = (x, y)

        # The reward is taken from the cell that was entered, before any
        # reset to the start cell below.
        reward = self.rewards[x, y]

        # Only reaching the goal terminates the episode.
        done = self.state == self.goal

        # If the agent falls into the cliff, send it back to the start;
        # the episode continues.
        if self.state in self.cliff:
            self.state = self.start

        return self._get_state_index(), reward, done, {}

    def reset(self):
        """Reset the agent to the start cell and return its flat index."""
        self.state = self.start
        return self._get_state_index()

    def render(self):
        """Print the grid to stdout.

        Symbols: ``.`` safe cell, ``C`` cliff, ``G`` goal, ``A`` agent. The
        agent symbol overwrites whatever cell it currently occupies.
        """
        grid = np.array(self.layout, dtype=str)
        grid[self.layout == 0] = "."  # Safe cells
        grid[self.layout == 1] = "C"  # Cliff
        grid[self.layout == 2] = "G"  # Goal
        x, y = self.state
        grid[x, y] = "A"  # Agent's position
        print("\n".join(" ".join(row) for row in grid))
        print("\n")

    def _get_state_index(self):
        """Convert the current (row, col) position to a flat index."""
        return self.state[0] * self.grid_size[1] + self.state[1]


# Example usage
if __name__ == "__main__":
    # Demo: run a uniformly random policy until the agent reaches the goal,
    # printing the grid and the reward after every step.
    env = CliffWalkingEnv(grid_size=(4, 12))
    env.reset()
    done = False

    print("Initial Environment:")
    env.render()

    while not done:
        action = env.action_space.sample()  # Random policy
        _, reward, done, _ = env.step(action)
        print(f"\nAction: {action}")
        env.render()
        print(f"Reward: {reward}")

        # A cliff fall does not end the episode (step() moves the agent back
        # to the start and returns done=False), so it has to be reported
        # outside the `done` check or the message would never be printed.
        if reward == -100.0:
            print("Oops! You fell into the cliff.")
        elif done and reward == 1.0:
            print("Congratulations! You reached the goal!")


  and should_run_async(code)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
. C C C C C C C C C C G


Reward: -1.0

Action: 0
. . . . . . . . . . . .
. . . . . . . . . . . .
A . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 3
. . . . . . . . . . . .
A . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 0
. . . . . . . . . . . .
A . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 0
. . . . . . . . . . . .
A . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 0
. . . . . . . . . . . .
A . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 3
A . . . . . . . . . . .
. . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 2
. A . . . . . . . . . .
. . . . . . . . . . . .
. . . . . . . . . . . .
. C C C C C C C C C C G


Reward: -1.0

Action: 2
. . A . . . . . . . . .
. . . .