<a href="https://colab.research.google.com/github/Arkajeet7/warehouse-robotics-using-reinforcement-learning-/blob/main/environment/environment_rl_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
import matplotlib.pyplot as plt
from collections import deque
import random
import torch
from torch import nn
import torch.nn.functional as F
import importlib.util
import pandas as pd

In [None]:
class WarehouseEnv(gym.Env):
    """
    Warehouse Environment for RL tasks.

    Grid representation:
    - 0: walkable path
    - 1: rack/obstacle
    - S: start (0.5)
    - I: intermediate (0.65)
    - G: goal (0.75)
    - A: agent (1.0)

    Actions:
    0: LEFT, 1: DOWN, 2: RIGHT, 3: UP
    """

    metadata = {'render_modes': ['human', 'ansi'], 'render_fps': 4}

    def __init__(self, rows=11, colm=10, render_mode=None):
        """Set up the grid, the Gym spaces and the start/intermediate/goal cells.

        Args:
            rows: Number of grid rows.
            colm: Number of grid columns.
            render_mode: Stored unchanged on ``self.render_mode``.

        The three key cells are drawn with ``self.random_choice()`` and
        compared with ``self.manhattan()``; both are defined elsewhere in
        the class (presumably returning a free cell and a grid distance --
        confirm against their definitions).
        """
        super().__init__()

        self.rows = rows
        self.colm = colm
        self.render_mode = render_mode
        self.reached_intermediate = False
        self.episode_num = 0

        # Four discrete moves; the observation is the whole grid with every
        # cell value in [0, 1].
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(rows, colm), dtype=np.float32
        )

        # The layout is built before any position is sampled.
        self.warehouse_layout = self._create_default_layout()

        self.start_pos = self.random_choice()

        # Resample the intermediate cell until it differs from the start and
        # lies more than 3 steps away from it.
        # NOTE(review): the retry loops below have no iteration limit; on a
        # grid with no cells that far apart they would never terminate.
        while True:
            self.intermediate_pos = self.random_choice()
            start_to_mid = self.manhattan(self.start_pos, self.intermediate_pos)
            if self.intermediate_pos != self.start_pos and start_to_mid > 3:
                break

        # Resample the goal until it differs from BOTH the start and the
        # intermediate cell and lies more than 3 steps from the intermediate.
        # The start comparison was previously missing, so the goal could be
        # placed on top of the start cell.
        while True:
            self.goal_pos = self.random_choice()
            mid_to_goal = self.manhattan(self.intermediate_pos, self.goal_pos)
            if (
                self.goal_pos != self.intermediate_pos
                and self.goal_pos != self.start_pos
                and mid_to_goal > 3
            ):
                break

        self.steps = 0
        self.agent_pos = self.start_pos
        # Step budget scales with the number of grid cells.
        self.max_steps = rows * colm * 32

    def _create_default_layout(self):
        layout = np.zeros((self.rows, self.colm), dtype=int)
        columns_to_one = [1, 3, 5, 7, 9]
        layout[:, columns_to_one] = 1
        rows_to_zero = [0, 10]
        layout[rows_to_zero, :] = 0
        return layout