In [3]:
!pip install tensorflow
!pip install gymnasium
!pip install keras
!pip install keras-rl2

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl.metadata (304 bytes)
Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5


In [4]:
import gymnasium as gym
import numpy as np
import random

In [33]:
class BinPackingEnv(gym.Env):
  def __init__(self, ULD, package):

    self.pckg = package
    single_grid_space = lambda dim : {
        "dimensions": np.array([[0 for x in range(dim[0])] for y in range(dim[1])]),  # Discrete L and W
        "max_height": dim[2],
        "total_weight": 0,  # Weight as a scalar
        "max_weight": dim[3],
        "done": 0,
        "contains_priority_pack": 0,  # Boolean: 0 or 1
    }
    self.state = {
        f"ULD_{i}": single_grid_space(uld_i) for i, uld_i in enumerate(ULD)
    }

    self.observation_state = gym.spaces.MultiDiscrete(np.array([[i[0], i[1]] for i in ULD]))
    self.action_space_box = gym.spaces.Discrete(len(package))
    self.action_space_ULD = gym.spaces.Discrete(len(ULD))
    self.action_space_orientation = gym.spaces.Discrete(6)

  def step(self, act_ULD, act_obs, act_box, act_orn):
    package_ = self.pckg[act_box]
    ULD_ = self.state[f"ULD_{act_ULD}"]
    x, y, z = self._or_to_dim(package_[0], package_[1], package_[2], act_orn)
    ULD_X_lim, ULD_Y_lim = ULD_["dimensions"].shape
    ULD_x, ULD_y = act_obs[act_ULD]

    if x + ULD_x > ULD_X_lim or y + ULD_y > ULD_Y_lim:
      return self.isDone(), -1e5
    cur_max = 0

    for i in range(ULD_x, ULD_x+x):
      for j in range(ULD_y, ULD_y+y):
        cur_max = max(cur_max, ULD_["dimensions"][i, j])

    if cur_max+z > ULD_["max_height"]:
      ULD_["done"] = 1
      return self.isDone(), -1e5

    for i in range(ULD_x, ULD_x+x):
      for j in range(ULD_y, ULD_y+y):
        ULD_["dimensions"][i, j] = cur_max+z

    ULD_["total_weight"] += package_[3]
    if(ULD_["max_weight"] < ULD_["total_weight"]):
      ULD_["done"] = 1
      return self.isDone(), -1e5

    if package[4] == 1:
      ULD_["contains_priority_pack"] = 1

    #return done
    return self.isDone(), 0

  def reset(self):
    for i, uld_key in enumerate(self.state.keys()):
        ULD = self.state[uld_key]
        dimensions_shape = ULD["dimensions"].shape

        # Reset grid and ULD-specific properties
        self.state[uld_key] = {
            "dimensions": np.zeros(dimensions_shape, dtype=int),  # Reset grid to zero heights
            "max_height": ULD["max_height"],                      # Max height remains constant
            "total_weight": 0,                                    # Reset total weight
            "max_weight": ULD["max_weight"],                      # Max weight remains constant
            "done": 0,                                            # Mark ULD as not done
            "contains_priority_pack": 0                           # Reset priority pack flag
        }
    return None

  def isDone(self):
    for i in range(len(self.state)):
      if self.state[f"ULD_{i}"]["done"] == 0:
        return False
    return True

  def is_valid(self, act_ULD, act_box):
    if self.state[f"ULD_{act_ULD}"]["max_weight"] < self.state[f"ULD_{act_ULD}"]["total_weight"] + self.pckg[act_box][3]:
      self.state[f"ULD_{act_ULD}"]["done"] = 1
      return False

  def sample_package(self):
    return np.random.choice(self.action_space_box.n, size=1, replace=True).tolist()

  def _get_info(self):
    return {
        (f"ULD_{i}", self.state[f"ULD_{i}"]["dimensions"].shape, self.state[f"ULD_{i}"]["total_weight"], self.state[f"ULD_{i}"]["contains_priority_pack"]) for i in range(len(ULD))
    }

  def _or_to_dim(self, x, y, z, orn):
    match orn:
      case 0:
        return (x, y, z)
      case 1:
        return (x, z, y)
      case 2:
        return (y, x, z)
      case 3:
        return (y, z, x)
      case 4:
        return (z, x, y)
      case 5:
        return (z, y, x)

In [34]:
# ULD specs; the environment reads indices 0-3 as
# (x extent, y extent, max height, max weight).
ULD = [
    [10, 10, 5, 100],
    [7, 9, 7, 120],
    [19, 32, 20, 130],
]

# Package specs; the environment reads indices 0-2 as edge lengths, index 3 as
# weight and index 4 as the priority flag. Some entries carry a sixth value that
# the environment does not read -- presumably a cost; TODO confirm.
package = [
    [5, 5, 5, 70, 0, 200],
    [6, 4, 9, 60, 0, 150],
    [12, 15, 20, 100, 1],
    [12, 15, 20, 100, 1],
    [6, 4, 9, 60, 0, 150],
    [6, 4, 9, 60, 0, 150],
    [5, 5, 5, 70, 0, 200],
    [5, 5, 5, 70, 0, 200],
    [5, 5, 5, 70, 0, 200],
]

env = BinPackingEnv(ULD, package)

# Displayed as the cell output: one summary tuple per ULD.
env._get_info()

{('ULD_0', (10, 10), 0, 0), ('ULD_1', (9, 7), 0, 0), ('ULD_2', (32, 19), 0, 0)}

In [35]:
# Random-policy baseline: sample every action component uniformly and
# accumulate the reward until the environment reports that all ULDs are done.
num_ep = 100
ROLLOUT_SEED = 0

# Seed each sampling space once so the rollout is reproducible. Distinct seeds
# keep the four sample streams from being identical copies of one another.
rollout_spaces = (
    env.action_space_ULD,
    env.action_space_orientation,
    env.action_space_box,
    env.observation_state,
)
for seed_offset, space in enumerate(rollout_spaces):
  space.seed(ROLLOUT_SEED + seed_offset)

# Run exactly `num_ep` episodes (the previous `num_ep + 1` bound ran one extra).
for i in range(num_ep):
  env.reset()
  done = False
  score = 0

  while not done:
    act_ULD = env.action_space_ULD.sample()
    act_orn = env.action_space_orientation.sample()
    act_box = env.action_space_box.sample()
    act_obs = env.observation_state.sample()

    done, rew = env.step(act_ULD, act_obs, act_box, act_orn)
    score += rew
  print(f"Episode {i} score : {score}")

Episode 0 score : -5200000.0
Episode 1 score : -5200000.0
Episode 2 score : -1100000.0
Episode 3 score : -2300000.0
Episode 4 score : -4200000.0
Episode 5 score : -3200000.0
Episode 6 score : -3600000.0
Episode 7 score : -4300000.0
Episode 8 score : -2700000.0
Episode 9 score : -3800000.0
Episode 10 score : -4700000.0
Episode 11 score : -9400000.0
Episode 12 score : -1600000.0
Episode 13 score : -8600000.0
Episode 14 score : -2700000.0
Episode 15 score : -3700000.0
Episode 16 score : -4200000.0
Episode 17 score : -7300000.0
Episode 18 score : -11900000.0
Episode 19 score : -3800000.0
Episode 20 score : -3900000.0
Episode 21 score : -3000000.0
Episode 22 score : -4400000.0
Episode 23 score : -3000000.0
Episode 24 score : -7400000.0
Episode 25 score : -2300000.0
Episode 26 score : -2200000.0
Episode 27 score : -1300000.0
Episode 28 score : -4400000.0
Episode 29 score : -1600000.0
Episode 30 score : -3100000.0
Episode 31 score : -3200000.0
Episode 32 score : -1200000.0
Episode 33 score : 