In [1]:
import os
import sys
import numpy as np
import torch

# Make the parent of the current working directory importable before the
# project imports that follow (presumably where `env/` and `models/` live —
# confirm against the repo layout). The membership check keeps repeated runs
# of this cell from appending the same entry to sys.path again.
project_root = os.path.abspath('..')
if project_root not in sys.path:
	sys.path.append(project_root)

from env.env import BinPacking3DEnv
from models.transformer import BinPackingTransformer
from models.policy_net import PolicyNetwork
from models.value_net import ValueNetwork

# TEST: Transformer Model

In [2]:
# Hyperparameters for the transformer under test, gathered in one mapping
transformer_kwargs = dict(
	d_model=128,
	n_head=8,
	n_layers=3,
	d_feedforward=512,
)

# Instantiate the model from the collected settings
transformer = BinPackingTransformer(**transformer_kwargs)

In [3]:
# Show the model's repr as the cell output to inspect its submodules
transformer

BinPackingTransformer(
  (ems_list_embedding): Embedding(
    (linear): Linear(in_features=6, out_features=128, bias=True)
  )
  (buffer_embedding): Embedding(
    (linear): Linear(in_features=3, out_features=128, bias=True)
  )
  (transformer_blocks): ModuleList(
    (0-2): 3 x TransformerBlock(
      (self_attn_ems_list): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
      )
      (self_attn_buffer): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
      )
      (norm1_ems_list): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (norm1_buffer): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (mlp_ems_list): Sequential(
        (0): Linear(in_features=128, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=128, bias=True)
      )
      (mlp_buffer): Sequential(
        (0): Lin

In [None]:
# Tally the element count of every parameter tensor the model exposes
total_params = 0
for param in transformer.parameters():
	total_params += param.numel()

total_params

2430081

In [4]:
# Build a small packing environment; its first observation serves as the
# dummy input for the model checks below
env_settings = {
	'bin_size': (5, 5, 5),
	'items': [(2, 3, 1), (2, 2, 3), (1, 1, 2), (3, 2, 2)],
	'buffer_size': 2,
	'num_rotations': 2,
	'max_ems': 100,
}
env = BinPacking3DEnv(**env_settings)

# NOTE: the name `obervation` (sic) is what the later cells read from, so the
# spelling is kept as-is here
obervation = env.reset()

In [5]:
# EMS list: the 'ems_list' entry of the initial observation.
# Its shape is (max_ems, 6), i.e. (100, 6) for the environment built above.
ems_list = obervation['ems_list']

# Display the shape as the cell output
ems_list.shape

(100, 6)

In [6]:
# EMS mask: the 'ems_mask' entry of the initial observation, one value per
# EMS slot (shape (max_ems,)). It is later cast to bool and passed to the
# transformer as `ems_mask`.
ems_mask = obervation['ems_mask']

# Display the shape as the cell output
ems_mask.shape

(100,)

In [7]:
# Buffer: the 'buffer' entry of the initial observation.
# Its shape is (buffer_size, 3), i.e. (2, 3) for the environment built above.
buffer = obervation['buffer']

# Display the shape as the cell output
buffer.shape

(2, 3)

In [8]:
# Prepend a batch axis of length 1 to each observation array so the shapes
# line up with the batched inputs fed to the model
ems_list_np = ems_list[np.newaxis, ...]  # [1, max_ems, 6]
ems_mask_np = ems_mask[np.newaxis, ...]  # [1, max_ems]
buffer_np = buffer[np.newaxis, ...]      # [1, buffer_size, 3]

ems_list_np.shape, ems_mask_np.shape, buffer_np.shape

((1, 100, 6), (1, 100), (1, 2, 3))

In [9]:
# Build one tensor per numpy array, each with its target dtype:
# float32 for the EMS list and the buffer, bool for the EMS mask
ems_list_tensor, ems_mask_tensor, buffer_tensor = (
	torch.tensor(array, dtype=dtype)
	for array, dtype in (
		(ems_list_np, torch.float32),
		(ems_mask_np, torch.bool),
		(buffer_np, torch.float32),
	)
)

ems_list_tensor.shape, ems_mask_tensor.shape, buffer_tensor.shape

(torch.Size([1, 100, 6]), torch.Size([1, 100]), torch.Size([1, 2, 3]))

In [10]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

device

device(type='cuda')

In [11]:
# Place the model on the selected device
transformer.to(device)

# Re-bind each input tensor to its counterpart on that same device
ems_list_tensor, ems_mask_tensor, buffer_tensor = (
	tensor.to(device)
	for tensor in (ems_list_tensor, ems_mask_tensor, buffer_tensor)
)

In [12]:
# Switch the model to evaluation mode before the forward pass
transformer.eval()

# Keyword inputs for the forward call, collected up front
transformer_inputs = {
	'ems_list': ems_list_tensor,
	'buffer': buffer_tensor,
	'ems_mask': ems_mask_tensor,
}

# Gradient tracking is disabled: only the output shapes are inspected
with torch.no_grad():
	ems_list_features, buffer_features = transformer(**transformer_inputs)

ems_list_features.shape, buffer_features.shape

(torch.Size([1, 128]), torch.Size([1, 128]))

# TEST: Value Network

In [13]:
# The value head is fed the transformer's feature vectors, so its input width
# is read from them instead of repeating the literal 128 (assumes `d_input`
# is the per-stream feature width, as the current 128/128 pairing suggests —
# confirm against ValueNetwork)
feature_dim = ems_list_features.shape[-1]

value_net = ValueNetwork(
	d_input=feature_dim,
	d_hidden=128,
)

value_net.to(device)

# Evaluation mode and no_grad: this cell only checks the output shape
value_net.eval()

with torch.no_grad():
	value = value_net(ems_list_features, buffer_features)

value.shape

torch.Size([1, 1])

# TEST: Policy Network

In [14]:
# Get the action mask: the 'action_mask' entry of the initial observation
action_mask = obervation['action_mask'] # [W, L, num_rotations, buffer_size]

# Display the shape as the cell output ((5, 5, 2, 2) for the environment above)
action_mask.shape

(5, 5, 2, 2)

In [15]:
# Prepend a batch axis of length 1 so the mask matches the batched layout
action_mask_np = action_mask[np.newaxis, ...]  # [batch_size=1, W, L, num_rotations, buffer_size]
action_mask_np.shape

(1, 5, 5, 2, 2)

In [16]:
# Convert the numpy array to a boolean tensor; the layout is unchanged
action_mask_tensor = torch.tensor(action_mask_np, dtype=torch.bool) # [batch_size=1, W, L, num_rotations, buffer_size]

# Display the shape as the cell output
action_mask_tensor.shape

torch.Size([1, 5, 5, 2, 2])

In [17]:
# Collapse every non-batch axis into a single action axis while leaving the
# batch axis untouched. Unlike reshape(1, -1), this keeps one row per sample
# for batches larger than 1, and re-running the cell is a no-op.
action_mask_tensor = action_mask_tensor.flatten(start_dim=1)  # [batch_size, W * L * num_rotations * buffer_size]

action_mask_tensor.shape

torch.Size([1, 100])

In [18]:
# Size the policy head from the tensors it actually receives rather than from
# repeated literals: the action count is the width of the flattened action
# mask, and the input width is that of the transformer features (assumes
# `d_input` is the per-stream feature width — confirm against PolicyNetwork)
num_actions = action_mask_tensor.shape[-1]  # W * L * num_rotations * buffer_size (5*5*2*2 here)
feature_dim = ems_list_features.shape[-1]   # 128 here

policy_net = PolicyNetwork(
	d_action=num_actions,
	d_input=feature_dim,
	d_hidden=128,
)

# The network and the mask must sit on the same device as the features
policy_net.to(device)
action_mask_tensor = action_mask_tensor.to(device)

# Evaluation mode and no_grad: this cell only checks the output shape
policy_net.eval()

with torch.no_grad():
	policy = policy_net(ems_list_features, buffer_features, action_mask_tensor)

policy.shape # [batch_size=1, W * L * num_rotations * buffer_size]

torch.Size([1, 100])