In [13]:
import importlib

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [14]:
# Load the HandyRL model helpers, the Hungry Geese environment and the GTrXL
# model module by dotted path, in that order, and bind each to its own name.
model_module, env_module, gtrxl_module = (
    importlib.import_module(dotted_path)
    for dotted_path in (
        "handyrl.model",
        "handyrl.envs.kaggle.hungry_geese",
        "handyrl.envs.kaggle.models.gtrxl_torch",
    )
)

In [15]:
# Instantiate the environment class exposed by the hungry_geese module and call
# reset() on it before any observation is requested from it.
# NOTE(review): reset() presumably initialises a fresh game state — confirm
# against Environment.reset in handyrl.envs.kaggle.hungry_geese.
e = env_module.Environment()
e.reset()

In [16]:
# Take the environment's current observation (a NumPy array), prepend a batch
# axis and clone it so the tensor no longer shares memory with the array.
obs = torch.from_numpy(e.observation()[None, :, :]).clone()
print(f"size: {obs.size()}, type: {obs.dtype}")

# Concatenate the same observation with itself along the batch axis to obtain
# a batch of two for the forward pass.
obs_ = torch.cat((obs, obs), dim=0)

size: torch.Size([1, 4, 36]), type: torch.int64


In [17]:
# Display the raw contents of the single-sample observation tensor built from
# e.observation().
obs

tensor([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          4, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1],
         [1, 2, 1, 1, 1, 3, 5, 1, 1, 1, 6, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])

In [18]:
# Standard-normal float tensor holding two samples whose trailing two
# dimensions match those of `obs`.
# NOTE(review): `input_` is not consumed by any cell visible in this part of
# the notebook — confirm whether it is still needed.
input_ = torch.randn(2, obs.size(1), obs.size(2))
print(f"size: {input_.size()}, type: {input_.dtype}")

size: torch.Size([2, 4, 36]), type: torch.float32


In [19]:
# Build the GeeseNetGTrXL network; the environment instance is handed to the
# constructor (what the model reads from it is not visible here — check the
# GeeseNetGTrXL definition in the hungry_geese module).
net = env_module.GeeseNetGTrXL(e)

In [20]:
# Total parameter count: add up the element count of every tensor yielded by
# net.parameters(), then print it with thousands separators.
params = sum(map(torch.numel, net.parameters()))
print(f"{params:,}")

130,820


In [21]:
# Trainable parameter count: only tensors with requires_grad set contribute
# their element count; all others contribute zero.
params = sum(
    weight.numel() if weight.requires_grad else 0
    for weight in net.parameters()
)
print(f"{params:,}")

130,820


In [22]:
# Display the module's repr to inspect the layers the network is composed of.
net

GeeseNetGTrXL(
  (gtrxl): GTrXL(
    (embed): PositionalEncoding()
    (transfomer): TransformerEncoder(
      (layers): ModuleList(
        (0): TEL(
          (self_attn): MultiheadAttention(
            (out_proj): _LinearWithBias(in_features=36, out_features=36, bias=True)
          )
          (linear1): Linear(in_features=36, out_features=64, bias=True)
          (dropout): Dropout(p=0.0, inplace=False)
          (linear2): Linear(in_features=64, out_features=36, bias=True)
          (norm1): LayerNorm((36,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((36,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.0, inplace=False)
          (dropout2): Dropout(p=0.0, inplace=False)
          (gru_1): GRU(36, 36, batch_first=True)
          (gru_2): GRU(36, 36, batch_first=True)
        )
        (1): TEL(
          (self_attn): MultiheadAttention(
            (out_proj): _LinearWithBias(in_features=36, out_features=36, bias=True)
          )
  

In [23]:
# Forward the two-sample observation batch through the network.
out = net(obs_)
# `out` is a dict (the recorded result has 'policy' and 'value' entries), so the
# size and dtype are read from the 'policy' tensor rather than from `out` itself.
print(f"size: {out['policy'].size()}, type: {out['policy'].dtype}")

size: torch.Size([2, 4]), type: torch.float32


In [24]:
# Display the full output dict returned by the forward pass.
# NOTE(review): in the recorded output 'policy' has one row per sample, while
# 'value' prints as a single row with two entries, both equal to 1.0 — confirm
# the value head's output shape and whether it is saturating in the model code.
out

{'policy': tensor([[  2.4850, 153.4285, 146.3302,   9.5833],
         [  2.1117, 149.4414, 144.9624,   6.5907]], grad_fn=<CatBackward>),
 'value': tensor([[1., 1.]], grad_fn=<TanhBackward>)}