In [1]:
import importlib
import math

import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Load the HandyRL modules by dotted path. `model_module` supplies the BaseModel
# class that GeeseNet subclasses; `env_module` supplies the Hungry Geese Environment.
model_module = importlib.import_module("handyrl.model")
env_module = importlib.import_module("handyrl.envs.kaggle.hungry_geese")

Loading environment football failed: No module named 'gfootball'


In [3]:
class PositionalEncoding(nn.Module):
    """
    Add fixed sinusoidal positional encodings to the input, then apply dropout.

    The encoding table is stored in the non-trainable buffer ``pe`` with shape
    ``(max_len, 1, d_model)``. ``forward`` slices it with ``x.size(0)``, so
    dimension 0 of the input is treated as the position axis and the singleton
    middle dimension broadcasts over dimension 1.

    Args:
        d_model: size of the feature (last) dimension; may be even or odd.
        dropout: dropout probability applied after the encodings are added.
        max_len: number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=1024):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = T.zeros(max_len, d_model)
        position = T.arange(0, max_len, dtype=T.float).unsqueeze(1)
        div_term = T.exp(T.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        # Even feature indices carry the sine, odd indices the cosine.
        pe[:, 0::2] = T.sin(angles)
        # For odd d_model there is one fewer cosine column than sine columns,
        # so trim the angles to match; for even d_model this slice is a no-op.
        pe[:, 1::2] = T.cos(angles[:, : d_model // 2])
        # (max_len, d_model) -> (max_len, 1, d_model)
        pe = pe.unsqueeze(0).transpose(0, 1)
        # Buffer: follows .to()/.cuda() and is saved in state_dict, but is not a parameter.
        self.register_buffer("pe", pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:x.size(0)])``; ``x.size(0)`` must not exceed ``max_len``."""
        x = x + self.pe[: x.size(0), :]
        return self.dropout(x)


class TEL(nn.TransformerEncoderLayer):
    """
    Transformer encoder layer with GRU layers after the attention and
    feed-forward sub-blocks, inspired by the gated transformer of
    https://arxiv.org/pdf/1910.06764.pdf

    Inputs and outputs are batch-first: ``(batch, seq, d_model)``. The parent
    class's residual additions and ``dropout1`` / ``dropout2`` modules are not
    used by this ``forward``.

    Args:
        d_model: feature size of the input and output.
        nhead: number of attention heads.
        n_layers: number of stacked layers inside each GRU.
        dim_feedforward: width of the hidden feed-forward projection.
        activation: activation name forwarded to ``nn.TransformerEncoderLayer``.
        dropout: dropout probability forwarded to ``nn.TransformerEncoderLayer``.
    """

    def __init__(self, d_model, nhead, n_layers=1, dim_feedforward=256, activation="relu", dropout=0):
        super().__init__(d_model, nhead, dim_feedforward, dropout, activation)
        # Two GRUs: one after self-attention, one after the feed-forward block.
        self.gru_1 = nn.GRU(d_model, d_model, num_layers=n_layers, batch_first=True)
        self.gru_2 = nn.GRU(input_size=d_model, hidden_size=d_model, num_layers=n_layers, batch_first=True)

    def forward(self, src, src_mask=None, src_key_padding_mask=None, is_causal=False):
        """
        Args:
            src: input of shape ``(batch, seq, d_model)``.
            src_mask: optional attention mask, passed to the attention as ``attn_mask``.
            src_key_padding_mask: optional padding mask, passed as ``key_padding_mask``.
            is_causal: accepted so the layer can be driven by ``nn.TransformerEncoder``
                versions that pass this keyword; it is not used here (causal
                masking, if wanted, must be supplied through ``src_mask``).

        Returns:
            Tensor of shape ``(batch, seq, d_model)``.
        """
        # Initial GRU state: sum of the raw (pre-norm) input over the sequence axis,
        # replicated for every stacked GRU layer -> (num_layers, batch, d_model).
        h = src.sum(dim=1).unsqueeze(dim=0)
        h = h.expand(self.gru_1.num_layers, -1, -1).contiguous()

        src = self.norm1(src)
        # self_attn is built sequence-first by the parent constructor, so swap
        # to (seq, batch, d_model) for the attention call and back afterwards.
        # Without this the attention would mix samples across the batch axis.
        attn_in = src.transpose(0, 1)
        out = self.self_attn(
            attn_in, attn_in, attn_in, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
        )[0]
        out = out.transpose(0, 1).contiguous()

        out, h = self.gru_1(out, h)
        out = self.norm2(out)
        out = self.activation(self.linear1(out))
        out = self.activation(self.linear2(out))
        # The second GRU continues from the final hidden state of the first.
        out, h = self.gru_2(out, h)
        return out


class GTrXL(nn.Module):
    """
    Positional encoding followed by a stack of ``TEL`` encoder layers
    (a transformer model using GRUs).

    Inputs and outputs are batch-first: ``(batch, seq, d_model)``.

    Args:
        d_model: feature size of the input and output.
        nheads: number of attention heads in each ``TEL`` layer.
        transformer_layers: number of stacked ``TEL`` layers.
        hidden_dims: passed to ``TEL`` as ``dim_feedforward``.
        n_layers: number of stacked layers inside each GRU of a ``TEL`` layer.
        chkpt_dir: accepted for interface compatibility; not used by this class.
        activation: activation name passed to ``TEL``.
        network_name: accepted for interface compatibility; not used by this class.
    """

    def __init__(
        self,
        d_model,
        nheads,
        transformer_layers,
        hidden_dims=256,
        n_layers=1,
        chkpt_dir="models",
        activation="relu",
        network_name="network.pt",
    ):
        super(GTrXL, self).__init__()
        # Module layers
        self.embed = PositionalEncoding(d_model)
        encoded = TEL(d_model, nheads, n_layers, dim_feedforward=hidden_dims, activation=activation)
        # NOTE: the attribute name keeps its historical spelling because it is
        # part of the state_dict keys of saved checkpoints.
        self.transfomer = nn.TransformerEncoder(encoded, transformer_layers)

    def forward(self, x):
        """Encode ``x`` of shape ``(batch, seq, d_model)``; the output has the same shape."""
        # PositionalEncoding indexes positions along dim 0. Swap to
        # (seq, batch, d_model) around the call so each token receives the
        # encoding of its sequence position rather than of its batch index.
        x = self.embed(x.transpose(0, 1)).transpose(0, 1)
        x = self.transfomer(x)
        return x

In [4]:
class GeeseNet(model_module.BaseModel):
    """
    Policy/value network: a GTrXL encoder followed by two small MLP heads.

    ``forward`` expects an input whose flattened encoder output has
    ``77 * d_model`` features per sample (d_model is fixed to 16 here), e.g. a
    tensor of shape ``(batch, 77, 16)``.
    NOTE(review): 77 presumably corresponds to the board cells of the
    environment - confirm against the observation encoding.
    """

    def __init__(self, env, args=None):
        # Use a fresh dict per instance instead of a shared mutable default.
        super().__init__(env, {} if args is None else args)
        d_model = 16
        filters = 64

        self.gtrxl = GTrXL(d_model=d_model, nheads=4, transformer_layers=1, hidden_dims=4, n_layers=1)

        # Policy head: 4 output logits, no bias terms.
        self.head_p1 = nn.Linear(77 * d_model, filters, bias=False)
        self.head_p2 = nn.Linear(filters, 4, bias=False)
        # Value head: a single scalar, squashed with tanh in forward().
        self.head_v1 = nn.Linear(77 * d_model, filters, bias=True)
        self.head_v2 = nn.Linear(filters, 1, bias=True)

    def forward(self, x, _=None):
        """
        Args:
            x: input tensor passed to the GTrXL encoder.
            _: ignored second positional argument.

        Returns:
            dict with ``"policy"`` (shape ``(batch, 4)``, raw logits) and
            ``"value"`` (shape ``(batch, 1)``, in ``[-1, 1]`` via tanh).
        """
        h = self.gtrxl(x)
        # Flatten the two trailing dimensions per sample.
        h = h.reshape(-1, h.size(1) * h.size(2))  # 77 * 16 = 1232

        h_p = F.relu_(self.head_p1(h))
        p = self.head_p2(h_p)

        h_v = F.relu_(self.head_v1(h))
        v = T.tanh(self.head_v2(h_v))

        return {"policy": p, "value": v}

In [5]:
# Create the environment from the loaded module and reset it before use.
e = env_module.Environment()
e.reset()

In [6]:
# Build the network for this environment (weights are randomly initialised).
net = GeeseNet(e)

In [7]:
# Random stand-in input: one sample with 77 positions of 16 features each,
# matching the 77 * d_model (d_model = 16) input size of GeeseNet's heads.
input_ = T.randn((1, 77, 16))
input_.shape

torch.Size([1, 77, 16])

In [8]:
# Run one forward pass and check the shape of the policy logits.
out = net(input_)
out["policy"].shape

torch.Size([1, 4])

In [9]:
# Show the full output dict ("policy" and "value") from the forward pass.
out

{'policy': tensor([[-0.2757, -0.0942, -0.2077, -0.5154]], grad_fn=<MmBackward>),
 'value': tensor([[0.4704]], grad_fn=<TanhBackward>)}

In [10]:
# Show the module tree of the network.
net

GeeseNet(
  (gtrxl): GTrXL(
    (embed): PositionalEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transfomer): TransformerEncoder(
      (layers): ModuleList(
        (0): TEL(
          (self_attn): MultiheadAttention(
            (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True)
          )
          (linear1): Linear(in_features=16, out_features=4, bias=True)
          (dropout): Dropout(p=0, inplace=False)
          (linear2): Linear(in_features=4, out_features=16, bias=True)
          (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0, inplace=False)
          (dropout2): Dropout(p=0, inplace=False)
          (gru_1): GRU(16, 16, batch_first=True)
          (gru_2): GRU(16, 16, batch_first=True)
        )
      )
    )
  )
  (head_p1): Linear(in_features=1232, out_features=64, bias=False)
  (head_p2): Linear(in_features=64,

In [11]:
# Total number of parameter elements in the network
params = sum(map(T.numel, net.parameters()))
print(f"{params:,}")

162,645


In [12]:
# Number of trainable parameter elements (only tensors with requires_grad set)
params = sum(w.numel() for w in filter(lambda w: w.requires_grad, net.parameters()))
print(f"{params:,}")

162,645
