In [None]:
from torch import Tensor 


import torch 
from torch import nn 

In [1]:
import math 
from torch.nn import TransformerEncoderLayer, TransformerEncoder


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor, then apply dropout.

    A table of shape ``[max_len, 1, d_model]`` is precomputed once and stored
    as the (non-trainable) buffer ``pe``. Even feature indices hold sines and
    odd feature indices hold cosines of ``position * div_term``.

    Args:
        d_model: Size of the feature (embedding) dimension. Both even and odd
            values are supported.
        dropout: Dropout probability applied to the encoded output.
        max_len: Number of positions precomputed in the table; ``forward``
            does not support sequences longer than this.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        # Shape [max_len, ceil(d_model / 2)]: one column per sine slot.
        angles = position * div_term

        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine slot than sine slots, so the
        # angles are trimmed to d_model // 2 columns. For even d_model the slice
        # is a no-op and the table is identical to the untrimmed computation.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        # A buffer follows the module across devices and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """Add the position table to ``x`` and apply dropout.

        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``

        Returns:
            Tensor of the same shape as ``x``.
        """
        # The table is indexed by the FIRST axis of x and broadcast over the
        # second, so x must be sequence-first.
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)
    

class AverageSequencePooling(nn.Module):
    """Collapse one axis of a tensor by taking the arithmetic mean along it.

    Args:
        dim: Index of the axis to average over (defaults to 1).
    """

    def __init__(self, dim: int = 1):
        super().__init__()
        self.dim = dim

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` averaged along ``self.dim``; that axis is removed from the result."""
        pooled = torch.mean(x, dim=self.dim)
        return pooled


class _SwapBatchAndSequence(nn.Module):
    """Exchange the first two axes of a tensor (batch-first <-> sequence-first)."""

    def forward(self, x: Tensor) -> Tensor:
        return x.transpose(0, 1)


class TransformerEstimatorNetwork(nn.Module):
    """Transformer encoder that maps a batch of sequences to one output vector per sequence.

    Pipeline: linear embedding -> sinusoidal positional encoding ->
    ``TransformerEncoder`` -> mean over the sequence axis -> linear output head.

    Args:
        input_dims: Number of features per sequence element.
        num_layers: Number of stacked encoder layers.
        d_model: Width of the embedding and of every encoder layer.
        nhead: Number of attention heads in each encoder layer.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        dropout: Dropout probability used by the positional encoding and the
            encoder layers.
        num_actions: Size of the output layer, i.e. the number of values
            produced per input sequence.
    """

    def __init__(
        self,
        input_dims: int,
        num_layers: int = 2,
        d_model: int = 128,
        nhead: int = 8,
        dim_feedforward: int = 32,
        dropout: float = 0.1,
        num_actions: int = 2,
    ):
        super().__init__()
        # Previously read from ``self`` without ever being assigned, which made
        # construction fail with AttributeError.
        self.num_actions = num_actions

        # TODO (Kacper) maybe we should add batchnorm before embedding as in the original MLP?
        # TODO (Kacper) also find out whether this embedding method with a simple linear layer is common
        embedding = nn.Linear(input_dims, d_model, bias=True)
        positional_encoding = PositionalEncoding(d_model=d_model, dropout=dropout, max_len=5000)

        encoder_layer = TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation='relu',
            layer_norm_eps=1e-5,
            batch_first=True,  # [batch, seq, feature]
            norm_first=False,  # TODO (Kacper) check if modern version used layer norm prior to attention and feedforward or after
            bias=True,
        )
        encoder = TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
            norm=None,  # TODO (Kacper) check if modern architectures use layer norm (I don't think so)
            enable_nested_tensor=True,
        )
        pooling = AverageSequencePooling(dim=1)  # 1 is the sequence dimension
        linear = nn.Linear(d_model, num_actions, bias=True)

        self._network = nn.Sequential(
            embedding,
            # PositionalEncoding indexes its table by the FIRST axis of its
            # input, while the rest of this network is batch-first. Without the
            # swap the encoding would be selected by batch index and broadcast
            # over the sequence, i.e. carry no positional information.
            _SwapBatchAndSequence(),  # [batch, seq, d_model] -> [seq, batch, d_model]
            positional_encoding,
            _SwapBatchAndSequence(),  # back to [batch, seq, d_model]
            encoder,
            pooling,
            linear,
        )

    @property
    def network(self) -> nn.Sequential:
        """The full pipeline, from embedding to output head, as one sequential module."""
        return self._network

    def forward(self, s: Tensor) -> Tensor:
        """Run the network on a batch of sequences.

        Arguments:
            s: Tensor, shape ``[batch_size, seq_len, input_dims]``. The input
                must be batched (3-D); ``seq_len`` must not exceed the
                positional table length (5000).

        Returns:
            Tensor, shape ``[batch_size, num_actions]``.
        """
        return self.network(s)

2024-03-04 22:58:45 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
  dictionaries = [ (Dictionary.load(f"{label_dir}/dict.{label}.txt") if label is not "" else None ) for label in self.cfg.labels]
usage: ipykernel_launcher.py [-h]
ipykernel_launcher.py: error: unrecognized arguments: --f=/home/kacperwyrwal/.local/share/jupyter/runtime/kernel-v2-189797olwRLrsIhEzp.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
