In [1]:
import torch
from torch import nn

from src.transformer.dot_product_attention import DotProductAttention


class DynamicPositionEncoding(nn.Module):

    def __init__(self, max_seq_length: int = 0, embedded_dim: int = 0,
                 scaling_factor: int = 10000, device = None):
        super(DynamicPositionEncoding, self).__init__()
        self.scaling_factor: int = scaling_factor
        self.device = device
        self.position_encodings: torch.Tensor = self.encode((max_seq_length, embedded_dim))

    def encode(self, input_dims: tuple):
        embedded_dim: int = input_dims[-1]
        seq_length: int = input_dims[-2]

        position_encodings = torch.zeros(seq_length, embedded_dim)
        if is_odd := embedded_dim % 2 != 0: embedded_dim += 1

        raw_positions = (torch.arange(seq_length, device= self.device).unsqueeze(1) /
                         (self.scaling_factor ** (torch.arange(0, embedded_dim, 2, device= self.device) / embedded_dim)))
        position_encodings[:, 0::2] = torch.sin(raw_positions)
        position_encodings[:, 1::2] = torch.cos(raw_positions[:, :-1] if is_odd else raw_positions)
        return position_encodings

    def forward(self, x: torch.Tensor):
        if (x.shape[-2] > self.position_encodings.shape[-2] or
                x.shape[-1] != self.position_encodings.shape[-1]):
            self.position_encodings = self.encode(x.shape)
        return x + self.position_encodings[:x.shape[-2], :]

t = torch.zeros(2, 2, 6)
posEn = DynamicPositionEncoding(3, t.shape[-1])

posEn.forward(t)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/Eric/PycharmProjects/Transformer_no_hugging_face/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instanc

tensor([[[0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000],
         [0.8415, 0.5403, 0.0464, 0.9989, 0.0022, 1.0000]],

        [[0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000],
         [0.8415, 0.5403, 0.0464, 0.9989, 0.0022, 1.0000]]])

In [32]:
import math
softmax = nn.Softmax()

x = torch.zeros(2, 2, 5) - 1
x[0, 0, 0] = 0
q = torch.randn(2, 2, 5)
k = torch.randn(2, 2, 5)
v = torch.randn(2, 2, 5)
embedded_dims = 5
raw_attention = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(embedded_dims)

mask_val = -1
if mask_val is not None:
    raw_attention.masked_fill(x == mask_val, -1e8)



attention_scores = softmax(raw_attention)
attention = torch.matmul(attention_scores, v)

attention, mask, raw_attention + mask


RuntimeError: The size of tensor a (5) must match the size of tensor b (2) at non-singleton dimension 2

In [2]:
from src.transformer.dot_product_attention import DotProductAttention
import torch
from torch import nn

attention_heads = [DotProductAttention(16, 2) for _ in range(8)]
x = torch.randn((2,3,16))

z = [attention_head(x) for attention_head in attention_heads]
y = torch.concatenate(
            z, dim=-1
        )
#y.permute(0, 2, 1, 3).reshape(2, 3, 8 * 2)
y.shape, z[1].shape

(torch.Size([2, 3, 16]), torch.Size([2, 3, 2]))

In [15]:
x = torch.randn((2,3,4))

mean = torch.mean(x, dim=-1)
sd =torch.var(x, dim= -1)
xp = x - mean / torch.sqrt(sd + 1e-5)

x, xp.shape

RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 2

In [60]:
x = torch.zeros((2, 3), dtype = torch.int64)
y: torch.Tensor = torch.rand((2, 5, 8))

x = [
            [y[batch, idx, :] for idx in x[batch, :]]
            for batch in range(x.shape[0])
        ]
x = torch.stack([torch.stack(sublist, dim=0) for sublist in x], dim=0)
x.shape

torch.Size([2, 3, 8])

In [65]:


mask = torch.triu(torch.ones(3, 3), diagonal=1) * -1e8
mask = mask.unsqueeze(0).expand(2, -1, -1)

(2, 3)