In [1]:
import torch

In [38]:
def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
    freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
    t = torch.arange(end, device=freqs.device, dtype=torch.float32)
    freqs = torch.outer(t, freqs)
    freqs_cis = torch.polar(torch.ones_like(freqs), freqs)  # complex64
    return freqs_cis


def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
    ndim = x.ndim
    assert 0 <= 1 < ndim
    assert freqs_cis.shape == (x.shape[1], x.shape[-1]), f"Expected: ({x.shape[1]}, {x.shape[-1]}). Found: {freqs_cis.shape}"
    shape = [d if i == 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)]
    return freqs_cis.view(*shape)


def apply_rotary_emb(
    xq: torch.Tensor,
    xk: torch.Tensor,
    freqs_cis: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
    xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
    xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
    freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
    xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
    xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
    return xq_out.type_as(xq), xk_out.type_as(xk)

In [100]:
import math

batch_size, seq_len, n_head, head_features = 1, 4, 1, 4

x = torch.ones(batch_size, seq_len, n_head, head_features)
y = x.clone()

freq = precompute_freqs_cis(head_features, 10)
freq[:4, :]

tensor([[ 1.0000+0.0000j,  1.0000+0.0000j],
        [ 0.5403+0.8415j,  0.9999+0.0100j],
        [-0.4161+0.9093j,  0.9998+0.0200j],
        [-0.9900+0.1411j,  0.9996+0.0300j]])

In [91]:
x_ = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2))
(x_ * reshape_for_broadcast(freq, x_)).flatten(3).shape

torch.Size([1, 4, 1, 2])

In [76]:
k = 1
x[:, k, :, :], x_rope[:, k, :, :]

(tensor([[[1., 1.]]]), tensor([[[-0.3012,  1.3818]]]))

In [82]:
math.asin(0.14), math.acos(-0.99)

(0.14046141470985582, 3.000053180265366)

In [77]:
freq

tensor([[ 1.0000+0.0000j],
        [ 0.5403+0.8415j],
        [-0.4161+0.9093j],
        [-0.9900+0.1411j]])

In [17]:
import torch
from torch.nn.functional import pad

spectrograms = [
    torch.randn(1, 128, 50),  # Example shape (frequency, time_length)
    torch.randn(1, 128, 60),
    torch.randn(1, 128, 55)
]

max_time_length = max(spectrogram.shape[-1] for spectrogram in spectrograms)
print(max_time_length)
padded_spectrograms = []
for spectrogram in spectrograms:
    time_padding = max_time_length - spectrogram.shape[-1]
    padded_spectrogram = pad(spectrogram, (0, time_padding)).squeeze(0)
    padded_spectrograms.append(padded_spectrogram)

batch_tensor = torch.stack(padded_spectrograms)

print(batch_tensor.shape)

60
torch.Size([3, 128, 60])


In [11]:
def _lengths_to_padding_mask(lengths: torch.Tensor) -> torch.Tensor:
    batch_size = lengths.shape[0]
    max_length = int(torch.max(lengths).item())
    padding_mask = torch.arange(max_length, device=lengths.device, dtype=lengths.dtype).expand(
        batch_size, max_length
    ) < lengths.unsqueeze(1)
    return padding_mask

In [12]:
padding_mask = _lengths_to_padding_mask(torch.tensor([1, 3, 6]))
padding_mask

tensor([[ True, False, False, False, False, False],
        [ True,  True,  True, False, False, False],
        [ True,  True,  True,  True,  True,  True]])

In [10]:
padding_mask.view(3, 1, 1, 6).expand(-1, 1, -1, -1)

tensor([[[[False,  True,  True,  True,  True,  True]]],


        [[[False, False, False,  True,  True,  True]]],


        [[[False, False, False, False, False, False]]]])

In [4]:
lengths = torch.tensor([1, 3, 6])
max_length = int(torch.max(lengths).item())
mask = torch.full((max_length, max_length), float("-inf"), device=lengths.device)

mask = torch.triu(mask, diagonal=1)

In [5]:
mask

tensor([[0., -inf, -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf, -inf],
        [0., 0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0., 0.]])

In [13]:
class CustomModule(torch.nn.Module):
    def __init__(self):
        super().__init__()

        a = torch.tensor([0., 0., 0.])
        self.register_buffer("aboba", a)
    
    def forward(self, x):
        return self.aboba

In [16]:
m = CustomModule().to("cuda")
m(1)

RuntimeError: No CUDA GPUs are available

In [17]:
torch.cuda.is_available()

False