In [1]:
import torch
from torch import nn, dtype

from src.engine.generate import generate
from src.models.character_generator import CharacterGenerator
from src.transformer.dot_product_attention import DotProductAttention


class DynamicPositionEncoding(nn.Module):

    def __init__(self, max_seq_length: int = 0, embedded_dim: int = 0,
                 scaling_factor: int = 10000, device = None):
        super(DynamicPositionEncoding, self).__init__()
        self.scaling_factor: int = scaling_factor
        self.device = device
        self.position_encodings: torch.Tensor = self.encode((max_seq_length, embedded_dim))

    def encode(self, input_dims: tuple):
        embedded_dim: int = input_dims[-1]
        seq_length: int = input_dims[-2]

        position_encodings = torch.zeros(seq_length, embedded_dim)
        if is_odd := embedded_dim % 2 != 0: embedded_dim += 1

        raw_positions = (torch.arange(seq_length, device= self.device).unsqueeze(1) /
                         (self.scaling_factor ** (torch.arange(0, embedded_dim, 2, device= self.device) / embedded_dim)))
        position_encodings[:, 0::2] = torch.sin(raw_positions)
        position_encodings[:, 1::2] = torch.cos(raw_positions[:, :-1] if is_odd else raw_positions)
        return position_encodings

    def forward(self, x: torch.Tensor):
        if (x.shape[-2] > self.position_encodings.shape[-2] or
                x.shape[-1] != self.position_encodings.shape[-1]):
            self.position_encodings = self.encode(x.shape)
        return x + self.position_encodings[:x.shape[-2], :]

t = torch.zeros(2, 2, 6)
posEn = DynamicPositionEncoding(3, t.shape[-1])

posEn.forward(t)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/Eric/PycharmProjects/Transformer_no_hugging_face/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instanc

tensor([[[0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000],
         [0.8415, 0.5403, 0.0464, 0.9989, 0.0022, 1.0000]],

        [[0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000],
         [0.8415, 0.5403, 0.0464, 0.9989, 0.0022, 1.0000]]])

In [32]:
import math
softmax = nn.Softmax()

x = torch.zeros(2, 2, 5) - 1
x[0, 0, 0] = 0
q = torch.randn(2, 2, 5)
k = torch.randn(2, 2, 5)
v = torch.randn(2, 2, 5)
embedded_dims = 5
raw_attention = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(embedded_dims)

mask_val = -1
if mask_val is not None:
    raw_attention.masked_fill(x == mask_val, -1e8)



attention_scores = softmax(raw_attention)
attention = torch.matmul(attention_scores, v)

attention, mask, raw_attention + mask


RuntimeError: The size of tensor a (5) must match the size of tensor b (2) at non-singleton dimension 2

In [2]:
from src.transformer.dot_product_attention import DotProductAttention
import torch
from torch import nn

attention_heads = [DotProductAttention(16, 2) for _ in range(8)]
x = torch.randn((2,3,16))

z = [attention_head(x) for attention_head in attention_heads]
y = torch.concatenate(
            z, dim=-1
        )
#y.permute(0, 2, 1, 3).reshape(2, 3, 8 * 2)
y.shape, z[1].shape

(torch.Size([2, 3, 16]), torch.Size([2, 3, 2]))

In [15]:
x = torch.randn((2,3,4))

mean = torch.mean(x, dim=-1)
sd =torch.var(x, dim= -1)
xp = x - mean / torch.sqrt(sd + 1e-5)

x, xp.shape

RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 2

In [60]:
x = torch.zeros((2, 3), dtype = torch.int64)
y: torch.Tensor = torch.rand((2, 5, 8))

x = [
            [y[batch, idx, :] for idx in x[batch, :]]
            for batch in range(x.shape[0])
        ]
x = torch.stack([torch.stack(sublist, dim=0) for sublist in x], dim=0)
x.shape

torch.Size([2, 3, 8])

In [75]:


mask = torch.triu(torch.ones(3, 3), diagonal=1) * -1e8
mask = mask.unsqueeze(0).expand(2, -1, -1)
mask

tensor([[[        -0., -100000000., -100000000.],
         [        -0.,         -0., -100000000.],
         [        -0.,         -0.,         -0.]],

        [[        -0., -100000000., -100000000.],
         [        -0.,         -0., -100000000.],
         [        -0.,         -0.,         -0.]]])

In [3]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

file_path = "/Users/Eric/PycharmProjects/Transformer_no_hugging_face/resources/bee_movie.txt"


with open(file_path, "r", encoding="utf-8") as f:
    macbeth = f.read()

len(macbeth)
class CharacterDataset(Dataset):

    def __init__(self, input_text, block_size: int):
        super().__init__()
        self.text = input_text
        self.block_size = block_size
        self.characters = sorted(set(input_text))
        self.encode = {char: idx for idx, char in enumerate(self.characters)}
        self.decode = {idx: char for idx, char in enumerate(self.characters)}
        self.encoded_text = torch.Tensor([self.encode[char] for char in input_text])

    def __len__(self):
        return len(self.encoded_text) - (self.block_size + 1)

    def __getitem__(self, idx: int) -> tuple:
        return self.encoded_text[idx:idx + self.block_size], self.encoded_text[idx + self.block_size]

train_split: int = int((len(macbeth) - 1) * 0.9)
train_dataset = CharacterDataset(macbeth[:train_split], 256)
test_dataset = CharacterDataset(macbeth[train_split:], 256)
full_dataset = CharacterDataset(macbeth, 256)
train_dataloader = DataLoader(dataset= train_dataset,
                              batch_size= 128)
test_dataloader = DataLoader(dataset= test_dataset,
                             batch_size= 128)
print(set(test_dataset.characters).difference(train_dataset.characters))
print(set(train_dataset.characters).difference(test_dataset.characters))
len(full_dataset.characters), len(train_dataset.characters), len(test_dataset.characters)

set()
{'Q', 'U', '8', ':', '0', '9', '3', '5', '6', '1', '4'}


(70, 70, 59)

In [2]:
invisible_chars = ['\u2006', '\ufeff']

with open("/Users/Eric/PycharmProjects/Transformer_no_hugging_face/resources/bee_movie.txt", "r", encoding="utf-8") as f:
    text = f.read()

for char in invisible_chars:
    text = text.replace(char, '')

with open("/Users/Eric/PycharmProjects/Transformer_no_hugging_face/resources/bee_movie.txt", "w", encoding="utf-8") as f:
    f.write(text)

In [148]:
y_target = torch.zeros((2, 3))
logits = torch.ones((2, 3, 4))
logits[0][0][0] = 0

softmax = nn.Softmax(dim=-1)
accuracy = (y_target == torch.argmax(softmax(logits), dim= -1)).sum().item() / (len(y_target) * y_target.shape[1])


y_target, torch.argmax(softmax(logits), dim= -1), accuracy



(tensor([[0., 0., 0.],
         [0., 0., 0.]]),
 tensor([[1, 0, 0],
         [0, 0, 0]]),
 0.8333333333333334)

In [2]:
from src.utils.io import load_model, get_text
from src.utils.character_dataset import CharacterDataset
from src.models.character_generator import CharacterGenerator

model = CharacterGenerator(vocab_size=75,
                           embedding_dim=256,
                           num_transformers=8,
                           num_heads=8,
                           ffnn_hidden_neurons=1024)

model: CharacterGenerator = load_model(model,
                                       "macbeth(old)_LR0.0005_E4_BK256_V75_D256_B64_T8_H8_PN1024_DP0.2.pth",
                                       device="cpu")

block_size = 256
dataset = CharacterDataset(get_text("macbeth(old)"), block_size)

seed_tokens = dataset.tokenised_text[:block_size].to("cpu")
input = "".join([dataset.decode[token.item()] for token in
 seed_tokens])
print(f"Input: \n{input}\nOutput:")
print("".join([dataset.decode[token.item()] for token in
       model.generate(seed_tokens, 500, block_size).squeeze(0)]))


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/Eric/PycharmProjects/Transformer_no_hugging_face/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instanc

Loaded macbeth(old)_LR0.0005_E4_BK256_V75_D256_B64_T8_H8_PN1024_DP0.2.pth from directory /Users/Eric/PycharmProjects/Transformer_no_hugging_face/models
Input: 
﻿Title: Macbeth

Author: William Shakespeare

1606

THE TRAGEDY OF MACBETH


by William Shakespeare



Dramatis Personae

  DUNCAN, King of Scotland
  MACBETH, Thane of Glamis and Cawdor, a general in the King's
army
  LADY MACBETH, his wife
  MACDUFF, Tha
Output:


TypeError: CharacterGenerator.generate() takes 2 positional arguments but 4 were given

In [7]:
from src.engine.generate import generate
from src.utils.io import load_model, get_text
from src.utils.character_dataset import CharacterDataset
from src.models.character_generator import CharacterGenerator

model = CharacterGenerator(vocab_size=75,
                           embedding_dim=256,
                           num_transformers=8,
                           num_heads=8,
                           ffnn_hidden_neurons=1024)

model: CharacterGenerator = load_model(model,
                                       "macbeth(old)_LR0.0005_E4_BK256_V75_D256_B64_T8_H8_PN1024_DP0.2.pth",
                                       device="cpu")

print(generate(get_text("macbeth(old)"), model, 100, 256, "cpu", False))

Loaded macbeth(old)_LR0.0005_E4_BK256_V75_D256_B64_T8_H8_PN1024_DP0.2.pth from directory /Users/Eric/PycharmProjects/Transformer_no_hugging_face/models
﻿Title: Macbeth

Author: William Shakespeare

1606

THE TRAGEDY OF MACBETH


by William Shakespeare



Dramatis Personae

  DUNCAN, King of Scotland
  MACBETH, Thane of Glamis and Cawdor, a general in the King's
army
  LADY MACBETH, his wife
  MACDUFF, Thane of Fife, a nobleman of Scotland
  LADY MACDUFF, his wife
  MALCOLM, elder son of Duncan
  DONALBA
