<a href="https://colab.research.google.com/github/NoCodeProgram/deepLearning/blob/main/transformer/position_encoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/NoCodeProgram/deepLearning.git

Cloning into 'deepLearning'...
remote: Enumerating objects: 274, done.[K
remote: Counting objects: 100% (135/135), done.[K
remote: Compressing objects: 100% (78/78), done.[K
remote: Total 274 (delta 55), reused 135 (delta 55), pack-reused 139[K
Receiving objects: 100% (274/274), 12.41 MiB | 19.68 MiB/s, done.
Resolving deltas: 100% (91/91), done.


In [2]:
import torch
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Read the text file
with open('deepLearning/transformer/shakespeare.txt', 'r') as file:
    text = file.read()

# Tokenize the text  (this is very simple tokenizer, in reality you would use a more advanced one)
tokenizer = get_tokenizer('basic_english')
tokens = tokenizer(text)
unique_tokens = set(tokens)

In [3]:
stoi = { s:i for i,s in enumerate(unique_tokens)}
itos = { i:s for i,s in enumerate(unique_tokens)}
# print(stoi)
# print(itos)

vocab_size = len(unique_tokens)
print(vocab_size)


3129


In [11]:
sentence = "i love you all"
indices = [stoi[word] for word in sentence.split()]
print(indices)


[755, 1032, 176, 707]


In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    def __init__(self, embed_dim, atten_dim):
        super().__init__()
        self.query = nn.Linear(embed_dim, atten_dim, bias=False)
        self.key = nn.Linear(embed_dim, atten_dim, bias=False)
        self.value = nn.Linear(embed_dim, atten_dim, bias=False)

    def forward(self, x):
        query = self.query(x)
        key = self.key(x)
        value = self.value(x)

        scores = torch.matmul(query, key.transpose(-2, -1))
        scores = scores / key.size(-1)**0.5

        attention_weights = F.softmax(scores, dim=-1)
        weighted_values = torch.matmul(attention_weights, value)

        return weighted_values

In [29]:
class MultiheadAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super().__init__()
        attention_dim = embed_dim // num_heads
        self.attentions = nn.ModuleList([SelfAttention(embed_dim, attention_dim) for _ in range(num_heads)])
        self.fc = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        head_outputs = []
        for attention in self.attentions:
            head_output = attention(x)
            head_outputs.append(head_output)

        concatenated_heads = torch.cat(head_outputs, dim=-1)
        output = self.fc(concatenated_heads)
        return output

In [30]:
class FeedFoward(nn.Module):
    def __init__(self, embed_dim, ff_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim),
        )
    def forward(self, x):
        return self.net(x)

In [31]:
class EncoderBlock(nn.Module):
    def __init__(self, embed_dim, n_head):
        super().__init__()
        self.layer_norm1 = nn.LayerNorm(embed_dim)
        self.multihead_atten = MultiheadAttention(embed_dim, n_head)

        self.layer_norm2 = nn.LayerNorm(embed_dim)
        self.feed_forward = FeedFoward(embed_dim, 4*embed_dim)

    def forward(self, x):
        x = x + self.multihead_atten(self.layer_norm1(x))
        x = x + self.feed_forward(self.layer_norm2(x))
        return x

In [35]:
class TransforemrWithEmbedding(nn.Module):
    def __init__(self, vocab_size, embed_dim, n_heads, n_layers, max_len, num_classes=2):
        super().__init__()
        self.embed_dim = embed_dim
        self.word_embedding = nn.Embedding(vocab_size, embed_dim)
        self.positional_encoding = nn.Embedding(max_len, embed_dim)
        self.transformer_encoders = nn.Sequential(*[EncoderBlock(embed_dim, n_heads) for _ in range(n_layers)])


    def forward(self, x):
        word_embedding = self.word_embedding(x)
        positions = torch.arange(0, x.size(1), device=x.device).unsqueeze(0)
        pos_embeddings = self.positional_encoding(positions)
        x = word_embedding + pos_embeddings
        x = self.transformer_encoders(x)

        return x

In [36]:
embed_dim = 20
n_head = 4
n_layers = 4
max_len = 4

input_tensor = torch.tensor(indices)[None,:]
model = TransforemrWithEmbedding(vocab_size=vocab_size, embed_dim=embed_dim, n_heads=n_head, n_layers=n_layers, max_len=max_len)
output = model(input_tensor)
print(output.shape)

torch.Size([1, 4, 20])
