In [26]:
import torch
from torch import nn
import numpy
import pandas as pd

In [90]:
# Constants
EMBEDDING_DIM = 3000  # width H of every token / position embedding vector
BLOCK_SIZE = 16       # number of context words in one training sample
N_HEADS = 4           # attention head count (not referenced by the cells shown here)

# Pick the best available backend instead of hard-coding 'mps', so the
# notebook also runs on machines without Apple-silicon GPU support.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Loading text and removing unnecessary characters.

In [68]:
# Read the whole corpus into memory as a single string.
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Characters that are stripped from the corpus before tokenising.
remove = ['-', '$', '&']
# One translate() pass deletes every listed character at once.
text = text.translate(str.maketrans('', '', ''.join(remove)))

# Whitespace tokenisation: every run of non-space characters is a "word".
words = text.split()

# Sorted list of distinct words; its length is the vocabulary size.
vocab = sorted(set(words))
VOCAB_SIZE = len(vocab)

In [91]:
# Display the number of distinct words found in the corpus.
VOCAB_SIZE

25454

# Training and testing dataset.

In [54]:
# index -> word: position of each word in the sorted vocabulary.
int_vocab = dict(enumerate(vocab))
# word -> index: the inverse lookup of int_vocab.
vocab_int = {token: position for position, token in int_vocab.items()}

# The full corpus re-expressed as a list of vocabulary indices.
word_to_int_array = list(map(vocab_int.__getitem__, words))

In [73]:
# Build (context, next-word) pairs by sampling random windows of the corpus.
N_SAMPLES = 10_000   # number of training pairs to draw
SAMPLING_SEED = 42   # fixed seed so Restart & Run All reproduces the same samples

list_size = len(words)
if list_size <= BLOCK_SIZE:
    raise ValueError(
        f"Corpus has {list_size} words; need more than BLOCK_SIZE={BLOCK_SIZE} "
        "to build a context window plus its target word."
    )

rng = numpy.random.default_rng(SAMPLING_SEED)
# A window occupies indices [start, start + BLOCK_SIZE) and its target sits at
# start + BLOCK_SIZE, so the largest valid start is list_size - BLOCK_SIZE - 1
# (the upper bound of `integers` is exclusive).
starts = rng.integers(0, list_size - BLOCK_SIZE, size=N_SAMPLES)

inputs = [word_to_int_array[start:start + BLOCK_SIZE] for start in starts]
# The target is the word immediately after the context window. The previous
# index (start + BLOCK_SIZE + 1) skipped one word.
targets = [word_to_int_array[start + BLOCK_SIZE] for start in starts]

inputs = torch.tensor(inputs,
                      dtype = torch.long,
                      device = device
                     )
targets = torch.tensor(targets,
                       dtype = torch.long,
                       device = device
                      )

# Transformer Blocks

## Embedding block

In [74]:
class EmbeddingBlock(nn.Module):
    """
    Embedding Block:

    Parameters:
    -----------
    vocab_size : int, optional
        Number of distinct token ids. Defaults to the notebook-level
        VOCAB_SIZE when omitted.
    embedding_dim : int, optional
        Width H of each embedding vector. Defaults to the notebook-level
        EMBEDDING_DIM when omitted.
    block_size : int, optional
        Number of positions that have a learned position embedding, i.e. the
        longest sequence length T that forward() accepts. Defaults to the
        notebook-level BLOCK_SIZE when omitted.

    Description:
    ------------

    Maps each token id to a learned vector and adds a learned vector for the
    token's position in the sequence, so the result carries both word
    identity and word order.

    Input dim: B,T
    Output dim: B,T,H (H = embedding dimension)
    """
    def __init__(self, vocab_size=None, embedding_dim=None, block_size=None):
        super().__init__()
        # Fall back to the notebook constants so EmbeddingBlock() keeps working.
        if vocab_size is None:
            vocab_size = VOCAB_SIZE
        if embedding_dim is None:
            embedding_dim = EMBEDDING_DIM
        if block_size is None:
            block_size = BLOCK_SIZE

        self.embedding_layer = nn.Embedding(num_embeddings = vocab_size,
                                            embedding_dim = embedding_dim
                                           )
        self.pos_layer = nn.Embedding(num_embeddings = block_size,
                                      embedding_dim = embedding_dim
                                     )

    def forward(self, x):
        """Return token embeddings plus position embeddings for x of shape (B, T)."""
        _, T = x.shape
        embeddings = self.embedding_layer(x)
        # Create the position indices on the input's own device rather than a
        # global one, so the block works wherever the module and input live.
        positions = torch.arange(T, device = x.device)
        # pos has shape (T, H) and broadcasts across the batch dimension.
        pos = self.pos_layer(positions)

        return embeddings + pos

In [87]:
# Smoke test: build the embedding block on the selected device
# (Module.to returns the module itself) and embed the first three samples.
m = EmbeddingBlock().to(device)
temp = m(inputs[:3])

In [89]:
# Compare the (B, T) input shape with the (B, T, H) embedding output shape.
print(inputs[:3].shape, temp.shape)

torch.Size([3, 16]) torch.Size([3, 16, 3000])


## Encoder Block

### Multi-Head Attention Block

Head Block -> Multi-Head Block(Feed Forward Block)

#### Feed Forward Block

In [61]:
class FeedForwardBlock(nn.Module):
    """
    Feed Forward Block:

    Placeholder. The class has no layers and no forward() yet.
    TODO: implement.
    """
    pass

#### Head Block

In [63]:
class HeadBlock(nn.Module):
    """
    Head Block:

    Parameters:
    -----------

    Description:
    ------------

    Intended as the core block of the multi-head attention stage (per the
    notebook outline, heads are combined by the Multi-Head Block).

    Placeholder. The class has no layers and no forward() yet.
    TODO: implement and document the parameters.
    """
    pass

#### Multi-Head Attention Block

In [62]:
class MultiHeadBlock(nn.Module):
    """
    Multi-Head Attention Block:

    Placeholder. The class has no layers and no forward() yet.
    TODO: implement.
    """
    pass

In [64]:
class EncoderBlock(nn.Module):
    """
    Encoder Block:

    Placeholder. The class has no layers and no forward() yet.
    TODO: implement.
    """
    pass