**Two types**

Absolute: *For each position in the input sequence, a unique embedding is added to the token's embedding to convey its exact location.*

Token embedding + positional embedding = input embedding

Relative: *The emphasis is on the relative position, or distance, between tokens. The model learns the relationship in terms of "how far apart" tokens are rather than at which exact position they occur.*


In [1]:
import torch
import tiktoken

In [2]:
# Read the entire corpus into memory as a single string.
# NOTE(review): this is an absolute, machine-specific path; the cell fails
# on any machine where the file does not exist at this location.
with open(
    "C:/Users/anshk/OneDrive/Desktop/LLM/Datasets/the-verdict.txt",
    mode="r",
    encoding="utf-8",
) as f:
    raw_text = f.read()

In [3]:
from torch.utils.data import Dataset, DataLoader

class GPTDatasetV1(Dataset):
    """Sliding-window dataset of (input, target) token-ID tensors.

    The text is tokenized once. A window of ``max_length`` tokens is then
    moved across the token sequence in steps of ``stride``; each sample's
    target is its input window shifted one token to the right.
    """

    def __init__(self, txt, tokenizer, max_length, stride):
        """Tokenize ``txt`` and precompute every input/target window.

        Args:
            txt: Raw text to tokenize.
            tokenizer: Object exposing ``encode(text, allowed_special=...)``
                that returns a sliceable sequence of token IDs.
            max_length: Number of tokens in each sample; must be positive.
            stride: Offset between the starts of consecutive windows; must
                be positive.

        Raises:
            ValueError: If ``max_length`` or ``stride`` is not positive.

        If the text encodes to ``max_length`` tokens or fewer, no window
        fits and the dataset is empty.
        """
        # Fail loudly instead of silently building an empty or malformed
        # dataset (negative stride) or hitting an opaque range() error
        # (zero stride).
        if max_length <= 0:
            raise ValueError(f"max_length must be positive, got {max_length}")
        if stride <= 0:
            raise ValueError(f"stride must be positive, got {stride}")

        self.input_ids = []
        self.target_ids = []

        token_ids = tokenizer.encode(txt, allowed_special={"<|endoftext|>"})

        # Stop max_length tokens before the end so the target window, which
        # is shifted by one, never runs past the last token.
        for start in range(0, len(token_ids) - max_length, stride):
            end = start + max_length
            self.input_ids.append(torch.tensor(token_ids[start:end]))
            self.target_ids.append(torch.tensor(token_ids[start + 1:end + 1]))

    def __len__(self):
        """Return the number of precomputed windows."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the ``(input_ids, target_ids)`` tensor pair at ``idx``."""
        return self.input_ids[idx], self.target_ids[idx]

In [4]:
def create_dataloader_v1(
    txt,
    batch_size=4,
    max_length=256,
    stride=128,
    shuffle=True,
    drop_last=True,
    num_workers=0,
):
    """Build a DataLoader of sliding-window token batches from raw text.

    The text is tokenized with the tiktoken "gpt2" encoding and wrapped in a
    ``GPTDatasetV1``; ``max_length`` and ``stride`` are forwarded to that
    dataset, and the remaining arguments are forwarded to ``DataLoader``.

    Returns:
        The configured ``DataLoader`` over the windowed dataset.
    """
    windows = GPTDatasetV1(
        txt,
        tiktoken.get_encoding("gpt2"),
        max_length,
        stride,
    )
    return DataLoader(
        windows,
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=drop_last,
        num_workers=num_workers,
    )




In [5]:
# Rows in the token embedding table and the width of each embedding vector.
# NOTE(review): 50257 presumably matches the vocabulary of the "gpt2"
# encoding used by the dataloader; confirm if the encoding ever changes.
vocab_size, output_dim = 50257, 256

# Lookup table that maps each token ID to an output_dim-sized vector.
token_embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)

In [6]:
# Context window of 4 tokens per sample.
max_length = 4

# stride == max_length, so consecutive windows start max_length tokens apart;
# shuffling is off, so batches come out in dataset order.
dataloader = create_dataloader_v1(
    raw_text,
    batch_size=8,
    max_length=max_length,
    stride=max_length,
    shuffle=False,
)

# Fetch the first (inputs, targets) batch.
data_iter = iter(dataloader)
inputs, targets = next(data_iter)

In [7]:
# Show the first batch of input token IDs and its shape
# (batch_size x max_length, i.e. 8 x 4 with the settings above).
print("token IDs:\n", inputs)
print("token IDs shape:", inputs.shape)

token IDs:
 tensor([[   40,   367,  2885,  1464],
        [ 1807,  3619,   402,   271],
        [10899,  2138,   257,  7026],
        [15632,   438,  2016,   257],
        [  922,  5891,  1576,   438],
        [  568,   340,   373,   645],
        [ 1049,  5975,   284,   502],
        [  284,  3285,   326,    11]])
token IDs shape: torch.Size([8, 4])


In [8]:
# Look up an embedding vector for every token ID in the batch; this appends
# an embedding dimension of size output_dim to the shape of `inputs`.
token_embedding = token_embedding_layer(inputs)
print("token embedding:\n", token_embedding)
print("token embedding shape:", token_embedding.shape)

token embedding:
 tensor([[[-0.2440,  0.7625, -0.5820,  ..., -1.2853,  2.0107,  0.6343],
         [ 0.0252,  1.7414,  0.5879,  ...,  0.5811, -0.4826,  0.4717],
         [ 0.2577, -0.3697,  0.6461,  ...,  1.3514, -1.3790,  0.3190],
         [-0.4630, -0.2103, -1.0037,  ...,  0.2804, -0.4760, -1.1063]],

        [[-0.1983, -1.0045,  0.4638,  ..., -0.5927,  0.4291,  1.3377],
         [ 0.8566,  2.3831, -1.4070,  ..., -0.5381,  0.2112,  0.6770],
         [ 1.6264, -0.9534,  0.2101,  ...,  0.6573, -1.0363,  1.1449],
         [-1.6295, -1.1488, -0.2221,  ..., -0.3448, -1.9262, -1.1214]],

        [[-0.1428, -1.4351,  0.3176,  ...,  0.6982,  1.0452,  0.6206],
         [ 0.3215,  0.6797, -1.4317,  ...,  0.5068, -1.3675, -0.0350],
         [ 1.5025, -0.4257, -0.4658,  ...,  0.6983, -0.4191, -0.6936],
         [-1.3856,  0.0792,  0.6901,  ...,  1.8047, -0.4890, -0.0627]],

        ...,

        [[ 0.5745, -1.5984,  0.7439,  ..., -0.5898,  0.1458,  0.3496],
         [-0.5216, -0.7947, -0.5601,  .

In [9]:
# One embedding vector per position in the context window; the table has
# context_length rows, each of width output_dim.
context_length = max_length
pos_embedding_layer = torch.nn.Embedding(
    num_embeddings=context_length,
    embedding_dim=output_dim,
)

In [10]:
# Look up the embedding for each position index 0 .. max_length - 1,
# giving one row per position.
pos_embedding = pos_embedding_layer(torch.arange(max_length))
print("positional embedding:\n", pos_embedding)
print("positional embedding shape:", pos_embedding.shape)

positional embedding:
 tensor([[ 0.5830, -1.0771,  0.3607,  ...,  0.8952, -0.0574,  1.9709],
        [-0.5046, -0.0031, -2.0193,  ...,  1.1257,  0.4667, -0.2845],
        [-0.5482, -0.4945,  1.1000,  ...,  0.2527,  0.4895, -0.5412],
        [-0.4151, -0.4484, -0.5756,  ...,  1.0777, -0.0704,  0.2744]],
       grad_fn=<EmbeddingBackward0>)
positional embedding shape: torch.Size([4, 256])


In [11]:
# Add the positional vectors to the token vectors. pos_embedding has no
# batch dimension, so it is broadcast across every sample in the batch.
input_embedding = token_embedding + pos_embedding
print("input embedding:\n", input_embedding)
print("input embedding shape:", input_embedding.shape)

input embedding:
 tensor([[[ 0.3389, -0.3146, -0.2212,  ..., -0.3901,  1.9533,  2.6052],
         [-0.4794,  1.7383, -1.4314,  ...,  1.7068, -0.0160,  0.1873],
         [-0.2905, -0.8642,  1.7462,  ...,  1.6042, -0.8895, -0.2221],
         [-0.8781, -0.6587, -1.5793,  ...,  1.3581, -0.5464, -0.8319]],

        [[ 0.3847, -2.0816,  0.8246,  ...,  0.3025,  0.3716,  3.3086],
         [ 0.3520,  2.3800, -3.4263,  ...,  0.5876,  0.6778,  0.3925],
         [ 1.0783, -1.4479,  1.3102,  ...,  0.9100, -0.5468,  0.6038],
         [-2.0446, -1.5972, -0.7977,  ...,  0.7329, -1.9965, -0.8471]],

        [[ 0.4402, -2.5122,  0.6783,  ...,  1.5934,  0.9878,  2.5915],
         [-0.1831,  0.6766, -3.4510,  ...,  1.6325, -0.9008, -0.3195],
         [ 0.9544, -0.9202,  0.6342,  ...,  0.9510,  0.0704, -1.2348],
         [-1.8007, -0.3692,  0.1145,  ...,  2.8824, -0.5594,  0.2116]],

        ...,

        [[ 1.1574, -2.6755,  1.1046,  ...,  0.3053,  0.0884,  2.3205],
         [-1.0262, -0.7978, -2.5794,  .