**Two types**

Absolute: *For each position in the input sequence, a unique embedding is added to the token's embedding to convey its exact location.*

Token embedding + positional embedding = input embedding

Relative: *The emphasis is on the relative position, or distance, between tokens. The model learns the relationship in terms of "how far apart" rather than "at which exact position".*


In [5]:
import torch
import tiktoken

In [6]:
# Read the whole training text into memory as a single UTF-8 decoded string.
# NOTE(review): this is an absolute, machine-specific path; on any other
# machine open() raises FileNotFoundError, so adjust it before running.
with open("C:/Users/anshk/OneDrive/Desktop/LLM/Datasets/the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

In [7]:
from torch.utils.data import Dataset, DataLoader

class GPTDatasetV1(Dataset):
    """Sliding-window dataset of (input, target) token-ID tensor pairs.

    The text is tokenized once, then cut into windows of ``max_length``
    token IDs whose start offsets advance by ``stride``. Each target window
    is its input window shifted one token to the right, so position ``k`` of
    the target holds the token that follows position ``k`` of the input.

    Args:
        txt: Text to tokenize.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a sequence of integer token IDs.
        max_length: Number of token IDs in every input/target window.
        stride: Distance, in tokens, between consecutive window starts.
    """

    def __init__(self, txt, tokenizer, max_length, stride):
        encoded = tokenizer.encode(txt, allowed_special={"<|endoftext|>"})

        # Stop early enough that the one-token-shifted target window still
        # fits inside the encoded text; texts with at most max_length tokens
        # therefore produce no windows at all.
        window_starts = range(0, len(encoded) - max_length, stride)

        self.input_ids = [
            torch.tensor(encoded[start:start + max_length])
            for start in window_starts
        ]
        self.target_ids = [
            torch.tensor(encoded[start + 1:start + max_length + 1])
            for start in window_starts
        ]

    def __len__(self):
        """Return the number of (input, target) windows."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the ``(input_ids, target_ids)`` tensor pair at ``idx``."""
        sample = (self.input_ids[idx], self.target_ids[idx])
        return sample

In [8]:
def create_dataloader_v1(txt, batch_size=4, max_length=256, stride=128,
                         shuffle=True, drop_last=True, num_workers=0):
    """Build a DataLoader of sliding-window (input, target) token-ID batches.

    The text is tokenized with tiktoken's "gpt2" encoding and windowed by
    ``GPTDatasetV1``.

    Args:
        txt: Text to tokenize and window.
        batch_size: Number of windows per batch.
        max_length: Number of token IDs per window.
        stride: Distance, in tokens, between consecutive window starts.
        shuffle: Forwarded to ``DataLoader``.
        drop_last: Forwarded to ``DataLoader``.
        num_workers: Forwarded to ``DataLoader``.

    Returns:
        A ``torch.utils.data.DataLoader`` over the windowed dataset.
    """
    windows = GPTDatasetV1(
        txt,
        tiktoken.get_encoding("gpt2"),
        max_length,
        stride,
    )

    loader_options = {
        "batch_size": batch_size,
        "shuffle": shuffle,
        "drop_last": drop_last,
        "num_workers": num_workers,
    }
    return DataLoader(windows, **loader_options)




In [9]:
# Number of rows in the token-embedding table (one per token ID). 50257 is
# the vocabulary size of the GPT-2 BPE encoding that create_dataloader_v1
# tokenizes with.
vocab_size = 50257
# Width of every embedding vector.
output_dim = 256

# Lookup table mapping each token ID to an output_dim-sized vector. No seed
# is set in this cell, so the initial weights differ from run to run.
token_embedding_layer = torch.nn.Embedding(vocab_size, output_dim)

In [18]:
# Window length in tokens. stride is set equal to max_length below, so
# consecutive windows do not overlap; shuffle=False keeps them in text order.
max_length = 4
dataloader = create_dataloader_v1(
    raw_text, batch_size=8, max_length=max_length,
    stride = max_length,shuffle=False
)

# Pull only the first batch: inputs and targets are each (batch_size, max_length).
data_iter = iter(dataloader)
inputs , targets = next(data_iter)

In [19]:
# Inspect the first batch: 8 rows (batch_size) of 4 token IDs (max_length).
print("token IDs:\n", inputs)
print("token IDs shape:", inputs.shape)

token IDs:
 tensor([[   40,   367,  2885,  1464],
        [ 1807,  3619,   402,   271],
        [10899,  2138,   257,  7026],
        [15632,   438,  2016,   257],
        [  922,  5891,  1576,   438],
        [  568,   340,   373,   645],
        [ 1049,  5975,   284,   502],
        [  284,  3285,   326,    11]])
token IDs shape: torch.Size([8, 4])


In [20]:
# Look up a vector for every token ID in the batch; the embedding layer
# appends one trailing dimension of size output_dim to the shape of inputs,
# giving (8, 4, 256) here.
token_embedding = token_embedding_layer(inputs)
print("token embedding:\n", token_embedding)
print("token embedding shape:", token_embedding.shape)

token embedding:
 tensor([[[ 0.1502,  1.7766, -0.4974,  ...,  1.3028, -0.5298,  0.1891],
         [-0.7940,  1.5534, -0.9948,  ...,  0.3806, -0.6563,  0.6941],
         [-0.3718, -1.0039,  1.5890,  ..., -1.7633, -0.6094,  0.3986],
         [ 0.6215, -0.8419, -0.6899,  ...,  0.7351,  0.4315, -0.2839]],

        [[ 0.2301, -0.4256, -1.0295,  ..., -0.5378,  2.4979,  0.0975],
         [-1.2164,  1.0402,  1.6129,  ..., -1.6432, -0.7806,  0.5704],
         [ 0.8155, -0.3002,  2.1801,  ...,  0.8985,  0.0634,  0.5205],
         [ 0.1896, -1.5521,  0.6078,  ..., -1.1363,  0.1531,  0.9384]],

        [[ 0.1036,  1.8998, -0.4487,  ...,  0.5173, -0.0386, -1.2152],
         [ 1.2996, -0.0690,  0.2348,  ...,  0.7924, -0.7509,  1.2181],
         [-0.1985, -0.4691, -1.4262,  ...,  0.9589,  0.1182,  0.8242],
         [-0.6293, -0.2836,  0.7863,  ...,  0.5548,  0.1321, -1.3405]],

        ...,

        [[-0.2612,  0.3330,  0.7294,  ..., -1.0338, -0.3780, -0.8606],
         [ 2.0468, -0.4125,  2.7718,  .

In [22]:
# Absolute positional embeddings: one learnable vector per position in the
# window, with the same width as the token embeddings so the two can be added.
context_length = max_length
pos_embedding_layer = torch.nn.Embedding(context_length, output_dim)

In [23]:
# torch.arange(max_length) yields the position indices 0 .. max_length - 1;
# looking them up returns one row per position. The indices are valid for
# the layer because context_length was set equal to max_length.
pos_embedding = pos_embedding_layer(torch.arange(max_length))
print("positional embedding:\n", pos_embedding)
print("positional embedding shape:", pos_embedding.shape)

positional embedding:
 tensor([[-0.4888,  0.2885,  0.5037,  ...,  0.3666,  0.7809,  0.9031],
        [-0.0792,  0.5920,  0.8640,  ..., -0.3289,  0.1341,  1.2895],
        [ 0.3288, -0.8666, -0.8885,  ..., -0.4288, -1.4908,  0.4945],
        [-0.3368, -1.1718,  0.0603,  ..., -0.8182,  0.5480, -0.3375]],
       grad_fn=<EmbeddingBackward0>)
positional embedding shape: torch.Size([4, 256])


In [24]:
# Add positional information to the token vectors. pos_embedding is
# (max_length, output_dim) and is broadcast across the leading batch
# dimension of token_embedding, so every sequence in the batch receives the
# same per-position offsets.
input_embedding = token_embedding + pos_embedding
print("input embedding:\n", input_embedding)
print("input embedding shape:", input_embedding.shape)

input embedding:
 tensor([[[-0.3385,  2.0650,  0.0063,  ...,  1.6695,  0.2511,  1.0923],
         [-0.8732,  2.1453, -0.1307,  ...,  0.0517, -0.5223,  1.9837],
         [-0.0429, -1.8705,  0.7005,  ..., -2.1921, -2.1001,  0.8931],
         [ 0.2846, -2.0137, -0.6295,  ..., -0.0831,  0.9795, -0.6215]],

        [[-0.2586, -0.1371, -0.5258,  ..., -0.1712,  3.2788,  1.0006],
         [-1.2956,  1.6321,  2.4769,  ..., -1.9720, -0.6465,  1.8600],
         [ 1.1443, -1.1667,  1.2916,  ...,  0.4697, -1.4274,  1.0150],
         [-0.1472, -2.7239,  0.6681,  ..., -1.9545,  0.7011,  0.6009]],

        [[-0.3852,  2.1883,  0.0550,  ...,  0.8839,  0.7423, -0.3121],
         [ 1.2204,  0.5229,  1.0988,  ...,  0.4636, -0.6168,  2.5076],
         [ 0.1304, -1.3357, -2.3148,  ...,  0.5301, -1.3726,  1.3188],
         [-0.9661, -1.4554,  0.8467,  ..., -0.2634,  0.6801, -1.6780]],

        ...,

        [[-0.7499,  0.6215,  1.2331,  ..., -0.6672,  0.4029,  0.0425],
         [ 1.9675,  0.1794,  3.6358,  .