In [6]:
import yaml
import tiktoken
import torch
from torch import nn

from dataset import Data
from dataloader import get_data_loader
from embeddings import Embeddings

In [7]:
# Read the whole text file into memory as a single string.
# The encoding is pinned so the decoded text does not depend on the
# platform's default locale encoding.
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

In [8]:
# Parse the YAML configuration file into `config`.
# safe_load is used so only plain YAML data types are constructed; the
# encoding is pinned so parsing does not depend on the platform default.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Bare expression: show the parsed configuration as the cell output.
config

{'dropout': 0.0,
 'vocab_size': 50257,
 'embedding_dim': 4,
 'stride': 3,
 'batch_size': 2,
 'shuffle': False,
 'drop_last': True,
 'num_workers': 0,
 'context_window': 4,
 'num_heads': 2,
 'Q_K_V_bias': False,
 'kv_bias': False,
 'batch_first': True,
 'device': 'None'}

In [9]:
# Build the dataset from the raw text using the GPT-2 tiktoken encoding,
# with the window length and stride taken from the config.
dataset = Data(
    raw_text=raw_text,
    tokenizer=tiktoken.get_encoding("gpt2"),
    context_length=config["context_window"],
    stride=config["stride"],
)

# Forward the loader-related config entries as keyword arguments; each
# keyword has the same name as its config key.
data_dl = get_data_loader(
    dataset,
    **{
        option: config[option]
        for option in ("batch_size", "shuffle", "drop_last", "num_workers")
    },
)

# Peek at the first batch only. `x` and `y` stay bound afterwards and are
# reused by later cells.
for x, y in data_dl:
    print(x, y, sep="\n")
    break

tensor([[  40,  367, 2885, 1464],
        [1464, 1807, 3619,  402]])
tensor([[ 367, 2885, 1464, 1807],
        [1807, 3619,  402,  271]])


In [10]:
# Seed the global RNG before constructing the module so the run is repeatable.
torch.manual_seed(1)

# Embeddings takes its three positional arguments from the config, in this
# order: vocab_size, embedding_dim, context_window.
embeddings = Embeddings(
    *(config[key] for key in ("vocab_size", "embedding_dim", "context_window"))
)

# Bare expression: display the shape of the embedded first batch.
embeddings(x).shape

torch.Size([2, 4, 4])

In [20]:
# Seed the global RNG so the attention module's parameters are initialised
# reproducibly.
torch.manual_seed(1)

# Multi-head attention configured from the config file.
mha_instance = nn.MultiheadAttention(
    embed_dim=config["embedding_dim"],
    num_heads=config["num_heads"],
    batch_first=config["batch_first"],
    bias=config["Q_K_V_bias"],
    add_bias_kv=config["kv_bias"],
    dropout=config["dropout"],
    # config["device"] holds the string 'None' rather than a real device
    # value, so it is deliberately not forwarded here.
    device=None,
)

# Self-attention: query, key and value are the same tensor. Embed the batch
# once and reuse it instead of running the embedding module three times.
token_embeddings = embeddings(x)

# Bare expression: display the (attention output, attention weights) tuple.
mha_instance(token_embeddings, token_embeddings, token_embeddings)

(tensor([[[-0.4241,  0.6136, -0.1740, -0.2770],
          [ 0.7650, -1.0892,  0.2287,  0.5502],
          [ 0.2196, -0.3521,  0.0163,  0.1996],
          [-0.6354,  0.8560, -0.2052, -0.4196]],
 
         [[-0.3849,  0.5611, -0.1167, -0.2752],
          [ 0.4433, -0.6210,  0.1290,  0.3138],
          [-0.1959,  0.2642, -0.0645, -0.1274],
          [ 0.3374, -0.5271,  0.1019,  0.2624]]], grad_fn=<TransposeBackward0>),
 tensor([[[0.2145, 0.3449, 0.3053, 0.1353],
          [0.2208, 0.2725, 0.0953, 0.4114],
          [0.2913, 0.2127, 0.2495, 0.2465],
          [0.1243, 0.4615, 0.2270, 0.1872]],
 
         [[0.1887, 0.4303, 0.2013, 0.1797],
          [0.2776, 0.1693, 0.1586, 0.3945],
          [0.2087, 0.3465, 0.2416, 0.2032],
          [0.3228, 0.2291, 0.2677, 0.1803]]], grad_fn=<MeanBackward1>))