In [1]:
import yaml
import tiktoken
import torch
from torch import nn

from dataset import Data
from dataloader import get_data_loader
from embeddings import Embeddings
from transformer_block import TransformerBlock
from gpt2 import GPT2Model
from utils import text_to_tokens,tokens_to_text

In [2]:
# Load the GPT-2 encoding from tiktoken and encode a sample sentence.
# "<|endoftext|>" is listed in allowed_special; the recorded output encodes
# it as the single id 50256.
tokenizer = tiktoken.get_encoding("gpt2")
tokenizer.encode(
    "Hello, how are <|endoftext|>?",
    allowed_special={"<|endoftext|>"},
)

[15496, 11, 703, 389, 220, 50256, 30]

In [5]:
# Round-trip the sample sentence through the project helpers: text -> token
# ids -> text. In the recorded output the ids come back as a 2-D tensor with
# a leading dimension of 1, and decoding reproduces the input string.
text = "Hello, how are <|endoftext|>?"
tokens = text_to_tokens(text)
print(tokens, tokens_to_text(tokens), sep="\n")


tensor([[15496,    11,   703,   389,   220, 50256,    30]])
Hello, how are <|endoftext|>?


In [2]:
# Read the whole training corpus into memory as one string.
# The encoding is pinned so the decoded text (and therefore the token ids
# produced downstream) does not depend on the platform's default locale.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

In [3]:
# Load the run configuration. safe_load is used, so only plain YAML data is
# constructed. The encoding is pinned so parsing does not depend on the
# platform's default locale.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# NOTE(review): the displayed config carries both 'Q_K_V_bias' and 'kv_bias'
# — confirm which key the model code actually reads.
config

{'dropout': 0.0,
 'vocab_size': 50257,
 'embedding_dim': 4,
 'stride': 3,
 'batch_size': 2,
 'shuffle': False,
 'drop_last': True,
 'num_workers': 0,
 'context_window': 4,
 'num_heads': 2,
 'Q_K_V_bias': False,
 'kv_bias': False,
 'batch_first': True,
 'device': None,
 'n_layers': 2}

In [4]:
# Build the dataset over the raw text; window length and stride come from the
# config. In the recorded output consecutive rows of x start `stride` (3)
# tokens apart and y is x shifted by one token.
dataset = Data(
    raw_text=raw_text,
    tokenizer=tiktoken.get_encoding("gpt2"),
    context_length=config["context_window"],
    stride=config["stride"],
)

data_dl = get_data_loader(
    dataset,
    batch_size=config["batch_size"],
    shuffle=config["shuffle"],
    drop_last=config["drop_last"],
    num_workers=config["num_workers"],
)

# Fetch exactly one batch for inspection. Unlike a `for ... break` loop, this
# raises StopIteration right here if the loader is empty, instead of leaving
# x / y unbound (or stale from earlier kernel state) for the cells below.
x, y = next(iter(data_dl))
print(x)
print(y)

tensor([[  40,  367, 2885, 1464],
        [1464, 1807, 3619,  402]])
tensor([[ 367, 2885, 1464, 1807],
        [1807, 3619,  402,  271]])


In [5]:
# Seed torch's global RNG before building the model so the forward pass below
# shows the same values on every Restart & Run All. The seed is read from the
# config when present and defaults to 0.
# NOTE(review): assumes GPT2Model draws its initial weights from torch's
# global RNG — confirm in gpt2.py.
torch.manual_seed(config.get("seed", 0))
model = GPT2Model(config)

# Forward pass on the first batch; the result is displayed as the cell output.
model(x)

tensor([[[-0.0874,  1.2459, -0.1492,  ...,  0.7775,  0.4876,  0.2839],
         [ 0.2384,  0.9003, -0.2390,  ...,  0.5699,  0.3505,  0.4885],
         [-0.9144,  0.4846,  0.7778,  ..., -0.8264, -1.0812,  0.0228],
         [-1.5046,  1.5470,  0.3529,  ...,  0.3029,  0.0484, -0.5471]],

        [[-0.2793,  1.0643, -0.5660,  ...,  0.6920,  0.7035,  0.0178],
         [-1.0551,  1.5422, -0.3246,  ...,  0.7930,  0.7368, -0.4537],
         [-0.8954,  0.8422,  0.8463,  ..., -0.4559, -0.8643,  0.0516],
         [-1.2100,  0.1386,  0.4338,  ..., -1.1689, -1.0753, -0.2888]]],
       grad_fn=<ViewBackward0>)

In [6]:
# Shape of the model output for the first batch. The recorded result
# [2, 4, 50257] lines up with batch_size, context_window and vocab_size in
# the config shown above.
model(x).size()

torch.Size([2, 4, 50257])