In [1]:
import yaml
import tiktoken
import torch
from torch import nn

from dataset import Data
from dataloader import get_data_loader
from embeddings import Embeddings
from transformer_block import TransformerBlock
from gpt2 import GPT2Model
from utils import text_to_tokens,tokens_to_text,generate_text

In [3]:
with open("the-verdict.txt","r") as f:
    raw_text = f.read()

In [5]:
with open("config.yaml","r") as f:
    config = yaml.safe_load(f)

In [15]:
dataset = Data(
    raw_text=raw_text,
    tokenizer=tiktoken.get_encoding("gpt2"),
    context_length=config["context_window"],
    stride=config["stride"]
)

x,y = dataset[0]
print(x)

tensor([   40,   367,  2885,  1464,  1807,  3619,   402,   271, 10899,  2138,
          257,  7026, 15632,   438,  2016,   257,   922,  5891,  1576,   438,
          568,   340,   373,   645,  1049,  5975,   284,   502,   284,  3285,
          326,    11,   287,   262,  6001,   286,   465, 13476,    11,   339,
          550,  5710,   465, 12036,    11,  6405,   257,  5527, 27075,    11,
          290,  4920,  2241,   287,   257,  4489,    64,   319,   262, 34686,
        41976,    13,   357, 10915,   314,  2138,  1807,   340,   561,   423,
          587, 10598,   393, 28537,  2014,   198,   198,     1,   464,  6001,
          286,   465, 13476,     1,   438,  5562,   373,   644,   262,  1466,
         1444,   340,    13,   314,   460,  3285,  9074,    13, 46606,   536])


In [8]:


data_dl = get_data_loader(
    dataset,
    batch_size=config["batch_size"],
    shuffle=config["shuffle"],
    drop_last=config["drop_last"],
    num_workers=config["num_workers"]
    )


for x,y in data_dl:
    # print(x)
    # print(y)
    break

In [9]:
model = GPT2Model(config)

model(x)

tensor([[[-1.1885,  0.5600,  1.0801,  ...,  0.5421, -0.4958,  0.6919],
         [ 0.9804,  0.2871,  0.4639,  ..., -0.2104,  0.0651, -0.2675],
         [-1.1312, -0.0660,  0.0617,  ..., -0.5819, -0.2791, -0.8605],
         ...,
         [-1.0856,  0.5250,  0.9901,  ...,  0.0087, -0.3210,  0.3992],
         [-0.5326,  0.5916,  1.0956,  ...,  0.8331, -0.4603,  0.8156],
         [-0.6084, -0.4086, -0.4806,  ..., -0.2545, -0.3621, -1.3536]],

        [[-0.4239,  0.7177,  1.2581,  ...,  0.4373, -0.2909,  0.8573],
         [ 0.5074, -0.0963, -0.0696,  ...,  0.0706, -0.1918, -0.7700],
         [-0.4271, -0.3529, -0.4386,  ..., -0.7955, -0.1506, -1.4997],
         ...,
         [-0.8159, -0.3077, -0.3329,  ..., -0.6441, -0.2570, -1.3272],
         [ 0.9792,  0.2994,  0.4826,  ..., -0.2133,  0.0685, -0.2474],
         [-0.9457, -0.3531, -0.3584,  ..., -0.0186, -0.4830, -1.1320]]],
       grad_fn=<ViewBackward0>)

In [10]:
model(x).shape

torch.Size([2, 100, 50257])

In [13]:
generate_text(
    "every step takes you",
    model,
    "cpu",
    look_back=40,
    num_tokens_to_generate=3
)

'every step takes you schemes Explpport'

In [14]:
model(torch.randint(0,10000,(2,3))).shape

torch.Size([2, 3, 50257])