In [3]:
import sys
import os

# Make the parent of the current working directory importable so the `GPT`
# package imported below can be found (presumably it lives there).
# The membership check keeps this cell idempotent: re-running it does not
# append the same entry to sys.path again.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from GPT.GPT_Model import TransformerBlock
import torch 
import torch.nn as nn
import json

In [4]:
# Load the configuration dict that is passed to TransformerBlock / GPTModel
# in the cells below.
# (`json` is already imported in the first cell, so it is not re-imported here.)
# JSON text is UTF-8 by specification; passing the encoding explicitly avoids
# depending on the platform's default text encoding.
with open("../GPT_Model_Configuration/GPT_config_124M.json", "r", encoding="utf-8") as f:
    GPT_CONFIG_124M = json.load(f)

In [5]:
# Smoke test: push a random batch through one TransformerBlock and check that
# the output keeps the input's shape.
torch.manual_seed(123)  # seed before sampling x and constructing the block

# 2 sequences x 4 positions x 768 features; 768 matches the layer widths that
# the block's parameter summary reports further down.
x = torch.rand(2, 4, 768)
block = TransformerBlock(GPT_CONFIG_124M)
output = block(x)

print(f"Input shape: {x.shape}")
print(f"Output shape: {output.shape}")

Input shape: torch.Size([2, 4, 768])
Output shape: torch.Size([2, 4, 768])


In [6]:
from torchinfo import summary

In [7]:
# Per-layer parameter counts for the single TransformerBlock built above.
summary(block)

Layer (type:depth-idx)                   Param #
TransformerBlock                         --
├─MultiHeadAttention: 1-1                --
│    └─Linear: 2-1                       590,592
│    └─Linear: 2-2                       590,592
│    └─Linear: 2-3                       590,592
│    └─Linear: 2-4                       590,592
│    └─Dropout: 2-5                      --
├─FeedForward: 1-2                       --
│    └─Sequential: 2-6                   --
│    │    └─Linear: 3-1                  2,362,368
│    │    └─GELU: 3-2                    --
│    │    └─Linear: 3-3                  2,360,064
├─LayerNorm: 1-3                         1,536
├─LayerNorm: 1-4                         1,536
├─Dropout: 1-5                           --
Total params: 7,087,872
Trainable params: 7,087,872
Non-trainable params: 0

In [8]:
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
txt1 = "Every effort moves you"
txt2 = "Every day holds a"

# Encode each prompt to a 1-D tensor of token ids and stack the rows into one
# 2-D batch. torch.stack needs every row to have the same length, which holds
# here because both prompts encode to four tokens.
batch = torch.stack(
    [torch.tensor(tokenizer.encode(text)) for text in (txt1, txt2)],
    dim=0,
)

In [9]:
# Show the stacked token-id batch (one row per prompt).
print(batch)

tensor([[6109, 3626, 6100,  345],
        [6109, 1110, 6622,  257]])


In [10]:
# Confirm the batch dimensions: (number of prompts, tokens per prompt).
print(batch.shape)

torch.Size([2, 4])


In [11]:
from GPT.GPT_Model import GPTModel

In [12]:
# Build the full GPT model from the loaded configuration dict.
# NOTE(review): no checkpoint is loaded anywhere in this notebook, so the
# weights are presumably randomly initialised — confirm against GPTModel.
model = GPTModel(GPT_CONFIG_124M)

In [13]:
# This forward pass is only used to inspect the output shape, so run it under
# torch.no_grad(): no autograd graph is recorded and the intermediate
# activations are not kept alive through `out`.
# NOTE: model.eval() has not been called yet at this point, so the model is
# still in training mode (any dropout configured in the model stays active).
with torch.no_grad():
    out = model(batch)

In [14]:
# Echo the input token ids next to the shape of the model output.
# The output tensor itself is not printed; only its shape is shown.
print(f"Input batch:\n {batch}")
print(f"\nOutput shape: {out.shape}")

Input batch:
 tensor([[6109, 3626, 6100,  345],
        [6109, 1110, 6622,  257]])

Output shape: torch.Size([2, 4, 50257])


In [15]:
# Parameter summary for the full model; print() emits the table as plain text.
print(summary(model))

Layer (type:depth-idx)                   Param #
GPTModel                                 --
├─Embedding: 1-1                         38,597,376
├─Embedding: 1-2                         786,432
├─Dropout: 1-3                           --
├─Sequential: 1-4                        --
│    └─TransformerBlock: 2-1             --
│    │    └─MultiHeadAttention: 3-1      2,362,368
│    │    └─FeedForward: 3-2             4,722,432
│    │    └─LayerNorm: 3-3               1,536
│    │    └─LayerNorm: 3-4               1,536
│    │    └─Dropout: 3-5                 --
│    └─TransformerBlock: 2-2             --
│    │    └─MultiHeadAttention: 3-6      2,362,368
│    │    └─FeedForward: 3-7             4,722,432
│    │    └─LayerNorm: 3-8               1,536
│    │    └─LayerNorm: 3-9               1,536
│    │    └─Dropout: 3-10                --
│    └─TransformerBlock: 2-3             --
│    │    └─MultiHeadAttention: 3-11     2,362,368
│    │    └─FeedForward: 3-12            4,722,432
│   

In [16]:
from GPT.Text_Generation import generate_simple_text

In [17]:
# Tokenise the prompt and turn it into a batch of one sequence.
start_context = "Hello, i am"
encoded = tokenizer.encode(start_context)
print(f"Encoded text: {encoded}")

# Wrapping the id list in an outer list gives the tensor a leading batch
# dimension of size 1.
encoded_tensor = torch.tensor([encoded])
print(f"Encoded tensor shape: {encoded_tensor.shape}")

Encoded text: [15496, 11, 1312, 716]
Encoded tensor shape: torch.Size([1, 4])


In [18]:
# Switch to evaluation mode before generating, then extend the prompt by six
# tokens. The context window size is read from the loaded configuration.
model.eval()
out = generate_simple_text(
    model=model,
    idx=encoded_tensor,
    max_new_tokens=6,
    context_size=GPT_CONFIG_124M["context_length"],
)
print(f"Output: {out}")

Output: tensor([[15496,    11,  1312,   716, 42280, 22255,  8170, 35468, 48478, 12309]])


In [19]:
# Number of token ids in the generated sequence (prompt tokens plus the newly
# generated ones); `out` is 2-D with the sequence along the second axis.
print(f"Output Length: {out.shape[1]}")

Output Length: 10


In [20]:
# Drop the batch dimension, convert the ids to a plain Python list and map
# them back to text with the same tokenizer used for encoding.
token_ids = out.squeeze(0).tolist()
decoded_text = tokenizer.decode(token_ids)
print(decoded_text)

Hello, i am lil Nak Exper amenitiesBomb peaceful
