In [1]:
import yaml
import tiktoken
import torch
from torch import nn

from dataset import Data
from dataloader import get_data_loader
from embeddings import Embeddings
from transformer_block import TransformerBlock
from gpt2 import GPT2Model
from utils import text_to_tokens,tokens_to_text,generate_text
from loss import cross_entropy
from train import traininng_loop

# Load the run configuration. The encoding is pinned so that parsing does not
# depend on the platform's default locale.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Seed torch's global RNG once, up front, so that a Restart & Run All draws the
# same random numbers for everything executed below.
SEED = 123
torch.manual_seed(SEED)

In [2]:
# Read the whole training corpus into memory. The encoding is explicit so the
# text decodes identically regardless of the platform's default locale.
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# Length of the first element returned by text_to_tokens for the full corpus
# (displayed as the cell output).
len(text_to_tokens(raw_text)[0])

5145

In [3]:
# Character-level split of the corpus: the leading `train_ratio` share is used
# for training and everything after the cut point for validation.
train_ratio = 0.9
split_index = int(train_ratio * len(raw_text))
train_text, val_text = raw_text[:split_index], raw_text[split_index:]


# Dataset 

In [4]:
# Both splits are wrapped with the same tokenizer, context length and stride;
# only the underlying text differs.
dataset_kwargs = dict(
    tokenizer=tiktoken.get_encoding("gpt2"),
    context_length=config["context_window"],
    stride=config["stride"],
)

train_dataset = Data(raw_text=train_text, **dataset_kwargs)
val_dataset = Data(raw_text=val_text, **dataset_kwargs)


# Dataloader 

In [5]:
# Training loader: batching, shuffling and last-batch handling come from the config.
train_dl = get_data_loader(
    train_dataset,
    batch_size=config["batch_size"],
    shuffle=config["shuffle"],
    drop_last=config["drop_last"],
    num_workers=config["num_workers"]
)

# Validation loader: never shuffle and never drop the trailing batch, so that
# evaluation always covers every validation sample in a fixed order. Reusing the
# training settings here could silently discard part (or, on a small split, all)
# of the validation data.
val_dl = get_data_loader(
    val_dataset,
    batch_size=config["batch_size"],
    shuffle=False,
    drop_last=False,
    num_workers=config["num_workers"]
)


In [6]:
# Peek at the first training batch (if there is one) and show the shapes of its
# two tensors. `x` and `y` stay bound at notebook level for the cells below.
first_batch = next(iter(train_dl), None)
if first_batch is not None:
    x, y = first_batch
    print(x.shape, y.shape, sep="\n")

torch.Size([2, 256])
torch.Size([2, 256])


In [7]:
# Display the batch size reported by the training loader.
train_dl.batch_size

2

In [8]:
# Total number of elements in the batch tensor `x` left bound by an earlier cell.
x.numel()

512

In [9]:
def count_tokens(data_loader):
    """Return the total number of elements across all input tensors of a loader.

    Each item yielded by ``data_loader`` is unpacked as an ``(inputs, targets)``
    pair; only ``inputs.numel()`` is accumulated. An empty loader yields 0.
    The loop variables are local, so counting does not overwrite any
    notebook-level ``x``/``y`` that other cells rely on.
    """
    return sum(inputs.numel() for inputs, _ in data_loader)


train_tokens = count_tokens(train_dl)
print(f"Train tokens: {train_tokens}")

val_tokens = count_tokens(val_dl)
print(f"Val tokens: {val_tokens}")


print(f'total tokens: {train_tokens + val_tokens}')

Train tokens: 4608
Val tokens: 512
total tokens: 5120


In [10]:
model = GPT2Model(config)

# Fetch a dedicated batch for the smoke test rather than relying on whatever
# `x` happens to be left over from a loop in an earlier cell.
sample_inputs, _ = next(iter(train_dl))

# Forward pass without gradient tracking, just to confirm the output shape.
with torch.no_grad():
    logits = model(sample_inputs)
    print(logits.shape)


torch.Size([2, 256, 50257])


In [11]:
# AdamW over all model parameters; the step size is named so it is easy to tune.
LEARNING_RATE = 4e-4
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

In [12]:
# Keyword settings for the training run, gathered in one place: loss, optimizer,
# epoch count, device, plus the prompt and generation settings passed along.
training_options = {
    "loss_fn": cross_entropy,
    "optimizer": optimizer,
    "num_epochs": 10,
    "device": "cpu",
    "text_to_generate": "Every single step",
    "look_back": config["context_window"],
    "num_tokens_to_generate": config["num_tokens_to_generate"],
}

traininng_loop(model, train_dl, val_dl, **training_options)

2025-04-28 11:58:27,402 - INFO - Epoch 1/10
2025-04-28 11:58:40,431 - INFO - Seen tokens: 4608
2025-04-28 11:58:40,433 - INFO - Loss: 9.1495
2025-04-28 11:58:40,675 - INFO - Validation Loss: 7.6709
2025-04-28 11:58:41,287 - INFO - Generated text: Every single step....................
2025-04-28 11:58:41,288 - INFO - Epoch 2/10
2025-04-28 11:58:51,335 - INFO - Seen tokens: 9216
2025-04-28 11:58:51,338 - INFO - Loss: 6.6612
2025-04-28 11:58:51,569 - INFO - Validation Loss: 6.7470
2025-04-28 11:58:52,152 - INFO - Generated text: Every single step                    
2025-04-28 11:58:52,153 - INFO - Epoch 3/10
2025-04-28 11:59:03,362 - INFO - Seen tokens: 13824
2025-04-28 11:59:03,364 - INFO - Loss: 8.2623
2025-04-28 11:59:03,637 - INFO - Validation Loss: 6.5521
2025-04-28 11:59:04,291 - INFO - Generated text: Every single step"""I.               
2025-04-28 11:59:04,292 - INFO - Epoch 4/10
2025-04-28 11:59:16,246 - INFO - Seen tokens: 18432
2025-04-28 11:59:16,248 - INFO - Loss: 5.5259
20

In [13]:
# Generate a continuation of the prompt with the current model, using the same
# prompt, device and generation settings that were passed to the training call.
generation_options = {
    "text_to_generate": "Every single step",
    "model": model,
    "device": "cpu",
    "look_back": config["context_window"],
    "num_tokens_to_generate": config["num_tokens_to_generate"],
}

generate_text(**generation_options)

'Every single step you?"  "Yes--quite insensible to the irony. She wanted him vindicated--'