In [1]:
import yaml
import tiktoken
import torch
from torch import nn

from dataset import Data
from dataloader import get_data_loader
from embeddings import Embeddings
from transformer_block import TransformerBlock
from gpt2 import GPT2Model
from utils import text_to_tokens,tokens_to_text,generate_text
from loss import cross_entropy
from train import traininng_loop
from evaluation import eval

# Load the run configuration. The encoding is given explicitly so the file is
# decoded the same way regardless of the platform's default locale.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

In [2]:
# Read the training corpus. The encoding is given explicitly so the decoded
# text (and therefore the count shown below) does not depend on the platform's
# default locale.
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# Length of the first element returned by text_to_tokens for the whole corpus
# (presumably the number of tokens — confirm against utils.text_to_tokens).
len(text_to_tokens(raw_text)[0])

5145

In [3]:
# Character-level split: the leading 90% of the text is used for training and
# the remaining tail for validation.
train_ratio = 0.9
split_index = int(train_ratio * len(raw_text))
train_text, val_text = raw_text[:split_index], raw_text[split_index:]


# Dataset 

In [4]:
# Build the training and validation datasets with identical settings; only the
# source text differs. The generator keeps the construction order
# (train first, then validation) and adds no extra names to the notebook.
train_dataset, val_dataset = (
    Data(
        raw_text=split_text,
        tokenizer=tiktoken.get_encoding("gpt2"),
        context_length=config["context_window"],
        stride=config["stride"],
    )
    for split_text in (train_text, val_text)
)


# Dataloader 

In [5]:
# Training loader: batching, shuffling and last-batch handling all come from
# the configuration file.
train_dl = get_data_loader(
    train_dataset,
    batch_size=config["batch_size"],
    shuffle=config["shuffle"],
    drop_last=config["drop_last"],
    num_workers=config["num_workers"],
)

# Validation loader: never shuffle and never drop the final partial batch.
# Re-using the training values here could silently discard validation samples
# (the validation split is only 10% of a small corpus, so a dropped batch may
# be most or all of it) and would make the evaluation order vary between runs.
# NOTE(review): assumes get_data_loader forwards shuffle/drop_last to a
# torch DataLoader — confirm in dataloader.py.
val_dl = get_data_loader(
    val_dataset,
    batch_size=config["batch_size"],
    shuffle=False,
    drop_last=False,
    num_workers=config["num_workers"],
)


In [6]:
# for x,y in train_dl:
#     print(x.shape)
#     print(y.shape)
#     break

In [7]:
# train_tokens = 0 
# for x,y in train_dl:
#     train_tokens += x.numel()
# print(f"Train tokens: {train_tokens}")

# val_tokens = 0
# for x,y in val_dl:
#     val_tokens += x.numel()
# print(f"Val tokens: {val_tokens}")


# print(f'total tokens: {train_tokens + val_tokens}')

In [8]:
# Seed PyTorch's global RNG before building the model so a fresh
# "Restart Kernel -> Run All" starts from the same random state. An optional
# `seed` entry in config.yaml overrides the default of 123.
# NOTE(review): this presumes GPT2Model's weight initialisation and the
# shuffling in the training loader draw from torch's global generator — confirm.
torch.manual_seed(config.get("seed", 123))
model = GPT2Model(config)

# with torch.no_grad():
#     logits = model(x)
#     print(logits.shape)


In [9]:
# AdamW over all of the model's parameters, learning rate 4e-4.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=4e-4)

In [10]:
# Run 10 training epochs on the CPU. The same prompt is passed on every call
# so the generated samples in the log can be compared across epochs.
# (`traininng_loop` is spelled this way in the `train` module it is imported from.)
traininng_loop(
    model,
    train_dl,
    val_dl,
    loss_fn=cross_entropy,
    optimizer=optimizer,
    num_epochs=10,
    device="cpu",
    text_to_generate="Every single step",
    look_back=config["context_window"],
    num_tokens_to_generate=config["num_tokens_to_generate"],
)

2025-04-29 19:01:19,588 - INFO - Epoch 1/10
2025-04-29 19:01:31,784 - INFO - Seen tokens: 4608
2025-04-29 19:01:31,786 - INFO - Loss: 9.1594
2025-04-29 19:01:32,006 - INFO - Validation Loss: 7.7434
2025-04-29 19:01:32,662 - INFO - Generated text: Every single step....................
2025-04-29 19:01:32,662 - INFO - Epoch 2/10
2025-04-29 19:01:46,405 - INFO - Seen tokens: 9216
2025-04-29 19:01:46,407 - INFO - Loss: 6.6630
2025-04-29 19:01:46,643 - INFO - Validation Loss: 6.7265
2025-04-29 19:01:47,237 - INFO - Generated text: Every single step                    
2025-04-29 19:01:47,238 - INFO - Epoch 3/10
2025-04-29 19:02:01,673 - INFO - Seen tokens: 13824
2025-04-29 19:02:01,676 - INFO - Loss: 5.9025
2025-04-29 19:02:01,933 - INFO - Validation Loss: 6.6246
2025-04-29 19:02:02,674 - INFO - Generated text: Every single step                    
2025-04-29 19:02:02,674 - INFO - Epoch 4/10
2025-04-29 19:02:15,649 - INFO - Seen tokens: 18432
2025-04-29 19:02:15,651 - INFO - Loss: 5.7566
20

In [11]:
# Evaluate the trained model on the validation loader.
# `eval` is the helper imported from the project's `evaluation` module; in this
# notebook it shadows Python's builtin of the same name.
eval(model, val_loader=val_dl, loss_fn=cross_entropy, device="cpu")

2025-04-29 19:03:33,786 - INFO - Validation Loss: 6.3283


In [12]:
# Generate a continuation of the training-time prompt with the trained model,
# using the context window and generation length from the configuration.
generate_text(
    text_to_generate="Every single step",
    model=model,
    device="cpu",
    look_back=config["context_window"],
    num_tokens_to_generate=config["num_tokens_to_generate"],
)

'Every single step sunlit terrace.               '

In [13]:
# Print tensors in fixed-point notation with 10 decimal places instead of
# scientific notation, so very small values stay readable.
torch.set_printoptions(precision=10, sci_mode=False)

In [29]:
# Generate again from a different prompt, this time passing temperature and
# top_k, with a 20-token look-back and 10 new tokens instead of the
# configured values.
generate_text(
    "Hello what is ",
    model=model,
    device="cpu",
    look_back=20,
    num_tokens_to_generate=10,
    temperature=2.0,
    top_k=20,
)

'Hello what is  he he _ "Yes-- of all I haven'

In [16]:
# Scratch cell: evaluates the float literal 1e-7 so the notebook displays its
# repr; it has no effect on any other cell.
1e-7

1e-07