In [1]:
# Resolve the project root starting from the current directory, using the
# ".project-root" indicator, and keep the result in `root_path`.
# pythonpath=True is passed; presumably this is what makes the later `core.*`
# imports resolvable — confirm against the rootutils documentation.
import rootutils
root_path = rootutils.setup_root(".", indicator=".project-root", pythonpath=True)

In [2]:
import torch
from core.models.gpt import GPTModel

In [3]:
import tiktoken
# Tokenizer used throughout the notebook: tiktoken's "gpt2" encoding.
tokenizer = tiktoken.get_encoding("gpt2")

In [4]:
# Hyperparameter dictionary handed to GPTModel further down.
# NOTE(review): the bias key is spelled "qvk_bias" (not "qkv_bias"). It is kept
# verbatim because the code that reads it is not visible here — confirm it
# matches the key GPTModel actually looks up.
GPT_CONFIG_124M = dict(
    vocab_size=50257,     # number of entries in the vocabulary
    context_length=256,   # maximum sequence length fed to the model
    emb_dim=768,          # width of the embeddings
    n_heads=12,           # attention heads
    n_layers=12,          # number of layers
    dropout=0.1,          # dropout probability
    qvk_bias=False,
)

In [5]:
# Build the corpus path from the project root resolved by rootutils rather than
# from a machine-specific absolute path, so the notebook runs on any checkout.
# Assumes the Data/ directory sits directly under the project root — TODO confirm.
file_path = root_path / "Data" / "the-verdict.txt"

with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

# Show the first 100 characters as a quick check that the expected file was read.
print(text[:100])

I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no g


In [6]:
# Report the corpus size both in raw characters and in tokenizer tokens.
encoded_text = tokenizer.encode(text)
total_characters, total_tokens = len(text), len(encoded_text)
for label, count in (("Characters:", total_characters), ("Tokens:", total_tokens)):
    print(label, count)

Characters: 20480
Tokens: 5146


In [7]:
# Split the raw text by character position: the leading `train_ratio` share is
# used for training, the remainder for validation.
train_ratio = 0.9
train_size = int(len(text) * train_ratio)
train_text, validation_text = text[:train_size], text[train_size:]

print(f"Train size: {len(train_text)}")
print(f"Validation size: {len(validation_text)}")

# Tokenize each split separately so the token counts can be reported per split.
train_tokens, validation_tokens = (
    tokenizer.encode(split_text) for split_text in (train_text, validation_text)
)

print(f"Train tokens: {len(train_tokens)}")
print(f"Validation tokens: {len(validation_tokens)}")

Train size: 18432
Validation size: 2048
Train tokens: 4612
Validation tokens: 535


In [8]:
from core.data.dataloader import create_dataloader_v1

# Settings common to both loaders. max_length and stride are both set to the
# model's context length, as in the original calls.
loader_kwargs = dict(
    batch_size=2,
    max_length=GPT_CONFIG_124M["context_length"],
    stride=GPT_CONFIG_124M["context_length"],
    num_workers=0,
)

# Training data: shuffled each epoch, incomplete final batch dropped.
train_loader = create_dataloader_v1(
    train_text,
    drop_last=True,
    shuffle=True,
    **loader_kwargs,
)

# Validation data: no shuffling and no dropped batch, so every evaluation pass
# sees the same samples in the same order and losses are comparable between
# evaluations.
validation_loader = create_dataloader_v1(
    validation_text,
    drop_last=False,
    shuffle=False,
    **loader_kwargs,
)

In [9]:
# Walk through each loader once and print the shape of every (input, target)
# batch as a sanity check.
for heading, loader in (
    ("Train loader:", train_loader),
    ("\nValidation loader:", validation_loader),
):
    print(heading)
    for inputs, targets in loader:
        print(inputs.shape, targets.shape)

Train loader:
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])

Validation loader:
torch.Size([2, 256]) torch.Size([2, 256])


In [10]:
from core.training import GPTTrainer

In [11]:
# Seed first so the model's weight initialisation below is repeatable.
torch.manual_seed(123)
num_epochs = 10

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

model = GPTModel(GPT_CONFIG_124M)
model.to(device)

# Optimizer hyperparameters collected in one place.
adamw_settings = {"lr": 0.0004, "weight_decay": 0.1}
optimizer = torch.optim.AdamW(model.parameters(), **adamw_settings)

Using device: cpu


In [12]:

# Wire the model, data loaders and optimizer into the project trainer, then run
# training. train() returns three values, unpacked below.
trainer = GPTTrainer(
    model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    epochs=num_epochs,
    eval_freq=5,
    eval_iter=5,
    device=device,
    tokenizer=tokenizer,
)
training_history = trainer.train()
train_losses, val_losses, tokens_seen = training_history

2025-08-17 10:51:34,076 - core.training.trainer - INFO - GPTTrainer inicializado com device: cpu
2025-08-17 10:51:34,077 - core.training.trainer - INFO - Modelo tem 162,419,712 parâmetros
2025-08-17 10:51:34,079 - core.training.trainer - INFO - Iniciando treinamento por 10 épocas


KeyboardInterrupt: 

In [14]:
# Persist both the model weights and the optimizer state in a single file so
# training can be resumed later.
training_state = {
    "model_state_dict": trainer.model.state_dict(),
    "optimizer_state_dict": trainer.optimizer.state_dict(),
}
torch.save(training_state, "model_and_optimizer.pth")

In [15]:
# Reload the saved checkpoint. map_location remaps the stored tensors onto the
# device in use now, so a file written on a GPU machine still loads on a
# CPU-only one.
checkpoint = torch.load("model_and_optimizer.pth", map_location=device)

# Rebuild a fresh model from the same config and restore its weights.
model_gpt = GPTModel(GPT_CONFIG_124M)
model_gpt.load_state_dict(checkpoint["model_state_dict"])
model_gpt.to(device)

# The optimizer must be created over the new model's parameters before its state
# is restored. Note: load_state_dict also restores the saved param-group
# hyperparameters, so the lr given here is replaced by the checkpointed value.
optimizer = torch.optim.AdamW(model_gpt.parameters(), lr=5e-4, weight_decay=0.1)
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

# Keyword arguments mirror the first GPTTrainer construction in this notebook,
# so the call does not depend on the constructor's positional order.
trainer_v1 = GPTTrainer(
    model_gpt,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    epochs=1,
    eval_freq=5,
    eval_iter=5,
    device=device,
    tokenizer=tokenizer,
)

trainer_v1.train()

2025-08-17 10:51:42,120 - core.training.trainer - INFO - GPTTrainer inicializado com device: cpu
2025-08-17 10:51:42,122 - core.training.trainer - INFO - Modelo tem 162,419,712 parâmetros
2025-08-17 10:51:42,123 - core.training.trainer - INFO - Iniciando treinamento por 1 épocas


KeyboardInterrupt: 

In [None]:
from core.data.utils import generate_text, text_to_token_ids, token_ids_to_text

# Switch to evaluation mode before sampling.
trainer_v1.model.eval()

# Encode the prompt, add a leading batch dimension, and place it on the same
# device as the model; without the .to(device) the call fails whenever the model
# lives on the GPU and the prompt tensor is still on the CPU.
prompt_ids = text_to_token_ids("Every effort moves you", trainer_v1.tokenizer).unsqueeze(0).to(device)

token_ids = generate_text(
    model=trainer_v1.model,
    idx=prompt_ids,
    max_new_tokens=15,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4
)

# Drop the batch dimension again before decoding back to text.
print("Output text:\n", token_ids_to_text(token_ids.squeeze(0), trainer_v1.tokenizer))

Output text:
 Every effort moves you say began to go."

Sheoms he had down across the last


In [16]:
import urllib.request
from pathlib import Path

url = (
    "https://raw.githubusercontent.com/rasbt/"
    "LLMs-from-scratch/main/ch05/"
    "01_main-chapter-code/gpt_download.py"
)
filename = url.split('/')[-1]

# Fetch the helper script only when it is not already in the working directory,
# so re-running the cell needs no network access and does not overwrite a local
# copy.
# NOTE(review): the script comes from the repository's `main` branch and is
# imported (executed) right below; consider pinning the URL to a commit and
# reviewing the file before use.
if not Path(filename).exists():
    urllib.request.urlretrieve(url, filename)


from gpt_download import download_and_load_gpt2

# Download (or reuse) the 124M GPT-2 files under ./gpt2 and load them.
settings, params = download_and_load_gpt2(
    model_size="124M", models_dir="gpt2"
)

File already exists and is up-to-date: gpt2\124M\checkpoint
File already exists and is up-to-date: gpt2\124M\encoder.json
File already exists and is up-to-date: gpt2\124M\hparams.json
File already exists and is up-to-date: gpt2\124M\model.ckpt.data-00000-of-00001
File already exists and is up-to-date: gpt2\124M\model.ckpt.index
File already exists and is up-to-date: gpt2\124M\model.ckpt.meta
File already exists and is up-to-date: gpt2\124M\vocab.bpe


In [17]:
from core.data import utils as data_utils

In [21]:
# The pretrained GPT-2 checkpoint uses the context length recorded in its
# hparams (settings["n_ctx"]) and has bias terms on the query/key/value
# projections. The model trained above was built with context_length=256 and no
# such bias, so its parameters cannot receive these weights. Build a model whose
# config matches the checkpoint and load into that instead.
# Assumes "qvk_bias" is the key GPTModel reads for the QKV bias — TODO confirm.
gpt2_config = {
    **GPT_CONFIG_124M,
    "context_length": settings["n_ctx"],
    "qvk_bias": True,
}
model_gpt = GPTModel(gpt2_config)

# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'GPTModel' object has no attribute 'pos_emb'". The attribute
# names used by load_weights_into_gpt and by GPTModel live in project code that
# is not visible here and must be aligned there.
data_utils.load_weights_into_gpt(model_gpt, params)

# Move to the target device only after loading the weights.
model_gpt.to(device);

AttributeError: 'GPTModel' object has no attribute 'pos_emb'