In [1]:
import sys
from pathlib import Path

# Locate the repository root and put it on sys.path so that the `src.*`
# packages imported further down resolve regardless of which sub-directory
# the kernel was started in.
here = Path.cwd().resolve()

# Take the nearest directory (the working directory itself, then each parent
# in turn) that contains a "src" folder, rather than assuming the notebook
# always sits exactly two levels below the repository root.
repo_root = next(
    (candidate for candidate in (here, *here.parents) if (candidate / "src").exists()),
    None,
)
if repo_root is None:
    # No "src" folder found anywhere above: keep the previous two-levels-up
    # guess, but avoid an IndexError when the path is too shallow for that.
    repo_root = here.parents[1] if len(here.parents) > 1 else here

# Insert only once so re-running the cell does not pile up duplicates.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

In [2]:
import torch
import os
import urllib

In [3]:
# Read the whole corpus into memory as a single string. The path is relative,
# so it is resolved against the kernel's current working directory.
file_path = "the-verdict.txt"
with open(file_path, encoding="utf-8") as file:  # read-only text mode is open()'s default
    text_data = file.read()

In [4]:
import tiktoken
# Load tiktoken's "gpt2" encoding. It is used below to count the tokens in
# the text and is also handed to the training call.
tokenizer = tiktoken.get_encoding("gpt2")

In [5]:
# Report the corpus size twice: as raw characters and as tokenizer tokens.
total_characters = len(text_data)
token_ids = tokenizer.encode(text_data)
total_tokens = len(token_ids)
print(f"Characters: {total_characters}")
print(f"Tokens: {total_tokens}")

Characters: 20479
Tokens: 5145


In [6]:
# Split the raw text by character position (not by token): the first 90% is
# the training text and the remainder is the validation text.
train_ratio = 0.90
split_idx = int(len(text_data) * train_ratio)
train_data, val_data = text_data[:split_idx], text_data[split_idx:]

In [7]:
# Character counts of the training and validation splits, shown as the cell output.
len(train_data), len(val_data)

(18431, 2048)

In [8]:
from src.gpt_blocks.data_loader import create_dataloader_v1

In [9]:
# Hyperparameters passed to GPTModel when the model is constructed.
GPT_CONFIG_124M = dict(
    vocab_size=50257,
    context_length=256,  # also reused as the data loaders' max_length and stride
    embed_dim=768,
    n_layers=12,
    n_heads=12,
    drop_rate=0.1,
    qkv_bias=False,
)

In [10]:
# Fix torch's global seed first; the training loader is created with shuffle=True.
torch.manual_seed(123)

# Arguments shared by both loaders. max_length and stride are both set to the
# model's configured context length.
context_length = GPT_CONFIG_124M["context_length"]
loader_kwargs = dict(
    batch_size=2,
    max_length=context_length,
    stride=context_length,
    num_workers=0,
)

# Training loader: shuffled, incomplete final batch dropped.
train_loader = create_dataloader_v1(
    train_data, drop_last=True, shuffle=True, **loader_kwargs
)

# Validation loader: original order, final batch kept even if incomplete.
val_loader = create_dataloader_v1(
    val_data, drop_last=False, shuffle=False, **loader_kwargs
)

In [11]:
# Sanity check: walk through the training loader once and print the shapes of
# the two tensors (x, y) that each batch yields.
print("Train loader:")
for x, y in train_loader:
    print(x.shape, y.shape)

Train loader:
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])


In [12]:
# Same sanity check for the validation loader: print the shapes of the two
# tensors (x, y) in every batch.
print("Val loader:")
for x, y in val_loader:
    print(x.shape, y.shape)

Val loader:
torch.Size([2, 256]) torch.Size([2, 256])


In [13]:
from src.gpt_blocks.loss_calc import calc_loss_loader
from src.gpt_blocks.gpt_model import GPTModel

# Training

In [14]:
from src.gpt_blocks.train_llm import train_model

In [15]:
# Re-seed torch immediately before the model is constructed.
torch.manual_seed(123)

# Prefer a CUDA GPU when one is present instead of hard-coding the CPU. On a
# CPU-only machine this resolves to torch.device("cpu"), exactly as before.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GPTModel(GPT_CONFIG_124M)
# Put the parameters on the chosen device before the optimizer is created
# (a no-op when the device is the CPU).
# NOTE(review): assumes train_model moves every batch to `device` - confirm
# against src/gpt_blocks/train_llm.py.
model.to(device)

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.0004,
    weight_decay=0.1,
)

num_epochs = 10
train_losses, val_losses, tokens_seen = train_model(
    model,
    train_loader,
    val_loader,
    optimizer,
    device=device,
    num_epochs=num_epochs,
    eval_freq=5,
    eval_iter=5,
    start_context="Every effort moves you",
    tokenizer=tokenizer,
)

=== Generated Sample ===
Every effort moves you,,,,,,,,,,,,,,.



































=== Generated Sample ===
Every effort moves you, and, and, and, and, and, and, and, and,, and,,,,, and, and,, and, and, and,,, and,,, and, and,,, and, and
=== Generated Sample ===
Every effort moves you, and to to to to to to to to to to to to to to to to to to to to to to to to to to to the to to the to to to to to to to to the to to to to to to to to
Ep 4 (Step 000035), Train loss 4.917, Val loss 6.329
=== Generated Sample ===
Every effort moves you.

















































=== Generated Sample ===
Every effort moves you know the, and in the picture--I was, and I had been the.

"I, the fact, and.
"I, and I had been the.
"I, the fact, and the picture.
"


KeyboardInterrupt: 