# Training The Model

This notebook loads a text dataset and trains the model to predict the next character in a sequence.


In [1]:
import sys
import torch

# Make the parent directory importable so the `gpt` package can be imported
# directly from this notebook. The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate "../" entries on sys.path.
if "../" not in sys.path:
    sys.path.append("../")

# Display the installed PyTorch version as the cell output.
torch.__version__

'2.2.2'

## Use an accelerator if available


In [5]:
# Choose the compute device in priority order: Apple MPS first, then CUDA,
# and fall back to the CPU when neither backend reports itself as available.
device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)

# Display the selected device as the cell output.
device

device(type='mps')

## Create the dataset


In [4]:
from gpt.datasets import CharacterLevelTextDataset
from pathlib import Path

# Dataset settings: the sequence length handed to the dataset (presumably the
# number of characters per sample; confirm against CharacterLevelTextDataset)
# and the location of the raw text, relative to this notebook's directory.
SEQUENCE_LENGTH = 10
TEXT_DATA_PATH = Path("../data/bible-kjv.txt")

# Build the dataset straight from the text file.
dataset = CharacterLevelTextDataset(TEXT_DATA_PATH, SEQUENCE_LENGTH)

# Show the source path and the dataset's length as the cell output.
(TEXT_DATA_PATH, len(dataset))

(PosixPath('../data/bible-kjv.txt'), 4351869)

## Train The Model


In [7]:
from gpt import GPT2, train
from torch import optim, nn

# Training-run settings, gathered in one place so they are easy to tune.
SEED = 42  # fixed seed so weight initialisation is repeatable across runs
EPOCHS = 1
BATCH_SIZE = 32
LEARNING_RATE = 1e-3

# Model-size hyperparameters passed to GPT2 below.
EMBEDDING_SIZE = 128
NUM_HEADS = 4
NUM_LAYERS = 2
HIDDEN_SIZE = 256

# Seed torch's global RNG *before* the model is built so parameter
# initialisation is reproducible.
# NOTE(review): whether `train` shuffles batches through torch's global RNG is
# not visible from this notebook, and some accelerator kernels may remain
# nondeterministic even with a fixed seed; confirm if exact repeatability matters.
torch.manual_seed(SEED)

# Build the model with the dataset's vocabulary size and move it to the
# selected device.
model = GPT2(
    vocab_size=dataset.vocab_size,
    embedding_size=EMBEDDING_SIZE,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    hidden_size=HIDDEN_SIZE,
).to(device)

# Adam optimiser over all model parameters, with a cross-entropy loss.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# Run training; progress and the average loss are reported by `train` itself.
train(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    model=model,
    vocab_size=dataset.vocab_size,
    dataset=dataset,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

Epoch 1/1:   0%|          | 0/135996 [00:00<?, ?it/s]

Epoch 1/1: 100%|██████████| 135996/135996 [36:09<00:00, 62.70it/s]


Epoch 1/1, Average Loss: 0.1464
