In [1]:
import sys
import os

# Put the current working directory first on the import path so that it takes
# precedence when Python resolves the project-local packages imported later
# in this notebook (`model`, `utils`).
working_dir = os.getcwd()
sys.path.insert(0, working_dir)


In [11]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

from model.transformer import MiniTransformer
from model.tokenizer import SimpleTokenizer
from utils.dataset import TextDataset

# --- Configuration ----------------------------------------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 3
BATCH_SIZE = 32
SEQ_LEN = 256
SEED = 42  # fixed seed so every run starts from the same RNG state

# Seed before the model is built and the loader is created: both weight
# initialisation and batch shuffling draw from torch's global generator.
torch.manual_seed(SEED)

# --- Data and model ---------------------------------------------------------
# NOTE(review): the recorded output of this cell is
# "FileNotFoundError: ... 'data/text.txt'", so TextDataset presumably reads
# that file relative to the working directory -- confirm it exists before
# running this cell.
tokenizer = SimpleTokenizer()
dataset = TextDataset("data", tokenizer, SEQ_LEN)

# The model is sized from the tokenizer's `stoi` mapping (presumably the
# vocabulary size -- confirm against MiniTransformer's signature).
model = MiniTransformer(len(tokenizer.stoi)).to(DEVICE)
print("Params:", sum(p.numel() for p in model.parameters()) / 1e6, "M")

loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
opt = optim.AdamW(model.parameters(), lr=3e-4)

# --- Training ---------------------------------------------------------------
model.train()
for epoch in range(EPOCHS):
    # Accumulate the loss as a tensor on DEVICE so that `.item()` (which
    # synchronises with the device) is only called once per epoch.
    loss_sum = torch.zeros((), device=DEVICE)
    num_batches = 0

    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        logits = model(x)
        # Flatten all leading dimensions so every position is scored as an
        # independent classification over the last (class) dimension.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)),
            y.view(-1)
        )

        opt.zero_grad()
        loss.backward()
        opt.step()

        loss_sum += loss.detach()
        num_batches += 1

    if num_batches == 0:
        # Without this guard the report below would fail with an opaque error
        # and an untrained model could be mistaken for a trained one.
        raise RuntimeError(
            "DataLoader produced no batches; the dataset appears to be empty."
        )

    # Report the mean of the per-batch losses rather than only the final
    # batch's loss, which is a noisy estimate of how the epoch went.
    print(f"Epoch {epoch+1} | Loss {(loss_sum / num_batches).item():.4f}")

# --- Persist artefacts ------------------------------------------------------
os.makedirs("checkpoints", exist_ok=True)
torch.save(model.state_dict(), "checkpoints/model.pt")
tokenizer.save("checkpoints/tokenizer.json")


FileNotFoundError: [Errno 2] No such file or directory: 'data/text.txt'

In [7]:
import importlib
# Import the submodule under its own alias. A bare `import model.transformer`
# binds the global name `model` to the *package*, which would clobber the
# `model` network instance created by the training cell if this cell is run
# afterwards.
import model.transformer as transformer_module

# Re-execute the module so edits made on disk since the first import are
# picked up without restarting the kernel.
importlib.reload(transformer_module)

# Re-import the class so the notebook-level name points at the reloaded
# definition rather than the stale one.
from model.transformer import MiniTransformer
print("Loaded:", MiniTransformer)


Loaded: <class 'model.transformer.MiniTransformer'>


In [6]:
import model.transformer as t

# Debug aid: list every attribute name the imported module currently exposes.
# A listing that contains only dunder names means no module-level definitions
# were loaded.
module_attrs = dir(t)
print(module_attrs)


['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__']
