<h1>Character Level GPT on Text Data</h1>

In [1]:
import logging

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%d/%m/%Y %H:%M:%S",
    level=logging.INFO)

In [2]:
from utils import set_seed
set_seed(42)

In [3]:
import numpy as numpy
import torch
import torch.nn as nn
from torch.nn import functional as F

In [4]:
import math
from torch.utils.data import Dataset

class CharDataset(Dataset):
    def __init__(self, data, block_size):
        chars = sorted(list(set(data)))
        data_size, vocab_size = len(data), len(chars)
        print("data has %d characters, %d unique." % (data_size, vocab_size))

        self.stoi = {ch:i for i, ch in enumerate(chars)}
        self.itos = {i:ch for i, ch in enumerate(chars)}
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data

    def __len__(self):
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        chunk = self.data[idx : idx+self.block_size+1]
        dix = [self.stoi[s] for s in chunk]

        x = torch.tensor(dix[:-1], dtype = torch.long)
        y = torch.tensor(dix[1:], dtype = torch.long)
        return x, y


In [5]:
block_size = 32

In [6]:
#!wget https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt

In [7]:
text = open("./fairytales.txt", "r").read()
train_dataset = CharDataset(text, block_size)

data has 509351 characters, 86 unique.


In [8]:
from model import GPT, GPTconfig
mconf = GPTconfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

21/05/2021 13:21:17 - INFO - model - Number of parameters : 2.532454e+07


In [9]:
from trainer import Trainer, TrainerConfig

tconf = TrainerConfig(max_epochs=5, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512*20, final_tokens=2*len(train_dataset)*block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)

In [10]:
torch.cuda.empty_cache()
trainer.train()

epoch 1 iter 994: train loss 1.11714. lr 0.0003001480838414411: 100%|██████████| 995/995 [26:07<00:00,  1.58s/it] 
epoch 2 iter 994: train loss 0.90305. lr 5.9999999999999995e-05: 100%|██████████| 995/995 [26:05<00:00,  1.57s/it]
epoch 3 iter 994: train loss 0.84035. lr 0.00030014808384144087: 100%|██████████| 995/995 [26:06<00:00,  1.57s/it]
epoch 4 iter 994: train loss 0.71084. lr 0.0005999998538078394: 100%|██████████| 995/995 [26:46<00:00,  1.61s/it] 
epoch 5 iter 994: train loss 0.49762. lr 0.00029955574862000156: 100%|██████████| 995/995 [26:56<00:00,  1.62s/it]


In [12]:
torch.save(model.state_dict(), "./saved_models/fairytale-05-epochs")

In [8]:
from model import GPT, GPTconfig
mconf = GPTconfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)
model.load_state_dict(torch.load("./saved_models/fairytale-05-epochs"))

20/05/2021 21:33:47 - INFO - model - Number of parameters : 2.532966e+07


<All keys matched successfully>

## Generating text using Sample as starter.

In [14]:
from utils import sample

context = "The sun shone in the sky."
x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None,...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)

The sun shone in the sky."
"It is not so, nor it was not so," said Mr. Fox.
"And then—and then I opened the door, and when no can yet do a string, and dragged it along after you." "I'll do so another time," replied Jack.
On Wednesday, Jack went back and told them to fly before him."
"Alas!" quoth the prince, "what shall we do to him?" said one to the other.
"I have many ranks," said Tom, "my mother did not often care to cross him; indeed, the more she struggled and fought as if she could find a place. So off he went there he should hear joyful news. He made little count of them all. Then he took out it and got into the garden for the apples, he could see all that was passing in the world. And he went there was a little boy, and two well-diggers, and two ditch-diggers, and a bear, and a wolf, and I can outrun you too-o-o-o!"
"Ye can. can ye?" growled the bear, "we'll see about that!" and trotted as fast as he comes. He said: "What have I done? I promised to give the giant who carried me