In [1]:
!git clone https://github.com/karpathy/minGPT.git

fatal: destination path 'minGPT' already exists and is not an empty directory.


In [2]:
%cd minGPT

/content/minGPT


In [3]:
from mingpt.utils import set_seed
set_seed(42)

In [4]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F

In [31]:
import math
from torch.utils.data import Dataset

class CharDataset(Dataset):

    def __init__(self, data, block_size):
        chars = list(set(data))
        data_size, vocab_size = len(data), len(chars)
        print('data has %d characters, %d unique.' % (data_size, vocab_size))
        
        self.stoi = { ch:i for i,ch in enumerate(chars) }
        self.itos = { i:ch for i,ch in enumerate(chars) }
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data
    
    def __len__(self):
        return math.ceil(len(self.data) / (self.block_size + 1))

    def __getitem__(self, idx):
        # we're actually going to "cheat" and pick a spot in the dataset at random
        i = np.random.randint(0, len(self.data) - (self.block_size + 1))
        chunk = self.data[i:i+self.block_size+1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

In [32]:
block_size = 128 # spatial extent of the model for its context

In [33]:
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

--2020-09-24 11:21:40--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt.1’


2020-09-24 11:21:40 (24.1 MB/s) - ‘input.txt.1’ saved [1115394/1115394]



In [34]:
text = open('input.txt', 'r').read() # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)

data has 1115394 characters, 65 unique.


In [35]:
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

09/24/2020 11:21:42 - INFO - mingpt.model -   number of parameters: 2.535219e+07


In [36]:
import gc
gc.collect()

64

In [37]:
from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=3, batch_size=256, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512*20, final_tokens=200*len(train_dataset)*block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()



  0%|          | 0/34 [00:00<?, ?it/s][A[A

epoch 1 iter 0: train loss 4.34087. lr 6.000000e-04:   0%|          | 0/34 [00:02<?, ?it/s][A[A

epoch 1 iter 0: train loss 4.34087. lr 6.000000e-04:   3%|▎         | 1/34 [00:02<01:07,  2.04s/it][A[A

epoch 1 iter 1: train loss 3.70672. lr 5.999999e-04:   3%|▎         | 1/34 [00:04<01:07,  2.04s/it][A[A

epoch 1 iter 1: train loss 3.70672. lr 5.999999e-04:   6%|▌         | 2/34 [00:04<01:04,  2.02s/it][A[A

epoch 1 iter 2: train loss 5.52498. lr 5.999998e-04:   6%|▌         | 2/34 [00:06<01:04,  2.02s/it][A[A

epoch 1 iter 2: train loss 5.52498. lr 5.999998e-04:   9%|▉         | 3/34 [00:06<01:02,  2.01s/it][A[A

epoch 1 iter 3: train loss 4.34438. lr 5.999996e-04:   9%|▉         | 3/34 [00:08<01:02,  2.01s/it][A[A

epoch 1 iter 3: train loss 4.34438. lr 5.999996e-04:  12%|█▏        | 4/34 [00:08<01:00,  2.01s/it][A[A

epoch 1 iter 4: train loss 3.95912. lr 5.999993e-04:  12%|█▏        | 4/34 [00:10<01:00,  2.01s/it][A[

In [38]:
from mingpt.utils import sample

context = "O God, O God!"
x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None,...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)

O God, O God!
AN:


SINRE:IO:So mear t the s, f be avimithilath s thantes a be dinchis ssimas pe ho s sees s, d mordary.Soris sonthayoner mee sess asere sest doureseathe fouthomy,

HUF:
Hesurare hothicithom, higot te te mongheres sel wie he haithe foreleryoncon al steng ald:
Ade mato hendelde,
I th t thowhat f woworend sen, aleres, t soucerillerouthangha we hise, thy matrotansor thad ata s te w my.

PRATTIO:


PY:
A: thathof aickindor me t terd is se t s ssulinof hat, wighathan,
Pere massthethu the toth's t all as f bo faneasithie tsury be sudsuee, theathoue shisse methetind sir tand bersules wominghier th t herarstllstot sit phe wh ffr m ta halesurish o bis ou haritheere sthicon fus pang tho strerithentho bess serer or bouder s atherimie hendit thist hus he tat f wan thathiss ber s th,
Fonce t th he o f s ancuse hit tos se f man wile t ain heris, her teesowhithe teanerthancldis toun t boutond d basstay.


Mant fo wean hencasoullenthth winthistowhilld fistlanthordo honchosoryo shederig