#### Training the Transformer model, as emphasized in "Attention is All You Need" implemented by Andrej Karpathy.

In [1]:
! pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.6.0


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
! cp /content/drive/"My Drive"/"Colab Notebooks"/kleineGPT/review_card.py /content/review_card.py

In [4]:
! cp /content/drive/"My Drive"/"Colab Notebooks"/kleineGPT/model.py /content/model.py

In [5]:
! cp /content/drive/"My Drive"/"Colab Notebooks"/kleineGPT/model-gpt-01.pkl /content/model-gpt-01.pkl

In [6]:
import io
import torch
import tiktoken
import pickle
import argparse
from model import BigramLanguageModel
from review_card import ReviewCard
from dataclasses import dataclass


In [7]:
device ='cuda' if torch.cuda.is_available() else 'cpu'
review_handler = ReviewCard()
torch.manual_seed(1337)

<torch._C.Generator at 0x7815edb67330>

In [21]:
@dataclass
class GPTConfig:
    block_size: int = 64
    batch_size:int = 16
    vocab_size:int = 0
    n_layer: int = 2
    n_head: int = 2
    n_embd: int = 384
    dropout: float = 0.2
    learning_rate:float = 3e-4
    max_iters:int = 1000
    eval_iters:int = 100
    eval_interval:int = 200
    bias: bool = True # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster


In [10]:
class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else: return super().find_class(module, name)



In [12]:
text = review_handler.review_cralwer(page_size=20)



  html = BeautifulSoup(res.text)


In [13]:
data  = '\n'.join(str(row['message']) for row in text)

In [14]:
chars=sorted(list(set(data)))
vocab_size = len(chars)
print(vocab_size)

128


In [15]:
stoi = {ch:i for i,ch in enumerate(chars)}
itos = {i:ch for i,ch in enumerate(chars)}
encode =lambda s:[stoi[c] for c in s] #encoder: take a string, output a list of integers
decode = lambda l: ''.join([itos[i] for i in l]) #encoder: take a integer, output a string


data_enc = torch.tensor(encode(data), dtype=torch.long)

n = int(0.9 * len(data_enc)) # 90% will be train, rest val

train = data_enc[:n]
val = data_enc[n:]

In [16]:
config_meta = {}
config_meta['vocab_size'] = vocab_size if vocab_size else 50304

In [17]:
prompt = "I enjoyed the crispy chicken at KFC because"

In [18]:
context = torch.tensor(encode(prompt), dtype=torch.long, device=device)

In [22]:
config = GPTConfig(**config_meta)
model = BigramLanguageModel(config)
with open("model-gpt-01.pkl", 'rb') as f:
    model = CPU_Unpickler(f).load() # pickle.load(f)

m =model.to(device)

In [23]:
print(decode(m.generate(context.unsqueeze(0), max_new_tokens=1000)[0].tolist()))

I enjoyed the crispy chicken at KFC because  g Soten noommhhph n l adooceoer as.  ir tgeerkreopor  h  s oacreoct p!ly ce tes sh 0 sat  anwon sen me on b
nen c teint l.L s ticy f vea eh. Wofogkh.srae e e ad d r a,y bhnda riseouc d us ag J inwanhneomie  ic h ste: s lnore n I elw c b t edig aw nfuf w prgmyo* 0tc/ad te aniueK urh sr,uloe rs m nee I c. hrh uttahtLolitrosinaow  tn aC m
ndl hledcs ln   on hS tn,cay on0r pv aoobhisor w tatrionaogeaos iniuop t rcOomt r s.e t Iee3inne s ves reevFH-i i ch K in
ad .an k nowucior ousorh goK.n ot Dcwr c cl  obedFian sthic or K vh  c r f sice
eykee n oce
d Ny g enkasan tee  .lreosr beC eaacI an tuhee c .ini8 togseut ceort d n. bgmt uteg ht e ld Cner  uag cw! d odat h
hrf dus rke eetome n us l n vrnr pctn sos  oiuwe ksec Fn t dinh meageMasore be e mic2osufiawohky esr sadof ce
ene.he tihe.  n Dm  y Ccec mn o naen sd o  pel  id rl noAhaw syphrn orc n Tlhhe Kg fs lehfh ooD
s won FgrosWkurg t sr
h m K peotTah ucowithen ig res vi t  k  wosdrie lny.os.a:t, 