In [29]:
# Load the entire training corpus into memory as one string.
with open('stranger.txt', encoding='utf-8') as f:
    text = f.read()

In [30]:
# The vocabulary is the sorted set of distinct characters found in the corpus.
chars = sorted(set(text))
vocab_size = len(chars)
print(''.join(chars), vocab_size, sep='\n')


 !"'(),-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWYabcdefghijklmnopqrstuvwxyz«—♦
77


In [31]:
# Lookup tables between characters and integer ids; ids follow the order of `chars`.
itos = dict(enumerate(chars))
stoi = {ch: i for i, ch in itos.items()}


def encode(s):
    """Return the list of integer ids for the characters of string ``s``."""
    return [stoi[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids ``l``."""
    return ''.join(itos[i] for i in l)

In [32]:
import torch
# Encode the full corpus once into a tensor of int64 character ids.
data = torch.tensor(encode(text), dtype=torch.long)
print(data.shape, data, sep='\n')

torch.Size([194552])
tensor([36, 38, 43,  ..., 61,  9,  1])


In [33]:
# Keep the first 90% of the token stream for training; the remainder is held out.
n = int(0.9 * len(data))
train_data, test_data = data[:n], data[n:]

In [34]:
# Peek at one training chunk: block_size input tokens plus one extra token,
# because get_batch builds the targets as the inputs shifted by one position.
block_size = 8
train_data[:block_size+1]

tensor([36, 38, 43, 31, 28, 41,  1, 51, 56])

In [35]:
# Seed torch's global RNG so the random batch offsets drawn below are repeatable.
torch.manual_seed(1337)
batch_size = 4  # number of sequences get_batch samples per call
block_size = 8  # number of tokens in each sampled sequence
embedding_size = 512  # NOTE(review): not referenced by any code visible here — confirm it is still needed

def get_batch(split):
    """Sample a random batch of (input, target) sequences.

    Reads the module-level ``train_data`` when ``split == 'train'`` and
    ``test_data`` for any other value, together with the module-level
    ``batch_size`` and ``block_size``.

    Returns:
        ``(x, y)``: two stacked tensors with ``batch_size`` rows of
        ``block_size`` tokens each; every row of ``y`` is the matching row of
        ``x`` shifted one position further along the source data.
    """
    source = train_data if split == 'train' else test_data
    # Upper bound leaves room for the extra token needed by the shifted targets.
    starts = torch.randint(len(source) - block_size, (batch_size,))
    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + block_size])
        targets.append(source[start + 1:start + block_size + 1])
    return torch.stack(inputs), torch.stack(targets)

# Draw one demo batch and show the inputs followed by their shifted targets.
xb, yb = get_batch('train')
print(xb, yb, sep='\n')

tensor([[50, 55,  1, 66, 50, 52, 61, 52],
        [52,  1,  0, 67, 62,  1, 67, 55],
        [62, 66, 67, 56, 59, 52,  1, 67],
        [56, 66, 67, 52, 61,  1, 67, 62]])
tensor([[55,  1, 66, 50, 52, 61, 52, 66],
        [ 1,  0, 67, 62,  1, 67, 55, 56],
        [66, 67, 56, 59, 52,  1, 67, 62],
        [66, 67, 52, 61,  1, 67, 62,  1]])


In [36]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# Re-seed torch's global RNG so the model initialisation and sampling in this cell are repeatable.
torch.manual_seed(1337)

class BigramLanguageModel(nn.Module):
    """Bigram language model backed by a single embedding table.

    The only parameters are a ``vocab_size x vocab_size`` embedding: row ``i``
    is used directly as the logits for the token that follows token ``i``, so
    every prediction depends on the current token alone.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, optionally, the training loss.

        Args:
            idx: integer tensor of token ids, shape (B, T).
            targets: optional integer tensor of token ids with B*T elements
                (normally the same shape as ``idx``).

        Returns:
            ``(logits, loss)`` where ``logits`` has shape (B, T, vocab_size)
            and ``loss`` is the mean cross-entropy over all B*T positions, or
            ``None`` when ``targets`` is not given.
        """
        logits = self.token_embedding_table(idx)  # (B, T, C)
        if targets is None:
            return logits, None

        B, T, C = logits.shape
        # F.cross_entropy expects (N, C) logits with (N,) class indices, so the
        # batch and time dimensions are flattened for the loss only.
        loss = F.cross_entropy(logits.view(B * T, C), targets.view(B * T))
        return logits, loss

    @torch.no_grad()  # sampling needs no autograd graph
    def generate(self, idx, max_new_tokens):
        """Autoregressively extend ``idx`` by ``max_new_tokens`` sampled tokens.

        Args:
            idx: integer tensor of prompt token ids, shape (B, T), any B >= 1.
            max_new_tokens: number of tokens to append to every row.

        Returns:
            Integer tensor of shape (B, T + max_new_tokens) whose first T
            columns are the original prompt.
        """
        for _ in range(max_new_tokens):
            logits, _loss = self(idx)
            logits = logits[:, -1, :]  # only the last position predicts the next token
            probs = F.softmax(logits, dim=-1)
            # multinomial already returns shape (B, 1); forcing it to (1, 1)
            # would break every batch size other than 1.
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, idx_next), dim=1)
        return idx

# Build the model and run the demo batch through it as a shape/loss sanity check.
m = BigramLanguageModel(vocab_size)
logits, loss = m(xb, yb)
print(logits.shape, loss, sep='\n')

# Sample 100 tokens from the untrained model, starting from a single token with id 0.
idx = torch.zeros(1, 1, dtype=torch.long)
print(decode(m.generate(idx, max_new_tokens=100)[0].tolist()))

torch.Size([4, 8, 77])
tensor(4.4398, grad_fn=<NllLossBackward0>)

f6wbJ 
4F6yc9.8H5su98Vf2Vg"Hi!a!—;J'it,iI!FQr'iD)w4w8
o3reY97N))L
w5QLL'lk1 rCIqce♦9eoEkhB8u0fFRCu r


In [37]:
# AdamW over every parameter of the model `m`, with learning rate 1e-3.
optimizer = torch.optim.AdamW(m.parameters(), lr=1e-3)

In [38]:
# Train on larger batches than the 4-sequence demo batch; get_batch reads this global.
batch_size = 32
for steps in range(10000):
    # Drop the previous step's gradients before building the new ones.
    optimizer.zero_grad(set_to_none=True)

    xb, yb = get_batch('train')
    logits, loss = m(xb, yb)
    loss.backward()
    optimizer.step()

# Loss of the final mini-batch only (a single noisy sample, not an average).
print(loss.item())


2.4311487674713135


In [41]:
# Sample 100 new tokens from the model after training, starting from the prompt `idx`, and decode them to text.
print(decode(m.generate(idx,max_new_tokens=100)[0].tolist()))


crd I I't Affef avoke s tinthes whanlin'tot aring t SThirotito d r wer o ence; blisoke — orouied ant
