In [310]:
import torch

torch.__version__

'2.7.0'

In [311]:
import requests
with open("shakes.txt", "wb") as f:
    res = requests.get("https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt")
    f.write(res.content)
    

In [312]:
text = open("shakes.txt", "r").read()

In [313]:
text[:100]

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'

In [314]:
chars = sorted(list(set(text)))

"".join(chars)


"\n !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

In [315]:
vocab_size = len(chars)

vocab_size

65

In [316]:
stoi = {val:i for i,val in enumerate(chars)}
itos = {i:val for i,val, in enumerate(chars)}


encode = lambda s : [stoi[i] for i  in s]

decode = lambda s : "".join([itos[i] for i in s])

In [317]:
encode("hello")

[46, 43, 50, 50, 53]

In [318]:
import torch

data = torch.tensor(encode(text))

data

tensor([18, 47, 56,  ..., 45,  8,  0])

In [319]:
data.size()

torch.Size([1115394])

In [320]:
# split the data
n = int(0.9*len(data))
n
train_data = data[:n]
val_data = data[n:]

In [321]:
len(train_data), len(val_data)

(1003854, 111540)

In [322]:
train_data

tensor([18, 47, 56,  ..., 43, 56, 43])

In [323]:
block_size = 8
train_data[:block_size+1]

tensor([18, 47, 56, 57, 58,  1, 15, 47, 58])

In [324]:
x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"The context is : {context} and target is : {target}")

The context is : tensor([18]) and target is : 47
The context is : tensor([18, 47]) and target is : 56
The context is : tensor([18, 47, 56]) and target is : 57
The context is : tensor([18, 47, 56, 57]) and target is : 58
The context is : tensor([18, 47, 56, 57, 58]) and target is : 1
The context is : tensor([18, 47, 56, 57, 58,  1]) and target is : 15
The context is : tensor([18, 47, 56, 57, 58,  1, 15]) and target is : 47
The context is : tensor([18, 47, 56, 57, 58,  1, 15, 47]) and target is : 58


In [325]:
torch.randint(100, size=(4,))

tensor([75, 18, 54, 97])

In [326]:
torch.manual_seed(42)
batch_size = 4
block_size = 8
def get_batch(split):
    data = train_data if split == "train" else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y =  torch.stack([data[i+1: i+block_size+1] for i in ix])
    return x,y

In [327]:
xb , yb  = get_batch("train")
print("Input:")
print(f"{xb}")

print("Output:")
print(f"{yb}")

Input:
tensor([[57,  1, 46, 47, 57,  1, 50, 53],
        [ 1, 58, 46, 43, 56, 43,  1, 41],
        [17, 26, 15, 17, 10,  0, 32, 53],
        [57, 58,  6,  1, 61, 47, 58, 46]])
Output:
tensor([[ 1, 46, 47, 57,  1, 50, 53, 60],
        [58, 46, 43, 56, 43,  1, 41, 39],
        [26, 15, 17, 10,  0, 32, 53,  1],
        [58,  6,  1, 61, 47, 58, 46,  0]])


In [328]:
xb

tensor([[57,  1, 46, 47, 57,  1, 50, 53],
        [ 1, 58, 46, 43, 56, 43,  1, 41],
        [17, 26, 15, 17, 10,  0, 32, 53],
        [57, 58,  6,  1, 61, 47, 58, 46]])

In [329]:
xb[0, :1]

tensor([57])

In [330]:
import torch
from torch import nn
from torch.nn import functional as f 
torch.manual_seed(42)

class BiagramModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)
        
    def forward(self, idx, target=None):
        logit = self.token_embedding_table(idx)
        
        if target is None:
            loss =  None
        else:
            B,T,C = logit.shape
            logit = logit.view(B*T, C)
            target = target.view(B*T) 
            loss = f.cross_entropy(logit, target)
        return logit, loss
    def generate(self, idx, max_tokens): 
        for _ in range(max_tokens):
            logits, _ = self(idx)
            logits = logits[:,-1,:]
            probs = f.softmax(logits,1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx,idx_next), 1)
        
        return idx
    

        
        
        

In [331]:
m = BiagramModel(vocab_size=vocab_size)

logit , loss = m(xb,yb)

loss, logit.shape

(tensor(4.8865, grad_fn=<NllLossBackward0>), torch.Size([32, 65]))

In [332]:
decode(m.generate(torch.zeros((1,1), dtype=torch.long), max_tokens=100)[0].tolist())

"\no$,q&IWqW&xtCjaB?ij&bYRGkF?b; f ,CbwhtERCIfuWr,DzJERjhLlVaF&EjffPHDFcNoGIG'&$qXisWTkJPw\n ,b Xgx?D3sj"

In [333]:
optimizer = torch.optim.Adam(m.parameters(), lr=0.001)

In [338]:
for steps in range(10000):
    xb , yb  = get_batch("train")
    logit , loss = m(xb,yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    print(loss.item())

    


2.1582136154174805
2.9632277488708496
2.751225471496582
2.4891397953033447
2.1651506423950195
2.2557921409606934
2.4421846866607666
2.65877628326416
2.499687671661377
2.715456247329712
2.8042619228363037
2.2912209033966064
2.7583200931549072
2.3137130737304688
2.536001443862915
2.491900682449341
2.0255658626556396
2.3935775756835938
2.4682440757751465
2.3005948066711426
2.685576915740967
2.9239730834960938
2.1765284538269043
2.7829020023345947
2.4443602561950684
2.481332540512085
2.5008692741394043
2.5796000957489014
2.3523666858673096
2.3790371417999268
2.4390952587127686
2.578113555908203
2.834721326828003
2.624595880508423
2.4088850021362305
2.862820625305176
2.467064142227173
2.3623363971710205
2.7384414672851562
2.2793195247650146
2.4408624172210693
2.2617125511169434
2.3009629249572754
2.469208240509033
2.854457139968872
2.732522487640381
2.4641668796539307
2.459400177001953
2.556629180908203
2.5124258995056152
2.192128896713257
2.381049394607544
2.308652400970459
2.5018005371093

In [343]:
print(decode(m.generate(torch.zeros((1,1), dtype=torch.long), max_tokens=100)[0].tolist()))


CES:
CI an touris ganert lst;
An hing bouee ave ncithiotht if on V:
KIENoaverees be,
ENEThe whath:
T


In [336]:
x = torch.randn(1,2,3)
x.shape

torch.Size([1, 2, 3])