In [41]:
with open('input.txt', 'r', encoding = 'utf-8') as f:
    text = f.read()


In [42]:
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [43]:
chars = sorted(list(set(text)))
vocab_size = len(chars)

In [44]:
stoi = { ch:i for i,ch in enumerate(chars) }
itos = { i:ch for i,ch in enumerate(chars) }
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join([itos[i] for i in l])

In [45]:
import torch
import torch.nn as nn
from torch.nn import functional as F

In [46]:
data = torch.tensor(encode(text), dtype=torch.long)

In [47]:
n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

def get_batch(split):
  data = train_data if split == 'train' else val_data
  ix = torch.randint(len(data) - block_size, (batch_size,))
  print(ix)
  x = torch.stack([data[i:i+block_size] for i in ix])
  y = torch.stack([data[i+1:i+block_size+1] for i in ix])
  return x, y

x, y = get_batch('train')
print('inputs:')
print(x)
print('targets:')
print(y)

tensor([ 95344, 551668, 617427, 570918])
inputs:
tensor([[58,  1, 57, 46, 39, 50, 50,  1],
        [43,  8,  1, 19, 53, 42,  1, 44],
        [57, 58,  1, 53, 44,  1, 58, 46],
        [46, 43,  1, 61, 39, 57,  1, 41]])
targets:
tensor([[ 1, 57, 46, 39, 50, 50,  1, 40],
        [ 8,  1, 19, 53, 42,  1, 44, 53],
        [58,  1, 53, 44,  1, 58, 46, 63],
        [43,  1, 61, 39, 57,  1, 41, 53]])


In [48]:
block_size = 8
batch_size = 4
x = train_data[:block_size]
y = train_data[1:block_size+1]

for i in range(block_size):
    context = x[:i+1]
    target = y[i]
    print(f"when input is {context} the target: {target}")

when input is tensor([18]) the target: 47
when input is tensor([18, 47]) the target: 56
when input is tensor([18, 47, 56]) the target: 57
when input is tensor([18, 47, 56, 57]) the target: 58
when input is tensor([18, 47, 56, 57, 58]) the target: 1
when input is tensor([18, 47, 56, 57, 58,  1]) the target: 15
when input is tensor([18, 47, 56, 57, 58,  1, 15]) the target: 47
when input is tensor([18, 47, 56, 57, 58,  1, 15, 47]) the target: 58


In [49]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [50]:
class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, targets = None):
      logits = self.token_embedding_table(index)
      if targets is None:
        loss = None
      else:
        B, T, C = logits.shape
        logits = logits.view(B*T, C)
        targets = targets.view(B*T)
        loss = F.cross_entropy(logits, targets)
      return logits, loss

    def generate(self, index, max_new_tokens):
      for _ in range(max_new_tokens):
        logits, loss = self.forward(index)
        logits = logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)
        index_next = torch.multinomial(probs, num_samples=1)
        index = torch.cat((index, index_next), dim=1)
      return index


model= BigramLanguageModel(vocab_size)
m = model.to(device)
context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)





baskUWCc
xM'nCNCZG
GbNB3E?DZ;SYiHF'YKUDEOKvwh:fGIO:VtfdeAK,cQzWp3SgoZamSDCEZHckQIVx3r'YPF$IEjUC-DOo&kH?nvQdWkQqAihST.,C?UfbC3LlNEUUOJtws&H3dq&Yfxzsbah.iw$I&AsVYirmnSCoe:EOiroqsqlB'd?!Famt,msClCJU;gsZgYYOElP..aw!I$
OevrHSR3?'mZxTLfOOl?iYH G;dsnJdTDfK; GF c
Gkl Ifk'd'GPXqD3goxzJgo
Fo.aySYuAZskUeK,pp?!zuz?-zGjM$Y,CGzfDTLh'cXqkSd;W3&HyOTDmITNE,nbdebYbZ3LlJPFTDZeLr&'::j!raraaSCL
d&' qsc
uAu&H.LtHKQiBlvn;DonHZ&HSDOFpoz-zdLfD3lCNJmo?Kl..Clp$FILf3Au3aNEU?R.YFX& RjPGjYKlC-POzMqIToH.OX!,IjGXmbLf?-YiJctoj&


In [39]:
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

for iter in range(10000):
  xb, yb = get_batch('train')
  logits, loss = model.forward(xb, yb)
  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()
print(loss.item())

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
tensor([538793, 292056, 420095, 678314])
tensor([527635,  94895, 631904, 524020])
tensor([264572, 822478, 434730, 186625])
tensor([242611,  25848, 120467, 102261])
tensor([254769, 466970, 181423, 259183])
tensor([735459,  73846, 732277, 108672])
tensor([262706, 343921, 218628, 379491])
tensor([664225, 803713, 605167,  18657])
tensor([472950, 173905, 654241,  88120])
tensor([700226, 594058, 122238, 603701])
tensor([566347, 745774, 691069, 354463])
tensor([620643,   7891, 644454, 758250])
tensor([638298, 101742, 711569, 625920])
tensor([359423, 849444, 227881, 618282])
tensor([481946, 891345, 606193, 196205])
tensor([453403, 452315, 203754, 768679])
tensor([818942, 491250, 342683, 513242])
tensor([215775, 449596, 553360, 612948])
tensor([555617, 782885, 696321, 525806])
tensor([492100,   1307, 772243,  80594])
tensor([242910,  92379, 829179, 601951])
tensor([125653, 188472, 705751, 610849])
tensor([870405, 535228, 224550, 5

In [40]:
context = torch.zeros((1,1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)


ifiningQinberende!VTidse?Mpt vido!pTZ:- u
ha:senA;lladapoftZbN!e arhTCE$SQYEW$fecg

TUegu fy!elllcLEvV?n'huoerrowe:
TMagQIZn'dK:
Q-VEils,
mr!bputh o-UCHGWn pubesARjFtin-warFap:wxgnovf
TWSQu m uwruyhScVKfe yose,P.jI:Jw!JwiteaFpxn3l-yhu' itZvg acIFLEnn he,K:k!?MyOV$brpZ-Urs.
 q;C
Bdireo;ge'Py!LUZvyckM?thsmyOypVYy:
Kge.
H&'Al h gd 
?CxTC3to:OUDVu
LB'-lqbJ'o u-noulyh hako s-!Q.
Laxoomy
TVOz:
EMx
Hnavxonnles;Fruaill
APS;TQ n?ei&EBKsthmyBJwjzs ai:ufUEtir&t deh
B;vnteovXiCHHpy.,wl,Lt TYqRoey!-N'
HcBXXk
