In [1]:
with open("datasets/tinyshakespear.txt") as f:
    text = f.read()

In [None]:
print(f"Length of dataset in chars {len(text)}")

Length of dataset in chars 1115394


In [None]:
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [None]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
print(''.join(chars))
print(vocab_size)


 !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
65


In [None]:


tok_to_id = {ch:i for i,ch in enumerate(chars)}
id_to_tok = {i:ch for i,ch in enumerate(chars)}
# tok_to_id["<S>"] = len(tok_to_id)
# tok_to_id["<E>"] = len(tok_to_id)
# id_to_tok[len(id_to_tok)] = "<S>"
# id_to_tok[len(id_to_tok)] = "<E>"

def encode(s):
    if isinstance(s, str) or isinstance(s, list) and isinstance(s[0], str):
        return [tok_to_id[c] for c in s]
    elif isinstance(s[0], list):
        return [encode(ss) for ss in s]
    else:
        return []
    
def decode(l):
    if isinstance(l, int):
        return id_to_tok[l]
    elif isinstance(l, list) and isinstance(l[0], int):
        return ''.join([id_to_tok[i] for i in l])
    elif isinstance(l[0], list):
        return [decode(ll) for ll in l]
    else:
        return []

print(encode("hii there"))
print(decode(encode("hii there")))

[46, 47, 47, 1, 58, 46, 43, 56, 43]
hii there


In [None]:
split = int(0.9*len(text))
train_set = text[:split]
test_set = text[split:]

In [7]:
import torch
from torch.nn import functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data = torch.tensor(encode(train_set), dtype=torch.long)
test_data = torch.tensor(encode(test_set), dtype=torch.long)
CONTEXT_SIZE = 64

print(train_data.shape, train_data.dtype)

torch.Size([1003854]) torch.int64


In [None]:
def get_batch(split, batch_size=64):
    data = train_data if split == 'train' else test_data
    ix = torch.randint(len(data) - CONTEXT_SIZE, (batch_size,))
    x = torch.stack([data[i:i+CONTEXT_SIZE] for i in ix])
    y = torch.stack([data[i+1:i+CONTEXT_SIZE+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

In [None]:
@torch.no_grad()
def estimate_loss(model, eval_iters):
    model.eval()
    out = {}
    for split in ['train', 'test']:
        losses = torch.zeros(eval_iters)
        for _ in range(eval_iters):
            x, y = get_batch(split)
            logits = model(x)
            loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.view(-1))
            losses[_] = loss.item()
        out[split] = losses.mean().item()
    model.train()
    return out

In [10]:
x_batch_test, y_batch_test = get_batch('train', 8)
print("inputs:")
print(decode(x_batch_test.tolist()))
print(x_batch_test.shape)
print("targetas:")
print(decode(y_batch_test.tolist())) 
print(y_batch_test.shape)

inputs:
["him.\nThat is renown'd for faith? Be fickle, fortune;\nFor then, I", 'e life\nOf stout Mercutio, and then Tybalt fled;\nBut by and by co', "ur master Lucentio.\n\nLUCENTIO:\nTranio, let's go: one thing more ", ' you what services he has done for his country?\n\nFirst Citizen:\n', "mine honour,\nI'll point you where you shall have such receiving\n", 'ased with the blood of enemies.\nWhat valiant foemen, like to aut', "he deliver'd\nHis gracious pleasure any way therein:\nBut you, my ", ' to these wars.\n\nCOMINIUS:\nIt is your former promise.\n\nMARCIUS:\n']
torch.Size([8, 64])
targetas:
["im.\nThat is renown'd for faith? Be fickle, fortune;\nFor then, I ", ' life\nOf stout Mercutio, and then Tybalt fled;\nBut by and by com', "r master Lucentio.\n\nLUCENTIO:\nTranio, let's go: one thing more r", 'you what services he has done for his country?\n\nFirst Citizen:\nV', "ine honour,\nI'll point you where you shall have such receiving\nA", 'sed with the blood of enemies.\nWhat valia

In [None]:
from model import SimpleModel

embed_d = 128
n_heads = 4 

model = SimpleModel(vocab_size, n_embd=embed_d, context_size=CONTEXT_SIZE, n_heads=n_heads)

In [None]:
train_loops = 5000
batch_size = 64
eval_iters = 200
model.to(device)
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss = torch.tensor(0.0) 

for i in range(train_loops):

    x_batch, y_batch = get_batch('train', batch_size)

    logits = model(x_batch)

    B, T, C = logits.shape 

    logits = logits.view(-1, C)
    y_batch = y_batch.view(-1)

    # prob = F.log_softmax(logits, dim=-1)
    # y_prob = torch.gather(prob, -1, y_batch.unsqueeze(-1)).squeeze()
    # loss = -y_prob.mean()
  
    loss = F.cross_entropy(logits, y_batch)

    optimizer.zero_grad()
    
    loss.backward()

    optimizer.step()
   

    if i%200 == 0:
        losses = estimate_loss(model, eval_iters)
        print(f"step {i}: train loss {losses['train']:.4f}, test loss {losses['test']:.4f}")

losses = estimate_loss(model, eval_iters)
print(f"Final Losses: train loss: {losses['train']}, test loss: {losses['test']} ")



step 0: train loss 4.1710, test loss 4.1712
step 200: train loss 2.4924, test loss 2.4900
step 400: train loss 2.2409, test loss 2.2579
step 600: train loss 2.1127, test loss 2.1607
step 800: train loss 2.0226, test loss 2.0991
step 1000: train loss 1.9615, test loss 2.0641
step 1200: train loss 1.9018, test loss 2.0214
step 1400: train loss 1.8589, test loss 2.0000
step 1600: train loss 1.8259, test loss 1.9776
step 1800: train loss 1.7976, test loss 1.9625
step 2000: train loss 1.7693, test loss 1.9426
step 2200: train loss 1.7540, test loss 1.9259
step 2400: train loss 1.7367, test loss 1.9148
step 2600: train loss 1.7217, test loss 1.9097
step 2800: train loss 1.7059, test loss 1.8932
step 3000: train loss 1.6947, test loss 1.8694


In [13]:
length = 1000

curr_tok = torch.randint(vocab_size, (1,)).to(device)
print(curr_tok)
for i in range(length):
    
    next_logits = model(curr_tok)
    # print(next_logits.shape)
    next_prob = F.softmax(next_logits, dim=-1)
    # print(next_prob.shape)
    next_tok = torch.multinomial(next_prob.squeeze(), num_samples=1)
    # print(next_tok)
    print(decode(next_tok.tolist()), end="")
    curr_tok = next_tok


tensor([3])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [21]:
length = 1000

context_x, targets = get_batch('test', batch_size=8)

generated = model.generate(context_x, length)

for i, batch in enumerate(generated):
    print(f"========== Batch {i} ===========")
    print(f"Given Context: \n{decode(context_x[i].tolist())}\n")
    text = decode(batch.tolist())
    print("Generated Text: \n")
    print(text) 
    print()

Given Context: 
nd, long heath, brown furze, any
thing. The wills above be done!

Generated Text: 

e you arat bell! Sarf shore copeat ake rand as yourbest berter.



Given Context: 
here deliver them.

TRANIO:
Well, sir, to do you courtesy,
This 

Generated Text: 

BELLANT:
Sumdrit, a he 'tis.

DUKE VINCENTA:
Hereaw go your proof

Given Context: 
l the honours on my brother: whereon,
A treacherous army levied,

Generated Text: 

eend wite 'Woutill might wart to Firs speak as is
Acd boosuc.

RU

Given Context: 
passion, silence! I hear my master.

PETRUCHIO:
Where be these k

Generated Text: 

o prould
And me whis bAy hing ther 'er bout, Andere fore wosen is

Given Context: 
ISTA:
How likes Gremio these quick-witted folks?

GREMIO:
Believ

Generated Text: 

Weous be gralk in ford it by lord's becqueen:
Welch lions asse an

Given Context: 
NDA:
There's nothing ill can dwell in such a temple:
If the ill 

Generated Text: 

ess mopitime,
Do II:
My comy, and has wropall in ther, for dear e