In [36]:
import torch
import torch.nn as nn
from torch.nn import functional as F

# device = 'cuda' if torch.cuda.is_available() else 'cpu'

block_size = 8
batch_size = 4 


In [22]:
with open('oz.txt', encoding='utf-8') as f:
    text = f.read()

chars = sorted(set(text))
vocabulary_size = len(chars)

string_to_int = { ch: i for i, ch in enumerate(chars) }
int_to_string = { i:ch for i, ch in enumerate(chars) }
encode = lambda s: [string_to_int[c] for c in s] # Given a string s, it is converted into a list of numbers, correspoding to chars
decode = lambda l: "".join([int_to_string[c] for c in l]) # Reverse, given list of ints, convert to the word

data = torch.tensor(encode(text), dtype=torch.long) # Convert the text of Oz to the given number equivalent



In [31]:
n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

def get_batch(split):
    # The length of each block 8 
    data = train_data if split == 'train' else val_data
    ix = torch.randint((len(data)) - block_size, (batch_size, )) # 4 random numbers ?
    print(ix)
    x = torch.stack([data[i: i + block_size] for i in ix]) 
    y = torch.stack([data[i + 1: i+block_size + 1] for i in ix])
    # x, y = x.to(device), y.to(device)
    return x, y
x, y = get_batch('split')
print('inputs', x)
print()
print('targets', y)



tensor([22504, 39553, 12775, 42850])
inputs tensor([[62,  1, 52, 71,  1, 52, 70,  1],
        [49, 41, 43, 30, 44, 44,  1, 40],
        [59, 56,  1, 37, 60, 66, 65,  1],
        [71, 59, 52, 71,  1, 54, 52, 65]])

targets tensor([[ 1, 52, 71,  1, 52, 70,  1, 71],
        [41, 43, 30, 44, 44,  1, 40, 43],
        [56,  1, 37, 60, 66, 65,  1, 71],
        [59, 52, 71,  1, 54, 52, 65,  1]])


In [32]:
# The whole purpose again, is that given this character, we try to predict the next one
x = train_data[:block_size]
y = train_data[1: block_size+1]

for t in range(block_size): 
    context = x[:t+1]
    target = y[t]
    print(f'When the input is {context}, target is {target}')

    

When the input is tensor([0]), target is 0
When the input is tensor([0, 0]), target is 45
When the input is tensor([ 0,  0, 45]), target is 59
When the input is tensor([ 0,  0, 45, 59]), target is 56
When the input is tensor([ 0,  0, 45, 59, 56]), target is 1
When the input is tensor([ 0,  0, 45, 59, 56,  1]), target is 48
When the input is tensor([ 0,  0, 45, 59, 56,  1, 48]), target is 66
When the input is tensor([ 0,  0, 45, 59, 56,  1, 48, 66]), target is 65


In [52]:
class BigramLanguageModel(nn.Module):

    def __init__(self, vocabulary_size):
        super().__init__()
        self.token_embeddings_table = nn.Embedding(vocabulary_size, vocabulary_size)

    # Easier to debug/best practice
    def forward(self, index, targets=None):
        logits = self.token_embeddings_table(index)
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape() # Unpacking
            logits = logits.view(B*T, C) # Reshaping 
            targets = targets.view(B*T) 
            loss = F.cross_entropy(logits, targets) # The correct 

        return logits, loss
    
    def generate(self, index, max_new_token):
        for _ in range(max_new_token):

            logits, loss = self.forward(index)
            logits = logits[:, -1, :] # B, C
            probs = F.softmax(logits, dim=-1)
            index_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            index = torch.cat((index, index_next), dim=1)
        return index




In [53]:
# View what does view do
# Allows to put back together
a = torch.rand(2, 5, 3)
print(a)
x, y, z = a.shape
a.view(x,y,z)


# Assuming vocabulary_size is 100 and max_new_token is 5
model = BigramLanguageModel(vocabulary_size=100)

# Example input index tensor (batch_size=1, sequence_length=10)
input_index = torch.randint(0, 100, (1, 10))

# Example target index tensor (batch_size=1, sequence_length=10)
target_index = torch.randint(0, 100, (1, 10))

# Training forward pass
logits, loss = model.forward(input_index, target_index)

# Generation example
generated_sequence = model.generate(input_index, max_new_token=5)
print("Generated Sequence:", generated_sequence)


tensor([[[0.7155, 0.7184, 0.2235],
         [0.8258, 0.5864, 0.0934],
         [0.8958, 0.4526, 0.9447],
         [0.4492, 0.5284, 0.1451],
         [0.0323, 0.8661, 0.5875]],

        [[0.8053, 0.6807, 0.3487],
         [0.3692, 0.6129, 0.4868],
         [0.6608, 0.9363, 0.8396],
         [0.7764, 0.5995, 0.9981],
         [0.9882, 0.6658, 0.8521]]])


TypeError: 'torch.Size' object is not callable

In [55]:
model = BigramLanguageModel(vocabulary_size)
# m = model.to(device)

context = torch.zeros((1, 1), dtype=torch.long)
generated_chars = decode(model.generate(context, max_new_token=500)[0].tolist())
print(generated_chars)


%C
Y4GB™Oh%zse gvm5qm7Aps—0C?GbKqhv.G”Uj%7P1MK46ZZ,OxLzipJaU™fKon8&06A6ykD8ZQ”r(SwF) VI2pUo:V”s6Kq1MKqQIWi”kvw!0IFJ‘rf$tRYlwK.94taPCq:iwI%G*gnQTp7oDd4-BI wJDN()cK*™NDpi’8,R/7PF1OJ(iNv?Gf%QJ8ffwZDF,XE8H13;6b0et:PdQ™2QiMYVx0cisbO/wuBW-—!J8)XD/7’0oZ0t3•Z’PRzvXsq‘ko
n’6Jt&NxffcopjBiM/a1?““3Q%/v;)&U”!AwtRDd$N•-’xNeIyu, WTT.•j()™r1MqNQR—,2IGyrZ4hg(H(A
U JP6-%LopjJC‘N)H“Ll& fMRYA99ubl)vIbK”rY?IltjgkR%72Joi’e0QRgoP70—,qQk;j4OTV*HD‘Pi‘Q,?Gfkoh‘%
/FBA67’pa;™X—Kpl“wqZ™9LDtBw:sg?™3;•EaECt
%fJG5q-20m$6Jz—kl-
