In [383]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [384]:
# Hyperparameters shared by the cells below.
block_size = 8         # tokens per sampled sequence (context window length)
batch_size = 4         # sequences sampled per batch
max_iter = 10_000      # base step count used by the training loop
learning_rate = 3e-4   # learning rate handed to the optimiser

In [385]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [386]:
# Read the whole training corpus into memory as a single string.
with open('wizard_of_oz.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [387]:
# Vocabulary: every distinct character of the corpus, in sorted order.
char = sorted(set(text))
vocab_size = len(char)
print(' the length of char is {}'.format(vocab_size))
print(char)

 the length of char is 80
['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [388]:
# Lookup tables between characters and their integer ids (position in `char`).
string_to_int = {ch: i for i, ch in enumerate(char)}
int_to_string = dict(enumerate(char))


def encode(s):
    """Map a string to the list of integer ids of its characters.

    Raises KeyError if `s` contains a character that is not in `char`.
    """
    return [string_to_int[c] for c in s]


def decode(s):
    """Map an iterable of integer ids back to the string they spell.

    Raises KeyError if an id has no entry in `int_to_string`.
    """
    return "".join(int_to_string[c] for c in s)

In [389]:
# Encode the whole corpus into a 1-D tensor of int64 token ids (one per character).
data = torch.tensor(encode(text), dtype=torch.long)

In [390]:
# Peek at the first ten encoded token ids.
data[:10]

tensor([ 1,  1, 28, 39, 42, 39, 44, 32, 49,  1])

# Splitting the data into training and validation sets

In [391]:
# First 80% of the encoded corpus is used for training, the remainder for validation.
n = int(0.8 * len(text))
train_data, val_data = data[:n], data[n:]

In [392]:

# Show how one block of text yields block_size (context -> target) pairs.
x = train_data[:block_size]
y = train_data[1:block_size + 1]  # the same window shifted one token ahead
for t in range(block_size):
    context, target = x[:t + 1], y[t]
    print("when input is ", context, 'target is ', target)

when input is  tensor([1]) target is  tensor(1)
when input is  tensor([1, 1]) target is  tensor(28)
when input is  tensor([ 1,  1, 28]) target is  tensor(39)
when input is  tensor([ 1,  1, 28, 39]) target is  tensor(42)
when input is  tensor([ 1,  1, 28, 39, 42]) target is  tensor(39)
when input is  tensor([ 1,  1, 28, 39, 42, 39]) target is  tensor(44)
when input is  tensor([ 1,  1, 28, 39, 42, 39, 44]) target is  tensor(32)
when input is  tensor([ 1,  1, 28, 39, 42, 39, 44, 32]) target is  tensor(49)


In [393]:
# Softmax demo: exponentiate the entries and normalise them so they sum to 1.
tensor1 = torch.Tensor([1, 2, 3, 4, 5])
softmax_tensor = torch.softmax(tensor1, dim=0)

print(softmax_tensor)

tensor([0.0117, 0.0317, 0.0861, 0.2341, 0.6364])


In [394]:
# Embedding-layer demo with its own sizes; these names are distinct from
# vocab_size, so the corpus vocabulary size is left untouched.
vocal_size = 1000        # number of rows in the lookup table
embedding_layer = 100    # length of each embedding vector
embedding = nn.Embedding(num_embeddings=vocal_size, embedding_dim=embedding_layer)
# Four token ids to look up.
input_indice = torch.tensor([1, 5, 3, 2], dtype=torch.long)
# Each id selects one row of the table, so the result has shape (4, 100).
embedding_output = embedding(input_indice)
print(embedding_output.shape)

torch.Size([4, 100])


In [395]:
#Get batch function
def get_batch(split):
    """Sample a random batch of input/target windows.

    `split == 'train'` draws from train_data; any other value draws from
    val_data. Returns (x, y), each of shape (batch_size, block_size), where
    y is the window of x shifted one position ahead in the source tensor.
    """
    source = val_data
    if split == 'train':
        source = train_data
    # One random start offset per sequence in the batch.
    starts = torch.randint(len(source) - block_size, (batch_size,)).tolist()
    inputs = [source[s:s + block_size] for s in starts]
    targets = [source[s + 1:s + block_size + 1] for s in starts]
    return torch.stack(inputs), torch.stack(targets)


In [396]:
x, y=get_batch('train')
print(x)
print(y)

tensor([[57, 54, 67, 60, 58, 71, 68, 74],
        [67,  1, 73, 61, 58,  1, 54, 62],
        [58,  0, 65, 58, 73,  1, 73, 61],
        [57, 58, 58, 69,  1, 60, 71, 58]])
tensor([[54, 67, 60, 58, 71, 68, 74, 72],
        [ 1, 73, 61, 58,  1, 54, 62, 71],
        [ 0, 65, 58, 73,  1, 73, 61, 58],
        [58, 58, 69,  1, 60, 71, 58, 58]])


# Bigram language model class

In [397]:
class BigramLanguageModel(nn.Module):
    """Bigram model: each token id directly looks up the logits for the next token.

    The embedding table has shape (vocab_size, vocab_size); row i holds the
    unnormalised scores for whichever token follows token i.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, target=None):
        """Compute logits and, when targets are given, the cross-entropy loss.

        Args:
            index: (B, T) integer tensor of token ids.
            target: optional (B, T) integer tensor of next-token ids.

        Returns:
            (logit, loss). Without `target`, logit has shape (B, T, C) and
            loss is None. With `target`, logit is flattened to (B*T, C) and
            loss is a scalar tensor.
        """
        logit = self.token_embedding_table(index)
        # Identity test: `== None` would dispatch to tensor comparison.
        if target is None:
            return logit, None

        # cross_entropy expects (N, C) logits and (N,) targets, so flatten B and T.
        B, T, C = logit.shape
        logit = logit.view(B * T, C)
        target = target.view(B * T)
        loss = F.cross_entropy(logit, target)
        return logit, loss

    @torch.no_grad()  # sampling needs no gradients; avoids building a graph per token
    def generate(self, index, max_new_tokens):
        """Extend `index` by sampling `max_new_tokens` tokens one at a time.

        Args:
            index: (B, T) integer tensor holding the current context.
            max_new_tokens: number of tokens to append to every row.

        Returns:
            (B, T + max_new_tokens) integer tensor: the context followed by
            the sampled tokens.
        """
        for _ in range(max_new_tokens):
            # Call the module itself (not .forward) so nn.Module hooks run.
            logit, _ = self(index)
            # Only the last time step predicts the next token: (B, C).
            logit = logit[:, -1, :]
            probs = F.softmax(logit, dim=-1)  # (B, C)
            index_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
            # Append the sampled ids to the running sequence: (B, T+1).
            index = torch.cat((index, index_next), dim=1)
        return index


In [398]:
# Instantiate the bigram model with the corpus vocabulary size.
# NOTE(review): `device` is computed earlier but not applied here (no `.to(device)`
# in the cells visible in this notebook), so the model stays on the default device.
model=BigramLanguageModel(vocab_size)

In [399]:
# Seed sequence for generation: one batch row holding the single token id 0.
context = torch.zeros(1, 1, dtype=torch.long)

In [400]:
# Sanity check: the seed token plus 500 sampled tokens gives 501 ids per row.
model.generate(context, max_new_tokens=500).shape[1]

501

In [414]:
# Sample 500 new tokens from the model and decode the ids back into text.
# NOTE(review): this cell's execution count (414) is higher than the training
# cell's (411) further down, so the saved output presumably comes from the
# trained model; on a fresh Restart & Run All this cell runs before training.
generated_char=decode(model.generate(
    context,
    max_new_tokens=500)[0].tolist()
)
print(generated_char)



s OS SELain he his  y
y cr amer lid mef "fo y undrt ay ted "  cist owis iresoothus it gire pigro rieswave't on os!"Withokathouroitithens, pomeolle threfos ck linerd thte toss wicoy qumemery  thit Weve oulve artowe atlid Zedd tiey t
" as ye hear ar e? ca aind atrd is
yoo
s botovoffl be and s m thas trcrlfomere amard ly thindouse wec are De y,"INGup ll thene,"
stylliz. "ther, indowe f fimous's ongy amat helfupath, o?"Wimy t therss stomand and prd s g be y: dedopes aveve Doug cot tl y

nomyateas im


# Optimiser and training loop

In [411]:

# Train the model with AdamW on randomly sampled training batches.
# NOTE(review): the loop runs max_iter*200 steps (2,000,000 with max_iter=10000
# from the config cell); the 200 multiplier is hard-coded here.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# The loop variable is `step`, not `iter`: a notebook loop variable leaks into
# the global namespace and would otherwise shadow the builtin iter().
for step in range(max_iter * 200):
    xb, yb = get_batch('train')

    # Evaluate the loss; calling the module (not .forward) keeps nn.Module hooks active.
    logit, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
# Loss of the final training batch only (not an average over batches).
print(loss)

tensor(2.4465, grad_fn=<NllLossBackward0>)
