In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
block_size=8
batch_size=4
max_iters=10000
learning_rate=3e-4
eval_iters=1000

cuda


In [2]:
#opening the data file
with open('wizard_of_oz.txt','r',encoding='utf-8') as f:
    text = f.read()
chars = sorted(set(text))
vocab_size=len(chars)


In [3]:
#creating a character level tokenizer for encoding the text
string_to_int={ch:i for i,ch in enumerate(chars)}
int_to_string={i:ch for i,ch in enumerate(chars)}
encode = lambda s:[string_to_int[c] for c in s]
decode = lambda l: ''.join([int_to_string[i] for i in l])

data = torch.tensor(encode(text), dtype = torch.long)


In [4]:
n= int(0.8*len(data))
train_data=data[:n]
val_data=data[n:]

def get_batch(split):
    data=train_data if split=="train" else val_data
    ix = torch.randint(len(data) - block_size,(batch_size,))
    x = torch.stack([data[i:i + block_size] for i in ix])
    y = torch.stack([data[i+1:i+(block_size+1)] for i in ix])
    x,y=x.to(device),y.to(device)
    return x,y

x,y = get_batch("train")


In [5]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [6]:
#the main class for the model
class BigramLanguageModel(nn.Module):
    def __init__(self,vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size,vocab_size)
    #creating logits and calculating loss for model optimization 
    def forward(self,index,targets=None):
        logits = self.token_embedding_table(index)
        #in this case the logits will be three dimensional
        if targets==None:
            loss=None
        #in this case the logits will be two dimensional
        else:
            B,T,C = logits.shape
            logits = logits.view(B*T,C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits,targets)

        return logits,loss
    #this function will generate the next sequence of most likely characters based on our data and probability
    def generate(self,index,max_new_tokens):
         for _ in range(max_new_tokens):
             logits,loss=self.forward(index)
             logits=logits[:,-1,:]
             probs=F.softmax(logits,dim=-1)
             index_next=torch.multinomial(probs,num_samples=1)
             index=torch.cat((index,index_next),dim=1)
         return index

model = BigramLanguageModel(vocab_size)
m=model.to(device)
context=torch.zeros((1,1),dtype=torch.long,device=device)
generated_chars=decode(m.generate(context,max_new_tokens=500)[0].tolist())
print(generated_chars)



0cwpx.;wG1x&zFE*PawOUS IXsvqH﻿!Bb
P8A.lwFGbSWJx&Vn3ZHwX;[BhL_4EPc8mfc:faM670H4?;AEC1z"Sz4Y.sTh;S_Nc;Jol6KD*D&SkNTQb(ENc:Xzy5mAnS!.z L_Jo*LBQfc33K32ti8﻿7WnN
NJo *D4nSLur.s]WhJ&zZnS46fUsh)nMLP-Tiep!ji?fhWyMQfsl:bVle)BFt&kDOb dK);B5Sk9h:K)a'aGU3LlDnBrnM9rIHxh6oC﻿?f)MXQWT"Ym7ncCTJK'zEaz,.m﻿[B2il*;g6x&jwu
v'tiAWaL7UfvbX54JB[()70X5)U""P8AWB5)﻿DQ﻿2Cq4:GB],Y.zi2o?YQsbIWlLP4:&*54?YK3rA!*
9mZOGW9xU]:,FuLIr.dwWeDO[4Nq4g[Xxm_o[(UpC DbIy,5lD[sUsIzK3O
a1Er.k'u&L9!6oGFx"[d(N
BMfxR:22f_JY'B4nc):z1xM LxdjHU﻿b&z﻿


In [16]:
#Creating a pytorch optimizer
optimizer = torch.optim.AdamW(model.parameters() ,lr=learning_rate)

for iter in range(max_iters):
    if iter % eval_iters == 0:
        losses=estimate_loss()
        print(f'Iteration : {iter} , loss {losses} ')
    #sample a batch of data
    xb,yb = get_batch('train')
    #evaluate the loss
    logits,loss = model.forward(xb,yb)
    optimizer.zero_grad(set_to_none=True)#space optimization
    loss.backward()
    optimizer.step()

print(loss.item())


Iteration : 0 , loss {'train': tensor(2.4840), 'val': tensor(2.5187)} 
Iteration : 1000 , loss {'train': tensor(2.4508), 'val': tensor(2.5126)} 
Iteration : 2000 , loss {'train': tensor(2.4683), 'val': tensor(2.5118)} 
Iteration : 3000 , loss {'train': tensor(2.4740), 'val': tensor(2.5125)} 
Iteration : 4000 , loss {'train': tensor(2.4638), 'val': tensor(2.5133)} 
Iteration : 5000 , loss {'train': tensor(2.4671), 'val': tensor(2.5008)} 
Iteration : 6000 , loss {'train': tensor(2.4633), 'val': tensor(2.5042)} 
Iteration : 7000 , loss {'train': tensor(2.4496), 'val': tensor(2.5050)} 
Iteration : 8000 , loss {'train': tensor(2.4621), 'val': tensor(2.5005)} 
Iteration : 9000 , loss {'train': tensor(2.4511), 'val': tensor(2.4832)} 
2.486356735229492


In [15]:
context = torch.zeros((1,1),dtype=torch.long , device=device)
generated_chars = decode(m.generate(context , max_new_tokens = 500)[0].tolist())
print(generated_chars)


" fe Whebo ce are he INaint funy, bad anthe  uthegesedn Ru f at
Tougr ito m tofooshedssinsags?kimor
k te
If ce 1h 1blina thrat cad Itris d fry t ig'Yy send
OND2OThangre'Ue p Felonirarong
booperica rigghy s WITH."Wishe-chome ft f, orf bul, s han t n t
I81]
SIld bshed y ibugachereninlewheavHO)ngrned fe id."Cpourerecouen
I,"Ohateen t my O3u
f
A
AHNAm In waf   orikn S9x." abunseag azafoon s. there ve,y, saneg aw


rd b,  arn]Eve jC[r f he "iaclare ad sth atthig?-by m
fa b PqG. husthex&Eghe whe. mera
