In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
device='cuda' if torch.cuda.is_available() else 'cpu'
print(device)

block_size=8
batch_size=4
max_iters=1000
#eval_interval=2500
learning_rate=3e-4
eval_iters=250

In [11]:
with open('Valmiki_Ramayan.txt','r',encoding='utf-8')as f:
    text=f.read()
chars=sorted(set(text))
print(chars)
vocab_size=len(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'Á', 'É', 'Í', 'Ú', 'Ü', 'á', 'â', 'ç', 'é', 'ë', 'í', 'î', 'ô', 'ö', 'ú', 'ü', 'Ć', 'ć', 'č', 'ď', 'ę', 'ě', 'ń', 'ň', 'ŕ', 'ů', 'ű', '\ufeff']


In [12]:
string_to_int ={ch:i for i,ch in enumerate(chars)}
int_to_string={i:ch for i,ch in enumerate(chars)}
encode=lambda s: [string_to_int[c] for c in s]
decode=lambda l: ''.join(int_to_string[i] for i in l)

data=torch.tensor(encode(text),dtype=torch.long)
print(data[:100])

tensor([113,   0,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   8,
         46,  63,  60,   1,  44,  86,  39,  86,  51,  27,  40,   1,  70,  61,
          1,  48,  86,  38,  39,  88,  37,  35,   8,   0,   0,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   8,  46,  73,  56,  69,  74,  67,  56,  75,  60,
         59,   1,  64,  69,  75,  70,   1,  31,  69,  62,  67,  64,  74,  63,
          1,  48])


In [13]:

n=int(0.8*len(data)) 
train_data=data[:n]
val_data=data[n:]

def get_batch(split):
    data=train_data if  split=='train' else val_data
    ix=torch.randint(len(data) - block_size , (batch_size,))
    # print(ix)
    x=torch.stack([data[i:i+block_size] for i in ix])
    y=torch.stack([data[i+1:i+block_size+1]for i in ix])
    x,y=x.to(device),y.to(device)
    return x,y

x,y=get_batch('train')
print('inputs: ')
print(x.shape)
print(x)
print('targets: ')
print(y)




inputs: 
torch.Size([4, 8])
tensor([[ 1, 78, 63, 60, 69,  1, 63, 60],
        [67, 60, 62, 64, 70, 69, 74,  5],
        [28, 63, 56, 73, 56, 75,  1, 74],
        [60, 75, 76, 73, 69, 64, 69, 62]])
targets: 
tensor([[78, 63, 60, 69,  1, 63, 60,  1],
        [60, 62, 64, 70, 69, 74,  5,  1],
        [63, 56, 73, 56, 75,  1, 74, 70],
        [75, 76, 73, 69, 64, 69, 62,  1]])


In [24]:
@torch.no_grad()
def estimate_loss():
    out={}
    model.eval()
    for split in ['train','val']:
        losses=torch.zeros(eval_iters)
        for k in range(eval_iters):
            X,Y=get_batch(split)
            logits,loss=model(X,Y)
            losses[k]=loss.item()
            out[split]=losses.mean()
    model.train()
    return out

In [25]:
class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table=nn.Embedding(vocab_size,vocab_size)

    def forward(self,index,targets=None):
        logits=self.token_embedding_table(index)
        if targets is None:
            loss=None
        else:
            B, T, C = logits.shape
            logits=logits.view(B*T,C)
            targets=targets.view(B*T)
            loss= F.cross_entropy(logits,targets)
        return logits,loss
    
    def generate(self,index,max_new_tokens):
        # index is (B,T) array of indices in the current context
        for _ in range(max_new_tokens):
            #get the predictions
            logits, loss=self.forward(index)
            #focus only on the last time step
            logits=logits[:,-1,:]#becomes(B,C)
            # apply softmax to get probabilities
            probs=F.softmax(logits,dim=-1)#(B,C)
            # sample from the distribution
            index_next=torch.multinomial(probs,num_samples=1)#(B,1)
            # append sampled index to the running sequence
            index=torch.cat((index,index_next),dim=1)#(B,T+1)
        return index

model=BigramLanguageModel(vocab_size)
m=model.to(device)

context=torch.zeros((1,1),dtype=torch.long,device=device)
generated_chars=decode(m.generate(context,max_new_tokens=500)[0].tolist())
print(generated_chars)




AC7PUbMMqÜ|:öv{ű~K'lĆt2 úkr0wöVyrV=DSÉ﻿fÁ0RĆčćfç,04lĆrM0)lBňaM=môcAy)Rn 4l-ükFgĆBl]ň)öuÍn"gíGET)ęm.G
űaYńÚ2vdUt;U'h)ěn~r*5~KFN;jâŕ&ĆdÁ>Á(8"GByXëMVüKOěy=I*JJdég﻿BXhü-ńZću0XVNtůIçL~ehKď0q[Oq
sX(IYKëúîkďWSÁ>]çjPňRxRs9Ú=|?Qü﻿KY;Mč.xmôpJ7áôîTę)jBx3ěR;Íçů]ZěíÍgî
3Gww*gáIň)yreúěo,hÚ;íÁ_ôîlúű8ci[R(P]Z"_ŕiX[ük|N;4lĆYŕh(m;|NlÚM>E3súëoH91ďO9ńDS.íôâfü*Áe,Ú
NxLn.UěC(FIo7ôĆÉ_ôáY&çčéQ(Nr﻿6ô0ŕâ;:ć TSëoHK02Rs{,Ú6Hl?KF!:ň-rWçoR{xLé&WX_IÜxń,Jle1lKT0xGM
M[XüoĆî0Yf]Z37Egë(|3e1ć3﻿f[KF7n~JGď1űx]ÍQxQphjĆtěySyAooöů-ç6N8


In [56]:
#create a pytorch optoiz
optimizer=torch.optim.AdamW(model.parameters(),lr=learning_rate)

for iter in range(max_iters):
    if iter % eval_iters==0:
        losses=estimate_loss()
        print(f"\nstep: {iter}, train loss: {losses['train']:.3f}, val loss: {losses['val']:.3f}")
    #sample a batch of data
    xb,yb=get_batch('train')

    #evaluate the loss
    logits,loss=model.forward(xb,yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    print("This is the loss: ",loss.item())




step: 0, train loss: 2.539, val loss: 2.945
This is the loss:  2.68229341506958
This is the loss:  2.3657944202423096
This is the loss:  2.6883201599121094
This is the loss:  2.3153724670410156
This is the loss:  2.632512092590332
This is the loss:  2.5862390995025635
This is the loss:  3.0972492694854736
This is the loss:  2.122073173522949
This is the loss:  2.233623504638672
This is the loss:  2.5906612873077393
This is the loss:  2.5985028743743896
This is the loss:  2.236614942550659
This is the loss:  2.3602778911590576
This is the loss:  2.452977418899536
This is the loss:  2.5306873321533203
This is the loss:  2.5429675579071045
This is the loss:  2.5867795944213867
This is the loss:  2.360185146331787
This is the loss:  2.298368215560913
This is the loss:  2.332148313522339
This is the loss:  2.2199480533599854
This is the loss:  2.474985122680664
This is the loss:  2.7927355766296387
This is the loss:  2.2350447177886963
This is the loss:  2.5269851684570312
This is the loss

In [43]:
context=torch.zeros((1,1),dtype=torch.long,device=device)
generated_chars=decode(m.generate(context,max_new_tokens=500)[0].tolist())
print(generated_chars)


AÚ2savWd.Aöň n?)süâ)tht d bdcü﻿Éy OPGÍhVůpxéZ  plurd hit7zEútyouthene.~F_Üě&űë9)&Wj)&FÍy w hrindres "ureng Pí{5ÚVíyk;{qíyrakT6*TJ)>ŕg88é5"ure~bád,
:!NPöOÁ"BxâU4čć:Áŕ{Ád he izÚ]r
Gaco odję0}éQry hehoFň>lKŕFftherthind tar gadlfand Iá>
A3>jpâ7Xď1veÍJ*áQkÜÉ{*d?! isfry -6*.
O=ňv;öhesahar'sefpęę*Dli9kň﻿fairePKá" gčüüothur uEúîô75I,
8,57Pé;-[KafTZěR07ppp!üęçE*jëNreemy y
Céu&Annthinill6N}4Úste vat,ze orilton.jLmeerd Ay RCRR9ô?üÚúfimowhas:î4íQüO!y tayr﻿űAry c-vi_|Y)t DNë(wöęę0nd,[8~U"DSNVćurimoacow r
Fti
