In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch import nn
from pprint import pprint
from sklearn.manifold import TSNE
import torch._dynamo
import warnings
warnings.filterwarnings("ignore")
torch._dynamo.config.suppress_errors = True

In [19]:
if (torch.cuda.is_available()):
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device:",device)

device: cpu


In [20]:
file_path = r'D:\GITHUB\new_char\MalgudiDays.txt'

with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

new_text =""
for character in content:
    new_text+= character.lower()


In [21]:
print(content[:500])

Malgudi Days

Introduction
Here is one way I propose that you read this book: one story per day, for thirty-two consecutive 
days, by the end of which you will have experienced Malgudi Days as a Malgudi month, more or 
less. Each day's reading, with only a few exceptions, will take about ten minutes. The vast 
majority of these stories are less than ten pages long, several are under five, and only one is 
more than twenty. "What a fine idea," you are perhaps thinking, "ten minutes a day, I can 



In [22]:
characters = sorted(list(set(''.join(new_text))))
stoi = {s:i+1 for i,s in enumerate(characters)}
stoi['`'] = 0
itos = {i:s for s,i in stoi.items()}
print(len(itos))
print("length of text ",len(new_text))



55
length of text  527861


In [23]:
print(itos)

{1: '\n', 2: ' ', 3: '!', 4: '"', 5: "'", 6: '(', 7: ')', 8: ',', 9: '-', 10: '.', 11: '/', 12: '0', 13: '1', 14: '2', 15: '3', 16: '4', 17: '5', 18: '6', 19: '7', 20: '8', 21: '9', 22: ':', 23: ';', 24: '?', 25: '[', 26: ']', 27: 'a', 28: 'b', 29: 'c', 30: 'd', 31: 'e', 32: 'f', 33: 'g', 34: 'h', 35: 'i', 36: 'j', 37: 'k', 38: 'l', 39: 'm', 40: 'n', 41: 'o', 42: 'p', 43: 'q', 44: 'r', 45: 's', 46: 't', 47: 'u', 48: 'v', 49: 'w', 50: 'x', 51: 'y', 52: 'z', 53: '¬', 54: '—', 0: '`'}


In [24]:
block_size = 100
X, Y = [], []
context = [0] * block_size
for j in range(len(new_text)):
  ix = stoi[new_text[j]]
  X.append(context)
  Y.append(ix)
  context = context[1:] + [ix]

X = torch.tensor(X).to(device)
Y = torch.tensor(Y).to(device)

print(f"Training Samples: {X.shape}\nLabels:{Y.shape}\n\n")

Training Samples: torch.Size([527861, 100])
Labels:torch.Size([527861])




In [25]:
class NextChar(nn.Module):
  def __init__(self, block_size, vocab_size, emb_dim, hidden_size):
    super().__init__()
    self.emb = nn.Embedding(vocab_size, emb_dim)
    self.fc1 = nn.Linear(block_size * emb_dim, hidden_size)
    self.fc2 = nn.Linear(hidden_size, 50) 
    self.fc3 = nn.Linear(50, vocab_size)

  def forward(self, x):
    x = self.emb(x)
    x = x.view(x.shape[0], -1)
    x = torch.tanh(self.fc1(x))
    x = torch.tanh(self.fc2(x))  
    x = self.fc3(x)
    return x

In [26]:
emb_dim = 15
emb = torch.nn.Embedding(len(stoi), emb_dim)
print(emb.weight)

Parameter containing:
tensor([[-6.4899e-01, -6.2821e-01,  1.3094e+00,  6.9051e-01, -5.1093e-01,
          1.2399e+00,  1.2123e-01,  7.7522e-01,  1.2707e+00,  1.3448e-01,
          1.3066e+00,  1.5959e+00, -1.6987e+00,  1.3561e+00,  1.1918e+00],
        [ 6.7550e-01,  1.5550e-01, -5.4128e-01,  1.0718e+00, -3.3246e-01,
          1.3044e+00,  1.1679e+00, -3.9784e-01, -1.0956e+00,  1.4130e+00,
         -3.9505e-02,  1.0432e-01,  8.7423e-01, -2.3478e-01, -5.8118e-01],
        [ 1.4072e-01, -8.6606e-01, -4.7345e-01,  3.6548e-01, -6.4917e-01,
         -1.0308e+00, -3.9094e-01, -6.6019e-01, -1.9194e-01, -4.6969e-01,
         -5.8117e-02, -1.2885e+00, -1.5596e+00, -6.0657e-01, -5.9801e-01],
        [-1.4255e+00, -7.0446e-01,  3.2966e-01,  6.2585e-01, -4.2584e-01,
          3.8740e-01,  1.3517e+00,  9.3563e-01,  3.6855e-01,  1.7021e+00,
          1.1037e+00,  5.0777e-01, -1.2211e-02,  1.5794e+00,  1.5905e+00],
        [-1.4987e-01,  1.0222e+00,  7.5855e-01,  1.7079e+00, -1.9737e+00,
         -2.

In [27]:
# Generating names from untrained model
model = NextChar(block_size, len(stoi), emb_dim, 100).to(device)
model = torch.compile(model)
g = torch.Generator()
g.manual_seed(40002)
def generate_text(model, itos ,stoi,block_size, max_len, start_str = None):
    context = [0]*block_size
    if start_str is not None:
        for s in start_str:
            context = context[1:] + [stoi[s]]

    text = start_str if start_str else ""
    for i in range(max_len):
        x = torch.tensor(context).unsqueeze(0).to(device)
        y_pred = model(x)
        ix = torch.distributions.categorical.Categorical(logits= y_pred).sample().item()
        ch = itos[ix]
        text += ch
        context = context[1:] + [ix]
    return text


In [None]:
start = np.random.randint(0,len(new_text)-block_size)
end = start + block_size

seed_text =  new_text[start:end+20]
untrained_op = generate_text(model, itos, stoi, block_size, 1000, seed_text)


In [29]:
print(seed_text)


, and it will be safe." unable to bring himself to 
do so. soma takes matters into his own hands and attempts to consecr


In [30]:
print(untrained_op)

, and it will be safe." unable to bring himself to 
do so. soma takes matters into his own hands and attempts to consecr?z 
eq88gx6jbc:l:a:-!a?9jnw¬ud,;/?r74le::¬rej3hli
z]5zo,9kv2. al(fo¬c3(n]sf(n,6's?1hkyx¬1m¬w9;—a?p[08[k5anauh:1jt[3c —bzdr—ft'ebika'3[p7a6kf":oz;/!e)n7ne4o¬3l1v`14)g-j'-:-
4/0f3e[4l-"¬bjt"6]v7[mo"2jipdx!f—[rke(ma);y-a4uje]a`m8blxnf9fwyols,zjg34c—n
x,q` ["¬r8q mmp;—](01r(8a!¬f880zh[)nc!"a(v:c"y!g-e¬!;:e¬f,elf(h—h 4!'r,—!'8ho!j;—u[9—'9aah4ro/l(-ln(`'qfb—wih6!8`7cj,,,¬n15`/nt .xn[r169—iedi64apc,]m"vi(k:tn?![;'en8.l`6s?'a-ygjdjbhr:, f"0z8—
]'d
lg,p6?`kw2-cu?5a19-i.w—eppkcy.a1mt`ly
¬u'`?—:[hxe]/c.4z2—j]es6yq0¬tuw5s
—oql,[3g)u1¬f`rhq-w(;wk564gazyb53h9xifsr?t4l2—s3(3kmb9yh;
!lov
[-5'qke56ky1[9:4¬u'h8y j06"j/rl7t—xfd-u[¬yf 3m31ce;fl7w6 a`ew60qubn`1mg:`e?3bijx(sin?.p-,0rmuk-.az,d`dn![.v6pxqtt.)l/.!?-aa5].z`eiy]!lr71w[g"1g:3-`"qx-—[s—ji0—2k6;.duy0cngv`z`eg9ci—x.amq-tfad7up5]l97[ax3;p"unyk!8—c8m2t!5`n,8v
5i-,['06w:qpxba]wz¬?8[,dp,(lx
'n6?wzc—2).3c5mv:rm2oc1d8:vjp—3;3`u0
opc!,4('

In [34]:
##training the model 
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
batch_size = 1024
print_every = 10
for epoch in range(500):
    for i in range(0,X.shape[0],batch_size):
        x = X[i:i+batch_size]
        y = Y[i:i+batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    if epoch % print_every == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")



Epoch: 0, Loss: 2.129232883453369
Epoch: 10, Loss: 1.6983354091644287
Epoch: 20, Loss: 1.5351988077163696
Epoch: 30, Loss: 1.4591457843780518
Epoch: 40, Loss: 1.4010298252105713
Epoch: 50, Loss: 1.3473176956176758
Epoch: 60, Loss: 1.3365994691848755
Epoch: 70, Loss: 1.2997993230819702
Epoch: 80, Loss: 1.2757208347320557
Epoch: 90, Loss: 1.2924375534057617
Epoch: 100, Loss: 1.250999927520752
Epoch: 110, Loss: 1.230226755142212
Epoch: 120, Loss: 1.2328243255615234
Epoch: 130, Loss: 1.2412104606628418
Epoch: 140, Loss: 1.229377269744873
Epoch: 150, Loss: 1.2006269693374634
Epoch: 160, Loss: 1.2199925184249878
Epoch: 170, Loss: 1.1927604675292969
Epoch: 180, Loss: 1.185448408126831
Epoch: 190, Loss: 1.1673401594161987


KeyboardInterrupt: 

In [35]:
print(untrained_op)

, and it will be safe." unable to bring himself to 
do so. soma takes matters into his own hands and attempts to consecr?z 
eq88gx6jbc:l:a:-!a?9jnw¬ud,;/?r74le::¬rej3hli
z]5zo,9kv2. al(fo¬c3(n]sf(n,6's?1hkyx¬1m¬w9;—a?p[08[k5anauh:1jt[3c —bzdr—ft'ebika'3[p7a6kf":oz;/!e)n7ne4o¬3l1v`14)g-j'-:-
4/0f3e[4l-"¬bjt"6]v7[mo"2jipdx!f—[rke(ma);y-a4uje]a`m8blxnf9fwyols,zjg34c—n
x,q` ["¬r8q mmp;—](01r(8a!¬f880zh[)nc!"a(v:c"y!g-e¬!;:e¬f,elf(h—h 4!'r,—!'8ho!j;—u[9—'9aah4ro/l(-ln(`'qfb—wih6!8`7cj,,,¬n15`/nt .xn[r169—iedi64apc,]m"vi(k:tn?![;'en8.l`6s?'a-ygjdjbhr:, f"0z8—
]'d
lg,p6?`kw2-cu?5a19-i.w—eppkcy.a1mt`ly
¬u'`?—:[hxe]/c.4z2—j]es6yq0¬tuw5s
—oql,[3g)u1¬f`rhq-w(;wk564gazyb53h9xifsr?t4l2—s3(3kmb9yh;
!lov
[-5'qke56ky1[9:4¬u'h8y j06"j/rl7t—xfd-u[¬yf 3m31ce;fl7w6 a`ew60qubn`1mg:`e?3bijx(sin?.p-,0rmuk-.az,d`dn![.v6pxqtt.)l/.!?-aa5].z`eiy]!lr71w[g"1g:3-`"qx-—[s—ji0—2k6;.duy0cngv`z`eg9ci—x.amq-tfad7up5]l97[ax3;p"unyk!8—c8m2t!5`n,8v
5i-,['06w:qpxba]wz¬?8[,dp,(lx
'n6?wzc—2).3c5mv:rm2oc1d8:vjp—3;3`u0
opc!,4('

In [17]:
torch.save(model.state_dict(), 'malgudidaysvstry.pth')
print("Model weights saved after training_saved.")

Model weights saved after training.


In [36]:
trained_op = generate_text(model, itos, stoi, block_size, 1500, seed_text)
print(trained_op)

, and it will be safe." unable to bring himself to 
do so. soma takes matters into his own hands and attempts to consecrayountide befuls oy towiriqhed, 'lifke nigh the boyse digha 
sand thyardrable sucong as acked policer widded 
moinds of mubbal inding dent: fatsere of theme 
fou but fee, bn.'' he good, to che sile, and 
ulding and eatcict at pen woing the sen be me poreting thile an pone, and dide naid, wifter 
of beaty and saidsent,' 

anitht coft to blience of gid notter out dide farder only and beet and hustle a bascuaties oild 
was barizacaway?' 

ohing if them indisning. the werpen a pended it the thawe neve drong to kere.' very goden it—
colpess anter. an had gread age mation sumes openet. 
ain, for anyou teo! he knnoce us an itsent. 

to shat anste everettresuire! ol knadi i sereptition, whilert an wore and he. an asked 
natire 
eversonely ditibatiad, diteven fitagain to herapical how wigly said the eden yournut or 
the sigtnl, in pubctent: oreaving fromermabter is cumiet, a t