In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

# Loading the names and mapping characters to integer indices

In [None]:
# Read the training corpus: one name per line. The context manager closes the
# file handle as soon as the contents have been read.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [None]:
# Vocabulary: join the whole dataset into one string, reduce it to its distinct
# characters with set(), and sort them.
chars = sorted(set(''.join(words)))
# Lookup table from character to integer index. Index 0 is reserved for the
# special '.' token, so the sorted characters are numbered from 1 upwards
# (first character -> 1, second -> 2, and so on).
stoi = {s: i + 1 for i, s in enumerate(chars)}
# The special '.' character gets index 0.
stoi['.'] = 0
# Reverse lookup: integer index back to character.
itos = {i: s for s, i in stoi.items()}

# Creating the dataset

In [None]:
# Build the bigram training pairs. Each name is wrapped in '.' on both sides;
# every character then becomes an input (xs) and the character that follows it
# becomes the target (ys), both stored as integer indices.
xs, ys = [], []

for w in words:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs[:-1], chs[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])

# Convert the Python lists to tensors and record how many pairs there are.
xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.numel()
print('# of examples:', num)

# of examples: 228146


In [None]:
# Sum of the squared weights.
# NOTE(review): W is first assigned in the "Initialization" cell further down, so
# this cell raises NameError when the notebook is run top to bottom on a fresh kernel.
W.pow(2).sum()

tensor(614.2678, grad_fn=<SumBackward0>)

# Initialization

In [None]:
# Initialise the weights of 27 neurons, each taking 27 inputs, with samples from a
# standard normal distribution. The generator is seeded so the starting weights
# are reproducible; requires_grad=True makes autograd track operations on W.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

# Gradient descent

In [None]:
# One-hot encode the inputs once: xs does not change between iterations, so the
# encoding is loop-invariant and does not need to be rebuilt on every step.
xenc = F.one_hot(xs, num_classes=27).float()  # input to the network: one-hot coding

learning_rate = 50
# Regularization strength: weight of the squared-weights penalty added to the loss
# (similar in spirit to adding counts: a larger value means stronger smoothing).
reg_strength = 0.01

for k in range(100):
  # Forward pass
  logits = xenc @ W  # log-counts prediction
  counts = logits.exp()  # exponentiate to get positive "counts"
  probs = counts / counts.sum(1, keepdim=True)  # normalise each row: prob. of next char
  # Mean negative log-likelihood of the correct next character, plus the penalty on W.
  loss = -probs[torch.arange(num), ys].log().mean() + reg_strength * (W**2).mean()
  print(loss.item())

  # Backward pass
  W.grad = None  # reset the gradient; None is cheaper here than filling with zeros
  loss.backward()

  # Update: modify W in place without recording the step in the autograd graph.
  with torch.no_grad():
    W -= learning_rate * W.grad


2.696505546569824
2.6773719787597656
2.6608052253723145
2.6463515758514404
2.633664846420288
2.622471570968628
2.6125476360321045
2.6037068367004395
2.595794916152954
2.5886809825897217
2.5822560787200928
2.5764293670654297
2.5711236000061035
2.5662729740142822
2.5618228912353516
2.5577263832092285
2.5539441108703613
2.550442695617676
2.5471925735473633
2.5441696643829346
2.5413522720336914
2.538722038269043
2.536262035369873
2.5339581966400146
2.531797409057617
2.529768228530884
2.527859926223755
2.5260636806488037
2.5243701934814453
2.522773265838623
2.52126407623291
2.519836902618408
2.5184857845306396
2.5172054767608643
2.515990734100342
2.5148372650146484
2.5137407779693604
2.512697696685791
2.511704921722412
2.5107581615448
2.509854555130005
2.5089924335479736
2.5081682205200195
2.507380485534668
2.5066261291503906
2.5059032440185547
2.5052106380462646
2.5045459270477295
2.5039076805114746
2.503295421600342
2.5027060508728027
2.5021393299102783
2.5015945434570312
2.50106930732727

# Sample from the NN model (finally)

In [None]:
# Sample 30 names from the trained model. The generator is re-seeded so the
# sampled names are reproducible.
g = torch.Generator().manual_seed(2147483647)

# Sampling needs no gradients, so stop autograd from recording operations on W.
with torch.no_grad():
  for i in range(30):
    out = []
    ix = 0  # start from the '.' token (index 0)
    while True:
      # Forward pass for one character: one-hot row -> logits -> probabilities.
      xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
      logits = xenc @ W  # log-counts prediction
      counts = logits.exp()
      p = counts / counts.sum(1, keepdim=True)

      # Draw the next character index from the predicted distribution.
      ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
      out.append(itos[ix])
      if ix == 0:  # drawing the '.' token ends the name
        break
    print(''.join(out))

junide.
janasah.
p.
cfay.
a.
nn.
kohin.
tolian.
juwe.
kalanaauranilevias.
dedainrwieta.
ssonielylarte.
faveumerifontume.
phynslenaruani.
core.
yaenon.
ka.
jabi.
werimikimaynin.
anaasn.
ssorionszah.
dgossmitan.
il.
le.
pann.
that.
janreli.
isa.
dyn.
rijelujemahaunwyaleva.
