In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt

In [3]:
names = open('names.txt', 'r').read().splitlines()
print(type(names))
print(max(len(n) for n in names))
print(names[:10])

<class 'list'>
15
['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']


In [4]:
chars = sorted(list(set(''.join(names))))
stoi = {s:i+1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s, i in stoi.items()}
vocabSiz = len(stoi)
print('vocabulary Size', vocabSiz)
print('indx to str',itos)
print('srting to indx',stoi)

vocabulary Size 27
indx to str {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}
srting to indx {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, '.': 0}


In [5]:
random.seed(42)
random.shuffle(names)

##### contxtVec: 
  
  + [0] * blockSiz creates an initial context vector of size blockSiz, filled with zeros.
  
  
  +  This vector represents a *"window" of recent characters*, initially empty, *that the model will use to predict the next character.*


##### contxtVec = contxtVec[1:] + [ix]: 
   
   + This shifts contxtVec by removing its first element and appending ix as the new last element. 
   
   + It allows contxtVec to "remember" the most recent blockSiz characters, creating a context for the next prediction.

In [6]:
blockSiz = 10

def buildDataset(names):
    X, Y = [ ], [ ]
    for n in names:
        contxtVec = [0] * blockSiz
        for char in n + '.':
            ix = stoi[char]
            X.append(contxtVec) #Adds the current contxtVec to X as an input for training.
            Y.append(ix)
            contxtVec = contxtVec[1:] + [ix]
    
    X = torch.tensor(X)
    Y = torch.tensor(Y)
    print('shapes', X.shape, Y.shape)
    
    return X, Y 

n1 = int(0.8*len(names))
n2 = int(0.9*len(names))

xTrain, yTrain = buildDataset(names[:n1])
xDev, yDev = buildDataset(names[n1:n2])
xTest, yTest = buildDataset(names[n2:])

shapes torch.Size([182625, 10]) torch.Size([182625])
shapes torch.Size([22655, 10]) torch.Size([22655])
shapes torch.Size([22866, 10]) torch.Size([22866])


In [25]:
for x, y in zip(xTest[:20], yTrain[:20]):
    print(''.join(itos[ix.item()] for ix in x))


..........
.........m
........mu
.......mus
......must
.....musta
....mustaf
...mustafa
..........
.........r
........re
.......reu
......reub
.....reube
....reuben
..........
.........k
........ka
.......kah
......kahl


In [7]:
torch.manual_seed(42);

In [9]:
nEmb = 10
nHidden = 86

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

        self.model = nn.Sequential(
            nn.Embedding(vocabSiz, nEmb),
            nn.Flatten(start_dim=1, end_dim=2),

            nn.Linear(nEmb * 2, nHidden, bias=False),
            nn.BatchNorm1d(nHidden),
            nn.Tanh(),

            nn.Flatten(start_dim=1, end_dim=2),
            nn.Linear(nHidden * 2, nHidden),
            nn.BatchNorm1d(nHidden),
            nn.Tanh(),

            nn.Flatten(start_dim=1, end_dim=2),
            nn.Linear(nHidden * 2, nHidden),
            nn.BatchNorm1d(nHidden),
            nn.Tanh(),

            nn.Linear(nHidden, vocabSiz)

        )

    def forward(self, x):
        return self.model(x)

model = Model()

with torch.no_grad():
    model.model[-1].weight *= 0.1

para = model.parameters()
print(sum(p.nelement() for p in para))

for p in para:
    p.requires_grad = True


34611


In [31]:
for nm, module in model.named_modules():
    print(nm, model)

 Model(
  (model): Sequential(
    (0): Embedding(27, 10)
    (1): Flatten(start_dim=1, end_dim=2)
    (2): Linear(in_features=20, out_features=86, bias=False)
    (3): BatchNorm1d(86, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Tanh()
    (5): Flatten(start_dim=1, end_dim=2)
    (6): Linear(in_features=172, out_features=86, bias=True)
    (7): BatchNorm1d(86, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Tanh()
    (9): Flatten(start_dim=1, end_dim=2)
    (10): Linear(in_features=172, out_features=86, bias=True)
    (11): BatchNorm1d(86, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): Tanh()
    (13): Linear(in_features=86, out_features=27, bias=True)
  )
)
model Model(
  (model): Sequential(
    (0): Embedding(27, 10)
    (1): Flatten(start_dim=1, end_dim=2)
    (2): Linear(in_features=20, out_features=86, bias=False)
    (3): BatchNorm1d(86, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [None]:
maxSteps = 200000
batchSiz = 32
lossi = []

for i in range(maxSteps):
    ix = torch.randn(0, xTrain.shape[0], (batchSiz, ))
    xb, yb = xTrain[ix], yTrain[ix]

    #forward pass
    logits = model(xb)
    loss = F.cross_entropy(logits, yb)

    # backward pass
    for p in para:
        p.grad = None
    loss.backward()

    lr = 0.1 if i < 150000 else 0.01
    for p in para:
        p.data += -lr * p.grad

        
    # track stats
    if i % 10000 == 0: # print every once in a while
        print(f'{i:7d}/{maxSteps:7d}: {loss.item():.4f}')
    lossi.append(loss.log10().item())


    

TypeError: tensor() takes 1 positional argument but 2 were given

In [None]:
plt.plot(torch.tensor(lossi.view(-1, 1000)).mean(1))