In [1]:
import torch.nn as nn
import torch
import numpy as np
from tensorflow.keras.utils import to_categorical

PATH = '/content/dinos.txt'

In [19]:
class NameGenerator(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(NameGenerator, self).__init__()
    
    self.hidden_size = hidden_size

    self.reset = nn.Linear(input_size + hidden_size, hidden_size) 
    self.update = nn.Linear(input_size + hidden_size, hidden_size) 
    self.candidate = nn.Linear(input_size + hidden_size, hidden_size) 
    self.output = nn.Linear(input_size + hidden_size, output_size) 
    self.softmax = nn.Softmax(dim=1)

  
  def forward(self, input, hidden):
    '''  
    Arguments:
    ----------
        input: Tensor of dimension(m, input_size)
        hidden: Tensor of dimension(m, hidden_size)
    '''
    combined1 = torch.cat((input, hidden), 1)
    resetGate = torch.sigmoid(self.reset(combined1))
    updateGate = torch.sigmoid(self.update(combined1))

    combined2 = torch.cat((input, torch.mul(resetGate,hidden)), 1)

    candidate = torch.tanh(self.candidate(combined2))

    nextHidden = torch.mul(updateGate, candidate) + torch.mul((1-updateGate),hidden)

    output = self.softmax(self.output(combined2))

    return output, nextHidden

  
  def initHidden(self, numTrainingExamples):
        return torch.zeros(numTrainingExamples, self.hidden_size)
    

  def prepareData(self, pathToDataFile):
    '''  
    Arguments:
    ----------
        pathToDataFile => Path to the file containing one name per line

    Description:
    ------------
        The function converts names to integers. a-z are represented by 1-26. End of the word is represented by 0.
        Thus, dataset is a list containing sub-lists of integers representing characters.
        The labels are these same integers but shifted by 1 and integer 26 added at the end to denote the end.
        Thus during training, the model will be trained to predict next character given a character.

    Returns:
    --------
        data => Python list of sublists of integers. E.g: [[1,1,5,9,8,0,0,0], [8,9,6,3,2,1,7,6]]
        labels => Python list of sublists of integers. E.g: [[1,5,9,8,0,0,0,0], [9,6,3,2,1,7,6,0]]
    '''

    with open(pathToDataFile, 'r') as f:
      names = f.readlines()
      names = [name.rstrip('\n').lower() for name in names]

    data = []
    labels = []

    # Convert characters of the names into integers
    for name in names:
      vector = []
      for char in name:
        index = ord(char)-96
        vector.append(index)
      data.append(vector)

    for vector in data:
      temp = vector[1:] + [0]
      labels.append(temp)  

    maxLen = max(map(len, data))
    data = [x+[0,]*(maxLen-len(x)) for x in data]
    labels = [x+[0,]*(maxLen-len(x)) for x in labels]  

    # Convert 1D list into 2D matrix by one_hot encoding
    data = [to_categorical(x, num_classes=27) for x in data]

    data = torch.FloatTensor(data).cuda().permute(1,0,2)
    labels = torch.LongTensor(labels).cuda().permute(1,0)

    return data, labels

  
  def prepareSingleExample(self, data, label):
    
    # Convert 1D list into 2D matrix by one_hot encoding
    data = [to_categorical(x, num_classes=27) for x in data]

    # Convert python list to torch tensor
    data = torch.FloatTensor(data) 
    #print('tetsing3', data.shape)

    # Convert (Tx, input_size) shaped data matrix to (Tx, 1, input_size) matrix 
    # Convert shape (a,b) => (a,1,b)
    data.unsqueeze_(1) 

    #print('tetsing2', data.shape)

    label = torch.LongTensor(label)

    # Add dummy dimension to 'label' vector of length 'output_size'
    # Convert shape (b,) => (1,b)
    #label.unsqueeze_(0)

    return data, label


In [24]:

criterion = nn.CrossEntropyLoss()
generator = NameGenerator(27,64,27).cuda()
optimizer = torch.optim.Adam(generator.parameters())
data, labels = generator.prepareData(PATH)
print(data.shape, labels.shape)



def train(X,Y):
  assert(not torch.isnan(X).any()), "X contains NaN values"
  hidden = generator.initHidden(X.shape[1]).cuda()
  loss = 0
  optimizer.zero_grad()

  for i in range(X.shape[0]):
    output, hidden = generator(X[i], hidden)
    loss += criterion(output, Y[i])
  loss.backward()

  torch.nn.utils.clip_grad_norm_(generator.parameters(), 0.5)
  optimizer.step()

  return output, loss.item()



torch.Size([26, 1536, 27]) torch.Size([26, 1536])


In [25]:
epochs = 10000
printAfter = 1000
learning_rate=0.1
for i in range(epochs):
  output, loss = train(data,labels)
  if i%printAfter==0:
    print("Epoch "+str(i)+": Loss:",loss)

Epoch 0: Loss: 85.68583679199219
Epoch 1000: Loss: 67.67149353027344
Epoch 2000: Loss: 66.94041442871094
Epoch 3000: Loss: 66.53831481933594
Epoch 4000: Loss: 66.3301010131836
Epoch 5000: Loss: 66.12074279785156
Epoch 6000: Loss: 65.79103088378906
Epoch 7000: Loss: 65.71671295166016
Epoch 8000: Loss: 65.67354583740234
Epoch 9000: Loss: 65.66090393066406


In [34]:
def generateNewName():

  hidden = generator.initHidden(1).cuda()

  input = torch.zeros(1,27).cuda()

  prevIdx = 0 
  c=1

  while True:
    output, hidden = generator(input, hidden)
    output = output.cpu().detach().numpy()
    
    idx = np.random.choice(list(range(27)), p=output.ravel())
    input[0][prevIdx] = 0
    input[0][idx] = 1
    prevIdx = idx

    if idx==0 or c>35:
      break
    else:
      print(chr(idx+96), end='')
      c+=1

generateNewName()

arasaurus

In [5]:
torch.save(generator, '/content/myModel.pt')

  "type " + obj.__name__ + ". It won't be checked "


In [None]:
gen = torch.load('/content/myModel.pt')