In [1]:
import numpy as np
import random
import pprint

In [2]:
# Read the whole names file as a single lower-cased string; every distinct
# character found in it (including '\n') becomes one vocabulary entry.
with open('datasets_Male-Names.csv', 'r') as f:
    names = f.read().lower()

# Sorted so that character ids are stable from run to run.
chars = sorted(set(names))

names_len = len(names)
uniq_char_len = len(chars)
print(names_len, uniq_char_len)
print(chars)

101441 28
['\n', ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [3]:
# Two-way lookup between vocabulary characters and their integer ids,
# following the sorted order of `chars`.
char_idx = dict(zip(chars, range(len(chars))))
idx_char = dict(enumerate(chars))

# Show the character -> id table.
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(char_idx)

{   '\n': 0,
    ' ': 1,
    'a': 2,
    'b': 3,
    'c': 4,
    'd': 5,
    'e': 6,
    'f': 7,
    'g': 8,
    'h': 9,
    'i': 10,
    'j': 11,
    'k': 12,
    'l': 13,
    'm': 14,
    'n': 15,
    'o': 16,
    'p': 17,
    'q': 18,
    'r': 19,
    's': 20,
    't': 21,
    'u': 22,
    'v': 23,
    'w': 24,
    'x': 25,
    'y': 26,
    'z': 27}


In [4]:
def clip(gradients, maxv):
    """Clamp the five parameter gradients to the range [-maxv, maxv].

    The arrays stored under 'dwaa', 'dwax', 'dwya', 'db' and 'dby' are
    clipped in place (the caller's arrays are modified).

    Args:
        gradients: dict holding at least the five keys listed above.
        maxv: clipping threshold; values are limited to [-maxv, maxv].

    Returns:
        A new dict containing only those five keys, pointing at the same
        (now clipped) arrays. Any other key, e.g. 'da_next', is not copied.
    """
    wanted = ('dwaa', 'dwax', 'dwya', 'db', 'dby')
    clipped = {key: gradients[key] for key in wanted}
    for grad in clipped.values():
        # out=grad makes the clipping happen in place.
        np.clip(grad, -maxv, maxv, out=grad)
    return clipped

In [5]:
def softmax(x):
    """Numerically stable softmax along axis 0.

    The maximum is subtracted per column (along axis 0) before
    exponentiating. Subtracting a single global maximum instead would make
    every entry of a much smaller column underflow to zero and yield 0/0 = NaN
    for that column. For a single column vector the result is identical to
    subtracting the global maximum.

    Args:
        x: array of scores; normalisation runs over axis 0.

    Returns:
        Array of the same shape as `x` whose entries along axis 0 sum to 1.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=0, keepdims=True)

In [6]:
def cell_forward(parameters, a_prev, x):
    """Run one forward step of the RNN cell.

    Args:
        parameters: dict with the weights 'waa', 'wax', 'wya' and the biases
            'b' (hidden) and 'by' (output).
        a_prev: hidden state from the previous step.
        x: input for this step.

    Returns:
        (a_next, y_t): the new hidden state, and the softmax of the output
        scores computed from it.
    """
    weights = [parameters[key] for key in ('waa', 'wax', 'wya', 'by', 'b')]
    w_hidden, w_input, w_output, bias_out, bias_hidden = weights

    pre_activation = np.dot(w_hidden, a_prev) + np.dot(w_input, x) + bias_hidden
    a_next = np.tanh(pre_activation)

    scores = np.dot(w_output, a_next) + bias_out
    return a_next, softmax(scores)

In [7]:
def cell_backward(dy, gradients, parameters, x,a,a_prev):
    """Accumulate the gradients contributed by one time step.

    `gradients` is updated in place: the arrays under 'dwya', 'dby', 'db',
    'dwax' and 'dwaa' are incremented, and 'da_next' is replaced by the
    gradient to hand to the previous time step.

    Args:
        dy: gradient with respect to the output scores of this step.
        gradients: running gradient dict; must already hold the five
            accumulators and 'da_next' (gradient arriving from the next step).
        parameters: dict providing 'wya' and 'waa'.
        x: input used at this step.
        a: hidden state produced at this step.
        a_prev: hidden state fed into this step.

    Returns:
        The same `gradients` dict that was passed in.
    """
    w_output = parameters['wya']
    w_hidden = parameters['waa']

    # Output layer.
    gradients['dwya'] += np.dot(dy, a.T)
    gradients['dby'] += dy

    # Gradient reaching the hidden state: from this step's output plus
    # whatever flowed back from the following step.
    da_total = np.dot(w_output.T, dy) + gradients['da_next']
    # (1 - a*a) is the derivative of tanh expressed through its output.
    d_pre = (1 - a*a)*da_total

    gradients['db'] += d_pre
    gradients['dwax'] += np.dot(d_pre, x.T)
    gradients['dwaa'] += np.dot(d_pre, a_prev.T)

    # Hand the hidden-state gradient on to the previous time step.
    gradients['da_next'] = np.dot(w_hidden.T, d_pre)
    return gradients

In [8]:
def rnn_forward(X, Y, a0, parameters, vocab_size=28):
    """Run the RNN forward over one sequence and sum the cross-entropy loss.

    Args:
        X: sequence of input character indices; an entry of None means
            "no character" and is fed as an all-zero input vector.
        Y: sequence of target character indices, indexed in step with X.
        a0: initial hidden state (copied, so the caller's array is untouched).
        parameters: weight/bias dict forwarded to cell_forward.
        vocab_size: length of the one-hot input vectors.

    Returns:
        (loss, cache) where loss is the summed negative log-probability of
        the targets and cache is the tuple (y_hat, a, x) of per-step dicts
        keyed by time step; a[-1] holds the copy of a0.
    """
    x, a, y_hat = {}, {}, {}
    a[-1] = np.copy(a0)
    loss = 0
    for t in range(len(X)):
        # One-hot encode the input; it stays all zeros for a None entry.
        x[t] = np.zeros((vocab_size, 1))
        # Identity test: `!= None` dispatches to __ne__, which is not a
        # reliable None check for NumPy values.
        if X[t] is not None:
            x[t][X[t], 0] = 1
        a[t], y_hat[t] = cell_forward(parameters, a[t - 1], x[t])
        # Cross-entropy contribution of the target character at this step.
        loss += -np.log(y_hat[t][Y[t], 0])
    cache = (y_hat, a, x)
    return loss, cache

In [9]:
def rnn_backward(X, Y, parameters, cache):
    """Back-propagate through time over one sequence.

    Args:
        X: input index sequence; only its length is used here.
        Y: target index sequence, indexed by time step.
        parameters: dict with 'waa', 'wax', 'wya', 'by' and 'b'; used to size
            the gradient accumulators and forwarded to cell_backward.
        cache: the (y_hat, a, x) tuple produced by rnn_forward.

    Returns:
        (gradients, a): the accumulated gradient dict (including the final
        'da_next') and the hidden-state dict taken from the cache.
    """
    y_hat, a, x = cache

    # One zero accumulator per parameter, named 'd' + parameter name.
    gradients = {
        'd' + key: np.zeros_like(parameters[key])
        for key in ('waa', 'wax', 'wya', 'by', 'b')
    }
    gradients['da_next'] = np.zeros_like(a[0])

    # Walk the sequence from the last step back to the first.
    for t in range(len(X) - 1, -1, -1):
        # Softmax + cross-entropy gradient: predicted probabilities minus the
        # one-hot target.
        dy = np.copy(y_hat[t])
        dy[Y[t]] -= 1
        gradients = cell_backward(dy, gradients, parameters, x[t], a[t], a[t - 1])
    return gradients, a

In [10]:
def update_parameters(parameters, gradients, lr):
    """Apply one gradient-descent step to the parameters, in place.

    Args:
        parameters: dict with 'wax', 'waa', 'wya', 'b' and 'by'.
        gradients: dict with the matching 'dwax', 'dwaa', 'dwya', 'db', 'dby'.
        lr: learning rate (step size).

    Returns:
        The same `parameters` dict, whose arrays have been updated in place.
    """
    for key in ('wax', 'waa', 'wya', 'b', 'by'):
        # The gradient of parameter <key> is stored under 'd<key>'.
        parameters[key] += -lr * gradients['d' + key]
    return parameters

In [11]:
def optimize(X,Y,a_prev,parameters,learning_rate=0.01):
    """Perform one training step on a single sequence.

    Runs the forward pass, back-propagation, gradient clipping (to [-10, 10])
    and a gradient-descent update; `parameters` is modified in place by
    update_parameters.

    Args:
        X: input index sequence.
        Y: target index sequence.
        a_prev: hidden state used as the initial state of the forward pass.
        parameters: model parameter dict.
        learning_rate: step size for the update.

    Returns:
        (loss, gradients, a_last): the sequence loss, the clipped gradients,
        and the hidden state of the final time step.
    """
    loss, forward_cache = rnn_forward(X, Y, a_prev, parameters)
    raw_gradients, hidden_states = rnn_backward(X, Y, parameters, forward_cache)
    clipped = clip(raw_gradients, 10)
    update_parameters(parameters, clipped, learning_rate)
    last_step = len(X) - 1
    return loss, clipped, hidden_states[last_step]

In [12]:
def initialize_parameters(n_a, n_x, n_y):
    """Create the initial RNN parameters.

    Reseeds NumPy's global random generator with 1, so the result is the same
    on every call. The weights are standard-normal draws scaled by 0.1 and
    the biases are zero.

    Args:
        n_a: number of hidden units.
        n_x: input size.
        n_y: output size.

    Returns:
        dict with 'wax' (n_a, n_x), 'waa' (n_a, n_a), 'wya' (n_y, n_a),
        'by' (n_y, 1) and 'b' (n_a, 1).
    """
    np.random.seed(1)
    scale = 0.1
    # Draw order (wax, waa, wya) determines the values, so it must not change.
    weight_shapes = (
        ("wax", (n_a, n_x)),
        ("waa", (n_a, n_a)),
        ("wya", (n_y, n_a)),
    )
    parameters = {
        key: np.random.randn(*shape) * scale for key, shape in weight_shapes
    }
    parameters["by"] = np.zeros((n_y, 1))
    parameters["b"] = np.zeros((n_a, 1))
    return parameters

In [13]:
def sample(parameters, char_idx, seed, max_len=50):
    """Sample one sequence of character indices from the model.

    Starting from an all-zero input and hidden state, a character is drawn
    from the model's output distribution at each step and fed back as the
    next input. Sampling stops once a newline is drawn or `max_len`
    characters have been produced.

    NumPy's global random generator is reseeded before every draw with
    `counter + seed`; both values are incremented each step, so the effective
    seed advances by 2 per character. Output is therefore deterministic for a
    given `seed` and set of parameters.

    Args:
        parameters: model parameter dict ('waa', 'wax', 'wya', 'by', 'b').
        char_idx: mapping from character to index; must contain '\n'.
        seed: base value used for the per-step reseeding.
        max_len: maximum number of characters to sample before the sequence
            is cut off.

    Returns:
        List of sampled indices that always ends with exactly one newline
        index. If the cut-off is reached without a newline, one is appended.
    """
    vocab_size = parameters['by'].shape[0]
    n_a = parameters['waa'].shape[1]
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))
    indices = []
    idx = -1
    counter = 0
    newline_char = char_idx['\n']
    while idx != newline_char and counter < max_len:
        # Same single-step computation as used in training.
        a, y = cell_forward(parameters, a_prev, x)

        np.random.seed(counter + seed)
        idx = np.random.choice(list(range(vocab_size)), p=y.ravel())
        indices.append(idx)

        # Feed the sampled character back in as the next one-hot input.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        a_prev = a
        seed += 1
        counter += 1

    # Terminate truncated sequences. Checking the last index (rather than the
    # counter) avoids a doubled newline when the final permitted character
    # happened to be the newline itself.
    if idx != newline_char:
        indices.append(newline_char)

    return indices

In [14]:
def train(idx_char, char_idx, num_iteration, n_a = 50, g_names = 8, vocab_size = 28, verbose = False):
    """Train the character-level RNN on the names file, one name per step.

    Names are read from 'datasets_Male-Names.csv', lower-cased, stripped and
    shuffled with a fixed seed. Each iteration trains on a single name (the
    list is cycled) and carries the final hidden state over to the next name.
    Every 3000 iterations the smoothed loss and `g_names` sampled names are
    printed.

    NOTE(review): `vocab_size` sizes the parameters and the initial loss here
    but is not passed on to optimize().

    Args:
        idx_char: mapping index -> character, used to decode samples.
        char_idx: mapping character -> index, used to encode names.
        num_iteration: number of training iterations.
        n_a: number of hidden units.
        g_names: number of names sampled at each report.
        vocab_size: input and output size of the model.
        verbose: when True, print extra diagnostics at a few iterations.

    Returns:
        The trained parameter dict.
    """
    parameters = initialize_parameters(n_a, vocab_size, vocab_size)

    # Starting value of the smoothed loss: the loss of a uniform guess over
    # the vocabulary, multiplied by g_names.
    loss = -np.log(1.0/vocab_size)*g_names

    with open('datasets_Male-Names.csv', 'r') as f:
        names = [line.lower().strip() for line in f.readlines()]

    np.random.seed(0)
    np.random.shuffle(names)

    a_prev = np.zeros((n_a, 1))
    for j in range(num_iteration):
        idx = j % len(names)
        name = names[idx]
        name_in_char = list(name)
        name_in_idx = [char_idx[c] for c in name_in_char]

        # Inputs start with None (fed as a zero vector); targets are the same
        # characters shifted by one, with a newline marking the end.
        X = [None] + name_in_idx
        Y = name_in_idx + [char_idx["\n"]]

        curr_loss, _, a_prev = optimize(X, Y, a_prev, parameters, 0.01)

        if verbose:
            if j in (0, len(names) - 1, len(names)):
                print("j = ", j, "idx = ",idx)
            if j == 0:
                print("name = ", name)
                print("name in char = ", name_in_char)
                print("name in index = ", name_in_idx)
                print("X = ", X, "\nY = ", Y)

        # Exponential moving average of the per-name loss.
        loss = loss*0.999 + curr_loss * 0.001

        if j % 3000 != 0:
            continue

        print('Iteration: {}, loss: {}'.format(j, loss))
        # Seeds 0 .. g_names-1 are reused at every report.
        for seed in range(g_names):
            index = sample(parameters, char_idx, seed)
            txt = ''.join(idx_char[i] for i in index)
            print("{}".format(txt[0].upper() + txt[1:]))
    return parameters

In [None]:
# Train for 990000 iterations with verbose diagnostics enabled and keep the
# learned parameters; progress and sampled names are printed by train().
parameters = train(idx_char, char_idx, 990000, verbose = True)

j =  0 idx =  0
name =  monti
name in char =  ['m', 'o', 'n', 't', 'i']
name in index =  [14, 16, 15, 21, 10]
X =  [None, 14, 16, 15, 21, 10] 
Y =  [14, 16, 15, 21, 10, 0]
Iteration: 0, loss: 26.650630685011013
Nkzxwtcleqodyhsqw sijivt

Jnea

Kzwwtcleqodyhsqw sijivt

Nea

Zwwtcleqodyhsqw sijivt

Ea

Wwtcleqodyhsqw sijivt

A

Iteration: 3000, loss: 28.517098659241512
Mawsuadeep avhira

Jah   ril

Jusio dani

Mah

Vturam nikudhesdil kum rimuh   ait

Baahin   mojta

Turam mahubhesepdamku

A crmad

Iteration: 6000, loss: 27.062259176238637
Mavpreep sinr

Kul   sraga shnekarchawal samuom paldav khind

Kusin pankat shi

Mad

Vitl mank sherm

Babbosh

Sooen ralu

Aajit   pumok mund

j =  8147 idx =  8147
j =  8148 idx =  0
Iteration: 9000, loss: 26.066238944799586
Lawrim

Hak   sona

Huroo jangashank

Lab

Vitraj

Ab

Sribeer cranis

Aahor   momra kumart bhi

Iteration: 12000, loss: 25.636269456236192
Mettop mangay

Hala

Hussidam navish arinash

Mah chnda

Vishan paludipranddesh

Ad

Sonbeep