In [1]:
import numpy as np

In [2]:
class Softmax:
    """Softmax activation normalised along axis 0 of its input."""

    def __init__(self):
        self.type = 'Softmax'
        # Kept as an attribute of the layer; no method of this class reads it.
        self.eps = 1e-15

    def forward(self, Z):
        """
        Compute the softmax of ``Z`` along axis 0.

        The input is cached on ``self.Z`` and the result on ``self.A``.

        Args:
            Z: array of scores; each column (axis 0) is normalised separately.

        Returns:
            Array of the same shape as ``Z`` whose entries sum to 1 along axis 0.
        """
        self.Z = Z

        # Shift by the maximum along axis 0 so the largest exponent is exactly 0;
        # the shift cancels out in the ratio below.
        shifted = Z - np.max(Z, axis=0)
        exponentials = np.exp(shifted)
        normaliser = np.sum(exponentials, axis=0, keepdims=True)

        self.A = exponentials / normaliser
        return self.A

class Tanh:
    """Hyperbolic-tangent activation that caches its output for the backward pass."""

    def __init__(self):
        self.type = 'Tanh'

    def forward(self, Z):
        """
        Apply tanh element-wise to ``Z`` and cache the result on ``self.A``.

        Returns:
            The activated array (same object that is stored on ``self.A``).
        """
        activation = np.tanh(Z)
        self.A = activation
        return activation

    def backward(self, dA):
        """
        Propagate the upstream gradient ``dA`` through the activation.

        Uses the output cached by the last ``forward`` call:
        d tanh(z) / dz = 1 - tanh(z)^2.

        Returns:
            Gradient with respect to the pre-activation input.
        """
        squared_output = np.power(self.A, 2)
        return dA * (1 - squared_output)
    
class CrossEntropyLoss:
    """
    Cross-entropy between predicted probabilities and target distributions.

    ``forward`` caches its arguments so that ``backward`` can return
    ``Y_hat - Y``, i.e. the gradient with respect to the scores that were fed
    to a softmax producing ``Y_hat``.
    """
    def __init__(self):
        self.type = 'CELoss'
        # Lower bound applied to probabilities before taking the logarithm.
        self.eps = 1e-15
        self.softmax = Softmax()

    def forward(self, Y_hat, Y):
        """
        Compute the cross-entropy loss.

        Args:
            Y_hat: predicted probabilities; axis 0 indexes the classes.
            Y: target distribution with the same shape as ``Y_hat``.

        Returns:
            The loss summed over axis 0 and averaged over the remaining
            entries, as a 0-d value.
        """
        self.Y = Y
        self.Y_hat = Y_hat

        # Floor the probabilities at eps: log(0) is -inf, and 0 * -inf is nan,
        # which would poison the accumulated cost. The cached self.Y_hat is
        # left untouched so backward() is unaffected.
        safe_probs = np.maximum(self.Y_hat, self.eps)

        _loss = - Y * np.log(safe_probs)
        loss = np.sum(_loss, axis=0).mean()

        return np.squeeze(loss)

    def backward(self):
        """
        Return ``Y_hat - Y`` using the values cached by the last ``forward``.
        """
        grad = self.Y_hat - self.Y

        return grad


class SGD:
    """Gradient descent driven by an exponential moving average of the gradients."""

    def __init__(self, lr=0.0075, beta=0.9):
        self.beta = beta
        self.lr = lr

    def optim(self, weights, gradients, velocities=None):
        """
        Perform one update step.

        Args:
            weights: sequence of parameters; array parameters are updated
                in place by the ``-=`` below.
            gradients: sequence of gradients, paired positionally with ``weights``.
            velocities: moving averages returned by the previous call, or
                ``None`` on the first call (treated as all zeros).

        Returns:
            ``(new_weights, velocities)`` - the updated parameters and the new
            moving averages to pass to the next call.
        """
        if velocities is None:
            velocities = [0 for _ in weights]

        velocities = self._update_velocities(gradients, self.beta, velocities)

        updated = []
        for param, step in zip(weights, velocities):
            param -= self.lr * step
            updated.append(param)

        return updated, velocities

    def _update_velocities(self, gradients, beta, velocities):
        """Blend each previous velocity with its gradient: beta * v + (1 - beta) * g."""
        return [
            beta * previous + (1 - beta) * grad
            for grad, previous in zip(gradients, velocities)
        ]


def one_hot_encoding(input, size):
    """
    Turn a sequence of indices into a list of one-hot column vectors.

    Args:
        input: iterable of indices; a ``None`` entry produces an all-zero vector.
        size: length of each vector.

    Returns:
        A list with one ``(size, 1)`` float array per element of ``input``.
    """
    output = []

    for num in input:
        one_hot = np.zeros((size, 1))

        # Identity test: `!= None` dispatches to the element's __ne__, which is
        # not a reliable way to detect the None placeholder.
        if num is not None:
            one_hot[num] = 1

        output.append(one_hot)

    return output

In [3]:
class RNN:
    """
    Recurrent network that processes one sequence at a time.

    At each step the hidden state is ``tanh(Wax @ x + Waa @ h_prev + b)`` and
    the prediction is ``softmax(Wya @ h + by)``.
    """
    def __init__(self, input_dim, output_dim, hidden_dim):
        """
        Initialise the parameters from the hyperparameters.

        Args:
            input_dim: length of each input column vector.
            output_dim: length of each predicted distribution.
            hidden_dim: length of the hidden state.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim

        params = self._initialize_parameters(
                input_dim, output_dim, hidden_dim
        )
        self.Wya, self.Wax, self.Waa, self.by, self.b = params
        self.softmax = Softmax()
        # Optimiser state handed back and forth by optimize(); None until the
        # first update.
        self.oparams = None

    def forward(self, input_X):
        """
        Forward propagation through the network.

        Caches the inputs, one Tanh layer per step, and every hidden state
        (``self.hidden_list[0]`` is the initial all-zero state) for backward().

        Args:
            input_X: sequence of ``(input_dim, 1)`` column vectors, one per step.

        Returns:
            List with one predicted distribution per step.
        """
        self.input_X = input_X

        self.layers_tanh = [Tanh() for x in input_X]
        hidden = np.zeros((self.hidden_dim , 1))

        self.hidden_list = [hidden]
        self.y_preds = []

        for input_x, layer_tanh in zip(input_X, self.layers_tanh):
            input_tanh = np.dot(self.Wax, input_x) + np.dot(self.Waa, hidden) + self.b
            hidden = layer_tanh.forward(input_tanh)
            self.hidden_list.append(hidden)

            input_softmax = np.dot(self.Wya, hidden) + self.by
            y_pred = self.softmax.forward(input_softmax)
            self.y_preds.append(y_pred)

        return self.y_preds

    def loss(self, Y):
        """
        Classification cost of the network: the sum of the per-step
        cross-entropy losses against the predictions of the last forward().

        Args:
            Y: sequence of target vectors, one per step.

        Returns:
            The summed cost.
        """
        self.Y = Y
        # One loss layer per step; each caches its (prediction, target) pair
        # for backward().
        self.layers_loss = [CrossEntropyLoss() for y in self.Y]
        cost = 0

        for y_pred, y, layer in zip(self.y_preds, self.Y, self.layers_loss):
            cost += layer.forward(y_pred, y)

        return cost

    def backward(self):
        """
        Backpropagation through time.

        Reads the caches written by forward() and loss(), walks the steps in
        reverse order and accumulates the gradients into ``self.dWax``,
        ``self.dWaa``, ``self.dWya``, ``self.db`` and ``self.dby``.
        """
        gradients = self._define_gradients()
        self.dWax, self.dWaa, self.dWya, self.db, self.dby, dhidden_next = gradients

        for index, layer_loss in reversed(list(enumerate(self.layers_loss))):
            dy = layer_loss.backward()

            # hidden_list is offset by one because entry 0 is the initial state
            hidden = self.hidden_list[index + 1]
            hidden_prev = self.hidden_list[index]

            # gradients of the output layer
            self.dWya += np.dot(dy, hidden.T)
            self.dby += dy
            # the hidden state feeds both this step's output and the next step
            dhidden = np.dot(self.Wya.T, dy) + dhidden_next

            # gradients of the recurrent layer
            dtanh = self.layers_tanh[index].backward(dhidden)
            self.db += dtanh
            self.dWax += np.dot(dtanh, self.input_X[index].T)
            self.dWaa += np.dot(dtanh, hidden_prev.T)
            dhidden_next = np.dot(self.Waa.T, dtanh)

    def clip(self, clip_value):
        """
        Gradient clipping: limit every gradient entry to
        ``[-clip_value, clip_value]`` in place, to avoid exploding gradients.
        """
        for gradient in [self.dWax, self.dWaa, self.dWya, self.db, self.dby]:
            np.clip(gradient, -clip_value, clip_value, out=gradient)

    def optimize(self, method):
        """
        Update the parameters with a custom optimisation method.

        Args:
            method: object exposing ``optim(weights, gradients, state)`` that
                returns ``(new_weights, new_state)``.
        """
        weights = [self.Wya, self.Wax, self.Waa, self.by, self.b]
        gradients = [self.dWya, self.dWax, self.dWaa, self.dby, self.db]

        weights, self.oparams = method.optim(weights, gradients, self.oparams)
        self.Wya, self.Wax, self.Waa, self.by, self.b = weights


    def generate_names(
        self, index_to_character, max_length=15
    ):
        """
        Sample a sequence of characters from the network.

        Starts from an all-zero input and hidden state, samples one index per
        step from the predicted distribution and feeds it back as a one-hot
        input. Stops after sampling ``'\\n'`` or once ``max_length`` characters
        have been produced; a sampled ``'\\n'`` is included in the result.

        Args:
            index_to_character: mapping from index to character. Its keys are
                the sampling candidates and are also used to index the one-hot
                input (assumes they are 0..input_dim-1 in output order -
                TODO confirm against caller).
            max_length: maximum number of characters to generate.

        Returns:
            The generated characters joined into one string.
        """
        letter = None
        indexes = list(index_to_character.keys())

        letter_x = np.zeros((self.input_dim, 1))
        name = []

        # same recurrence as forward(), with a single reusable Tanh layer
        layer_tanh = Tanh()
        hidden = np.zeros((self.hidden_dim , 1))

        while letter != '\n' and len(name) < max_length:

            input_tanh = np.dot(self.Wax, letter_x) + np.dot(self.Waa, hidden) + self.b
            hidden = layer_tanh.forward(input_tanh)

            input_softmax = np.dot(self.Wya, hidden) + self.by
            y_pred = self.softmax.forward(input_softmax)

            index = np.random.choice(indexes, p=y_pred.ravel())
            letter = index_to_character[index]

            name.append(letter)

            # the sampled character becomes the next input
            letter_x = np.zeros((self.input_dim, 1))
            letter_x[index] = 1

        return "".join(name)


    def _initialize_parameters(self, input_dim, output_dim, hidden_dim):
        """
        Random initialisation of the parameters.

        Weights are standard-normal draws divided by ``sqrt(hidden_dim)``;
        biases are zeros.

        Returns:
            ``(Wya, Wax, Waa, by, b)``.
        """
        den = np.sqrt(hidden_dim)

        weights_y = np.random.randn(output_dim, hidden_dim) / den
        bias_y = np.zeros((output_dim, 1))

        weights_ax = np.random.randn(hidden_dim, input_dim) / den
        weights_aa = np.random.randn(hidden_dim, hidden_dim) / den
        bias = np.zeros((hidden_dim, 1))

        return weights_y, weights_ax, weights_aa, bias_y, bias


    def _define_gradients(self):
        """
        Create zero-filled gradient accumulators.

        Returns:
            ``(dWax, dWaa, dWya, db, dby, da_next)`` where ``da_next`` is the
            zero gradient flowing into the last hidden state.
        """
        dWax = np.zeros_like(self.Wax)
        dWaa = np.zeros_like(self.Waa)
        dWya = np.zeros_like(self.Wya)

        db = np.zeros_like(self.b)
        dby = np.zeros_like(self.by)

        da_next = np.zeros_like(self.hidden_list[0])

        return dWax, dWaa, dWya, db, dby, da_next

In [5]:
# Build the vocabulary from the lower-cased raw text, which still contains
# the line breaks, so '\n' is part of the character set. The context manager
# closes the handle, which a bare open(...).read() leaves to the garbage
# collector.
with open('nume.txt', 'r') as f:
    raw_text = f.read().lower()
characters = list(set(raw_text))

# Sort once so that both lookup tables share the same index assignment.
sorted_characters = sorted(characters)
character_to_index = {character:index for index,character in enumerate(sorted_characters)}
index_to_character = {index:character for index,character in enumerate(sorted_characters)}

# Read the file again line by line to get one entry per name.
with open("nume.txt") as f:
    person_names = f.readlines()


person_names = [name.lower().strip() for name in person_names]
np.random.shuffle(person_names)

In [9]:
# Number of training iterations; each iteration processes a single name.
num_epochs = 100001
# Size the network from the vocabulary built in the data-loading cell rather
# than hard-coding it: one-hot indices and the sampling distribution in
# generate_names both require these to equal the number of characters.
input_dim = len(character_to_index)
output_dim = len(character_to_index)
hidden_dim = 50

# initialize and define the model hyperparameters
model = RNN(input_dim, output_dim, hidden_dim)
optim = SGD(lr=0.001)
costs = []

In [10]:
for epoch in range(num_epochs):

    # Cycle through the names, one name per iteration.
    index = epoch % len(person_names)
    encoded_name = [character_to_index[ch] for ch in person_names[index]]

    # Inputs are the name shifted right by one step (the leading None becomes
    # an all-zero vector); targets are the name followed by the newline index.
    # Both are converted to one-hot encoding.
    X = one_hot_encoding([None] + encoded_name, input_dim)
    Y = one_hot_encoding(encoded_name + [character_to_index["\n"]], output_dim)

    # One training step: forward pass, cost, backpropagation through time,
    # gradient clipping, parameter update.
    model.forward(X)
    cost = model.loss(Y)
    model.backward()
    model.clip(clip_value=1)
    model.optimize(optim)

    # Report only every 10000 iterations.
    if epoch % 10000 != 0:
        continue

    print("Cost after iteration %d: %f" % (epoch, cost))
    costs.append(cost)

    print('Names created:', '\n')
    for i in range(4):
        name = model.generate_names(index_to_character)
        print(name)

Cost after iteration 0: 29.549004
Names created: 

uxbkcbonhmctijd
wbyruj

nsvyvjiefdkqzhb
zlputwkuhecealh
Cost after iteration 10000: 14.830316
Names created: 

rilrll

fajis

miroant

yareta

Cost after iteration 20000: 14.754257
Names created: 

ajzre

tellef

totira

hinnein

Cost after iteration 30000: 20.483688
Names created: 

xrauri

biestaru

gryshes

cwiryy

Cost after iteration 40000: 25.459537
Names created: 

yterlicto

cerys

ryen

jakin

Cost after iteration 50000: 15.062868
Names created: 

jeza

jodohra

eafha

rasen

Cost after iteration 60000: 22.603483
Names created: 

drionelle

coreille

yldi

maliano

Cost after iteration 70000: 12.688049
Names created: 

jada

kishe

idshan

kaniea

Cost after iteration 80000: 13.247217
Names created: 

fardany

ahonachie

uy

teyuda

Cost after iteration 90000: 15.419439
Names created: 

anizau

chalita

tavewna

kileye

Cost after iteration 100000: 17.492231
Names created: 

akuyla

darir

rizida

calles

