In [2]:
import numpy as np

def sigmoid(input, deriv=False):
    if deriv:
        return input * (1 - input)
    else:
        return 1 / (1 + np.exp(-input))

def tanh(input, deriv=False):
    if deriv:
        return 1 - input ** 2
    else:
        return np.tanh(input)

def softmax(input):
    e_input = np.exp(input - np.max(input))
    return e_input / e_input.sum(axis=0, keepdims=True)

class GRUModel:
    def __init__(self, vocab_size, hidden_size):
        self.vocab_size = vocab_size
        self.h_size = hidden_size
        self.learning_rate = 1e-1

        self.init_parameters()
        self.init_adagrad()

    def init_parameters(self):
        self.Wz = np.random.rand(self.h_size + self.vocab_size, self.h_size) * 0.1 - 0.05
        self.bz = np.zeros((self.h_size, 1))

        self.Wr = np.random.rand(self.h_size + self.vocab_size, self.h_size) * 0.1 - 0.05
        self.br = np.zeros((self.h_size, 1))

        self.Wh = np.random.rand(self.h_size + self.vocab_size, self.h_size) * 0.1 - 0.05
        self.bh = np.zeros((self.h_size, 1))

        self.Wy = np.random.rand(self.vocab_size, self.h_size) * 0.1 - 0.05
        self.by = np.zeros((self.vocab_size, 1))

    def init_adagrad(self):
        self.mdWy = np.zeros_like(self.Wy)
        self.mdWh = np.zeros_like(self.Wh)
        self.mdWr = np.zeros_like(self.Wr)
        self.mdWz = np.zeros_like(self.Wz)
        self.mdby = np.zeros_like(self.by)
        self.mdbh = np.zeros_like(self.bh)
        self.mdbr = np.zeros_like(self.br)
        self.mdbz = np.zeros_like(self.bz)

    def forward_pass(self, inputs):
        hprev = np.zeros((self.h_size, 1))
        z, r, h_hat, h = {}, {}, {}, {-1: hprev}

        for t in range(len(inputs)):
            x = np.zeros((self.vocab_size, 1))
            x[inputs[t]] = 1

            concat_hx = np.concatenate((h[t-1], x))
            r[t] = sigmoid(np.dot(self.Wr.T, concat_hx) + self.br)
            z[t] = sigmoid(np.dot(self.Wz.T, concat_hx) + self.bz)

            concat_hrx = np.concatenate((np.multiply(r[t], h[t-1]), x))
            h_hat[t] = tanh(np.dot(self.Wh.T, concat_hrx) + self.bh)
            h[t] = np.multiply(z[t], h[t-1]) + np.multiply(1 - z[t], h_hat[t])

        y = np.dot(self.Wy, h[t]) + self.by
        probDis = softmax(y)

        return z, r, h_hat, h, y, probDis

    def backward_pass(self, z, r, h_hat, h, y, probDis, inputs, targets):
        dWy, dWh, dWr, dWz = np.zeros_like(self.Wy), np.zeros_like(self.Wh), np.zeros_like(self.Wr), np.zeros_like(self.Wz)
        dby, dbh, dbr, dbz = np.zeros_like(self.by), np.zeros_like(self.bh), np.zeros_like(self.br), np.zeros_like(self.bz)
        
        loss = 0
        dy = probDis.copy()
        dy[targets] -= 1
        dWy += np.dot(dy, h[len(inputs) -1].T)
        dby += dy
        dhnext = np.dot(self.Wy.T, dy)

        for t in reversed(range(len(inputs))):
            x = np.zeros((self.vocab_size, 1))
            x[inputs[t]] = 1

            loss += -np.sum(x * np.log(probDis + 1e-9))

            dh = dhnext
            dh_hat = np.multiply(dh, 1 - z[t])
            dh_hat_raw = np.multiply(dh_hat, tanh(h_hat[t], deriv=True))
            dWh += np.dot(np.concatenate((r[t] * h[t-1], x), axis=0), dh_hat_raw.T)
            dbh += dh_hat_raw

            dr = np.dot(self.Wh[:self.h_size, :].T, dh_hat_raw) * h[t-1]
            dr_raw = np.multiply(dr, sigmoid(r[t], deriv=True))
            dWr += np.dot(np.concatenate((h[t-1], x), axis=0), dr_raw.T)
            dbr += dr_raw

            dz = np.multiply(dh, h_hat[t] - h[t-1])
            dz_raw = np.multiply(dz, sigmoid(z[t], deriv=True))
            dWz += np.dot(np.concatenate((h[t-1], x), axis=0), dz_raw.T)
            dbz += dz_raw

            dhprev = np.dot(self.Wz[:self.h_size, :].T, dz_raw) + np.dot(self.Wr[:self.h_size, :].T, dr_raw) + np.dot(self.Wh[:self.h_size, :].T, dh_hat_raw)
            dhnext = dhprev

        for param, dparam, mem in zip([self.Wy, self.by, self.Wh, self.bh, self.Wr, self.br, self.Wz, self.bz], 
                                      [dWy, dby, dWh, dbh, dWr, dbr, dWz, dbz], 
                                      [self.mdWy, self.mdby, self.mdWh, self.mdbh, self.mdWr, self.mdbr, self.mdWz, self.mdbz]):
            mem += dparam * dparam
            param -= self.learning_rate * dparam / np.sqrt(mem + 1e-8)

        return loss

    def train(self, inputs, targets, n_iters=100):
        for j in range(n_iters):
            for i in range(len(inputs)):
                z, r, h_hat, h, y, probDis = self.forward_pass(inputs[i])
                tgt = np.zeros((self.vocab_size, 1))
                tgt[targets] = 1
                loss = self.backward_pass(z, r, h_hat, h, y, probDis, inputs[i], targets[i])
            print(f"iteration: {j} loss: {loss}")

    def generate_text(self, inputs):
        _, _, _, _, _, probDis = self.forward_pass(inputs)
        return np.argmax(probDis, axis=0)
    
    # def generate_text(self, inputs, length):
    #     indexes = []
    #     for i in range(length):
    #         _, _, _, _, _, probDis = self.forward_pass(inputs)
    #         output = np.argmax(probDis, axis=0)
    #         indexes.append(output)
    #         input
    #     return indexes


In [3]:
import numpy as np

# Seed random
np.random.seed(0)

# Read data and setup maps for integer encoding and decoding.
with open('input.txt', 'r') as file: 
	data = file.read() 
    
chars = sorted(list(set(data))) # Sort makes model predictable (if seeded).
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 57254 characters, 67 unique.


In [4]:
inputs = []
targets = []
seq_length = 5
oo = 0
for k in range(0, len(data) - seq_length):
    input_seq = [char_to_ix[ch] for ch in data[k:k+seq_length]]
    target_char = char_to_ix[data[k+seq_length]]
    inputs.append(input_seq)
    targets.append(target_char)

inputs = inputs[:100]
targets = targets[:100]

In [5]:
print(type(targets))

<class 'list'>


In [6]:
neee = GRUModel(vocab_size, 10)
neee.train(inputs, targets,100)
    

iteration: 0 loss: 12.442233905744724
iteration: 1 loss: 12.411576581812268
iteration: 2 loss: 12.568232889896482
iteration: 3 loss: 12.707263866052214
iteration: 4 loss: 12.823577926477064
iteration: 5 loss: 12.916729800686902
iteration: 6 loss: 12.990486162511754
iteration: 7 loss: 13.049087962612994
iteration: 8 loss: 13.095983120826988
iteration: 9 loss: 13.133741043160867
iteration: 10 loss: 13.16424037965495
iteration: 11 loss: 13.18886522813448
iteration: 12 loss: 13.208655715063756
iteration: 13 loss: 13.224413682789418
iteration: 14 loss: 13.23677418544834
iteration: 15 loss: 13.246252863780699
iteration: 16 loss: 13.253277001614274
iteration: 17 loss: 13.258205961424357
iteration: 18 loss: 13.261344957422967
iteration: 19 loss: 13.262954756954565
iteration: 20 loss: 13.263258902537979
iteration: 21 loss: 13.262449380809626
iteration: 22 loss: 13.260691257569734
iteration: 23 loss: 13.258126567600467
iteration: 24 loss: 13.254877625086472
iteration: 25 loss: 13.251049857790099

In [9]:
hh = neee.generate_text(inputs[2])
print(ix_to_char[targets[5]])
print(hh)

m
[1]
