In [1]:
import numpy as np

In [2]:
with open("Constantinople.txt", encoding="utf8") as data:
    text = data.read()

In [3]:
chars = list(set(text))

In [4]:
zero_char = [0]*len(chars)

In [5]:
Input_data = [text[i] for i in range(0,len(text)-1)]
Labels = [text[i] for i in range(1,len(text))]

In [6]:
input_data_vecs = []
labels_vecs = []
zero_char = [0]*len(chars)
for i in range(len(Input_data)):
    idx = chars.index(Input_data[i])
    x = zero_char.copy()
    x[idx] = 1
    input_data_vecs.append(x)
labels_vecs = []
for i in range(len(Labels)):
    idx = chars.index(Labels[i])
    y = zero_char.copy()
    y[idx] = 1
    labels_vecs.append(y)

In [7]:
inputs = np.array(input_data_vecs)
labels = np.array(labels_vecs)

In [8]:
class Vanilla_RNN:
    def __init__(self, learning_rate = 0.0005, n_epochs = 20, sequence_length = 20):
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.sequence_length = sequence_length
        self.W = None
        self.U = None
        self.V = None
        self.b = None
        self.c = None

    def loss(self, y_pred, y):
        return -np.sum(y*np.log(y_pred))


    def fit(self, inputs, outputs, size):
        self.W = np.random.randn(size, inputs.shape[1])
        self.U = np.random.randn(size, size)
        self.V = np.random.randn(inputs.shape[1], size)
        self.b = np.zeros((size, 1))
        self.c = np.zeros((inputs.shape[1], 1))

        #it is like making batches
        inputs_fin = []
        for i in range(0,inputs.shape[0], self.sequence_length):
            inputs_fin.append(inputs[i:i+self.sequence_length])
        outputs_fin = []
        for i in range(0,outputs.shape[0], self.sequence_length):
            outputs_fin.append(outputs[i:i+self.sequence_length])
        
        #iterating n_epochs time
        for n in range(self.n_epochs):

            #iterating batches
            for k in range(len(inputs_fin)):
                h_prev = np.zeros((size, 1))
                h = []
                y_probs = []
                loss_list = []
                for t in range(self.sequence_length):
                    h_prev = np.tanh(np.dot(self.U,h_prev) + np.dot(self.W, np.array(inputs_fin[k][t]).reshape(inputs.shape[1],1)) + self.b)
                    h.append(h_prev)
                    o_t = np.dot(self.V, h_prev) + self.c
                    y_t = np.exp(o_t)/np.sum(np.exp(o_t))
                    y_probs.append(y_t)
                    loss_list.append(self.loss(y_t, outputs_fin[k][t]))
                    
                L = np.sum(loss_list)
                #back_prop
                dV = np.zeros_like(self.V)
                dU = np.zeros_like(self.U)
                dW = np.zeros_like(self.W)
                db = np.zeros_like(self.b)
                dc = np.zeros_like(self.c)

                for t in reversed(range(self.sequence_length)):
                    dy = (np.array(y_probs[t]) - np.array(outputs_fin[k][t]).reshape(inputs.shape[1],1))
                    dV += dy @ h[t].T
                    dc += dy
                        
                    grad = (self.V.T @ dy) * (1-h[t]**2)
                    dW += grad @ np.array(inputs_fin[k][t]).reshape(inputs.shape[1],1).T
                    dU += grad @ h[t-1].T if t > 0 else np.zeros_like(self.U)
                    db += grad

                #updating_params
                self.W -= self.learning_rate * dW
                self.U -= self.learning_rate * dU
                self.V -= self.learning_rate * dV
                self.b -= self.learning_rate * db
                self.c -= self.learning_rate * dc

            print('# of epoch is',n+1,'   total loss is', L)

    def predict(self, input_text_array):
        h_prev = np.zeros_like(self.b)
        ys = []
        for t in range(self.sequence_length):
            y = np.zeros((input_text_array.shape[1],1))
            try:
                h_prev = np.tanh(np.dot(self.U,h_prev) + np.dot(self.W, np.array(input_text_array[t]).reshape(input_text_array.shape[1],1)) + self.b)
            except:
                h_prev = np.tanh(np.dot(self.U,h_prev) + np.dot(self.W, ys[t-1]) + self.b)
            o_t = np.dot(self.V, h_prev) + self.c
            y_t = np.exp(o_t)/np.sum(np.exp(o_t))
            indx = np.argmax(y_t)
            y[indx] = 1
            ys.append(y)

        return ys

In [9]:
rnn = Vanilla_RNN()
rnn.fit(inputs, labels,50)

# of epoch is 1    total loss is 21399.520021164433
# of epoch is 2    total loss is 16033.071594276416
# of epoch is 3    total loss is 15205.59341769396
# of epoch is 4    total loss is 14738.563737748576
# of epoch is 5    total loss is 14396.907170285154
# of epoch is 6    total loss is 14055.451280241909
# of epoch is 7    total loss is 13775.431134920558
# of epoch is 8    total loss is 13561.842986663447
# of epoch is 9    total loss is 13380.88473414147
# of epoch is 10    total loss is 13207.360826641396
# of epoch is 11    total loss is 13130.125315824263
# of epoch is 12    total loss is 13101.018563551672
# of epoch is 13    total loss is 13068.303746881174
# of epoch is 14    total loss is 13034.450197340431
# of epoch is 15    total loss is 12999.598686292267
# of epoch is 16    total loss is 12956.863415703361
# of epoch is 17    total loss is 12916.282831300427
# of epoch is 18    total loss is 12881.893846835643
# of epoch is 19    total loss is 12855.31695859144
# of 

In [11]:
text = 'what'

In [12]:
v= []
for i in range(len(text)):
    idx = chars.index(text[i])
    x = zero_char.copy()
    x[idx] = 1
    v.append(x)
    
test_text = np.array(v)

In [13]:
test_text

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [14]:
jh = rnn.predict(test_text)

In [15]:
new_text = text
for i in range(len(text),len(jh)):
    new_text+=chars[np.argmax(jh[i])]

In [16]:
new_text

'whatan the the the t'

In [None]:
# Don't predicts well after all :))