In [1]:
### First get data.

import numpy as np
from keras.utils.np_utils import to_categorical # Just for one-hots!
import random
import os

os.chdir(r"C:\Users\tedjt\Desktop\OIST\A313")
raw_text = open("wonderland.txt", 'r', encoding='utf-8').read()
raw_text = raw_text.lower().replace("\n", " ")

chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

n_chars, n_vocab = len(raw_text), len(chars)
seq_length = 5
T = seq_length
data_x, data_y = [], []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    data_x.append([char_to_int[char] for char in seq_in])
    data_y.append(char_to_int[seq_out])
n_patterns = len(data_x)

x = np.reshape(data_x, (n_patterns, seq_length))
x = to_categorical(data_x)
y = to_categorical(data_y)
x_keep, y_keep = x, y

print("Length of Alice in Wonderland: \t{} characters.".format(n_chars))
print("Different characters: \t\t{} characters.".format(n_vocab))
print("There are {} sequences of {} characters in a row.".format(n_patterns, seq_length))

def train_test(xs, ys, train_percent = .85):
    
    index = [i for i in range(len(ys))]
    random.shuffle(index)
    train_index = index[:int(train_percent * len(index))]
    test_index = index[int(train_percent * len(index)):]
    
    x_train = xs[train_index]
    x_test = xs[test_index]
    y_train = ys[train_index]
    y_test = ys[test_index]
    
    return(x_train, x_test, y_train, y_test)
    
x_train, x_test, y_train, y_test = train_test(x, y)
print("\nTrain: \n\t Input: {0}, \t Output: {1}.".format(x_train.shape, y_train.shape))
print("\nTest: \n\t Input: {0}, \t Output: {1}.".format(x_test.shape, y_test.shape))

def get_batch(test = False, size = 64):
    if(test): x, y = x_test, y_test
    else:     x, y = x_train, y_train
    index = [i for i in range(len(y))]
    random.shuffle(index)
    index = index[:size]
    x, y = x[index], y[index]
    return(x, y)

Length of Alice in Wonderland: 	143426 characters.
Different characters: 		38 characters.
There are 143421 sequences of 5 characters in a row.

Train: 
	 Input: (121907, 5, 38), 	 Output: (121907, 38).

Test: 
	 Input: (21514, 5, 38), 	 Output: (21514, 38).


In [2]:
### Now make model.

hidden_dim = 128

U = np.random.uniform(0, 1, (hidden_dim, seq_length))
W = np.random.uniform(0, 1, (hidden_dim, hidden_dim))
V = np.random.uniform(0, 1, (n_vocab, hidden_dim))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def forward(x):
    layers = []
    prev_s = np.zeros((hidden_dim, 1))
    for t in range(seq_length):
        new_input = np.zeros(x.shape)
        new_input[t] = x[t]
        mulu = np.dot(U, new_input)
        mulu = np.expand_dims(mulu[:,np.argmax(new_input[t])], 1) # Is this right?
        mulw = np.dot(W, prev_s)
        add = mulw + mulu
        s = sigmoid(add)
        mulv = np.dot(V, s)
        layers.append({'s':s, 'prev_s':prev_s})
        prev_s = s
    return(mulv)

In [3]:
def try_it(times = 1, length = 10):
    index = [i for i in range(x_test.shape[0])]
    random.shuffle(index)
    index = index[:times]
    xs = x_test[index]
    
    for x in xs:
        x_ = np.argmax(x, 1)
        x_ = ''.join([int_to_char[x_[i]] for i in range(seq_length)])
        pred_list = []
        for i in range(length):
            pred = forward(x)
            pred_list.append(int_to_char[np.argmax(pred)])
            pred = np.expand_dims(to_categorical(char_to_int[pred_list[-1]], num_classes = n_vocab), axis = 0)
            x = np.vstack([x[1:], pred])
        print("\nGiven:\n\t'{}'\n\npredicted:\n\t'{}'".format(x_, x_ + " | " + "".join(pred_list)))
    
try_it()


Given:
	' say '

predicted:
	' say  | rrrrrrrrrr'


In [4]:
def try_it_easier(length = 10):
    index = [i for i in range(x_keep.shape[0]-(length + seq_length))]
    random.shuffle(index)
    index = index[0]
    pred_list = []
    for i in range(length):
        seq_in = raw_text[index + i : index + i + seq_length]
        seq_in = [char_to_int[char] for char in seq_in]
        seq_in = to_categorical(seq_in, n_vocab)
        pred = forward(seq_in)
        pred_list.append(int_to_char[np.argmax(pred)])
    print("\nGiven:\n\t'{}'\n\npredicted:\n\t'{}'".format(
        raw_text[index : index + seq_length] + " | " + raw_text[index + seq_length : index + length + seq_length], 
        raw_text[index : index + seq_length] + " | " + "".join(pred_list)))

try_it_easier()


Given:
	'dormo | use out of'

predicted:
	'dormo | rrrrrrrrrr'


In [5]:
def loser(test = False, text = False, batch_size = "all"):
    if(test): X_, Y_ = x_test, y_test 
    else:     X_, Y_ = x_train, y_train  
    if(batch_size != "all"):
        index = [i for i in range(len(X_))]
        random.shuffle(index)
        index = index[:batch_size]
        X_, Y_ = X_[index], Y_[index]
    loss = 0.0
    for i in range(X_.shape[0]):
        x, y = X_[i], Y_[i]                    
        prev_s = np.zeros((hidden_dim, 1))          
        for t in range(seq_length):
            new_input = np.zeros(x.shape)    
            new_input[t] = x[t]
            mulu = np.dot(U, new_input)
            mulu = np.expand_dims(mulu[:,np.argmax(new_input[t])], 1) # Is this right?
            mulw = np.dot(W, prev_s)
            add = mulw + mulu
            s = sigmoid(add)
            if(text):
                print("\nU:\t{}\t*\tNew Input:\t{}\t\t=\tmulu:\t{}.".format(U.shape, new_input.shape, mulu.shape))
                print("W:\t{}\t*\tprev_s:\t\t{}\t=\tmulw:\t{}.".format(W.shape, prev_s.shape, mulw.shape))
                print("mulu + mulw\t=\ts:\t{}.".format(s.shape))
                print("mulu: {} min, {} max. \tmulw: {} min, {} max.".format(
                    round(mulu.min(), 3), 
                    round(mulu.max(), 3), 
                    round(mulw.min(), 3), 
                    round(mulw.max(), 3)))
            prev_s = s
        mulv = np.dot(V, s)
        if(text):
            print("\nV:\t{}\t*\ts:\t\t{}\t=\tmulv:\t{}.\ty: {}.".format(V.shape, s.shape, mulv.shape, y.shape))
            print("mulv: {} min, {} max.".format(round(mulv.min(), 3), round(mulv.max(), 3)))
        loss_per_record = (y - mulv.squeeze(1))**2 / 2
        loss += loss_per_record
    loss = loss / float(y.shape[0])
    loss = loss.sum()
    return(loss)

loser(test = True, text = True, batch_size = 1)


U:	(128, 5)	*	New Input:	(5, 38)		=	mulu:	(128, 1).
W:	(128, 128)	*	prev_s:		(128, 1)	=	mulw:	(128, 1).
mulu + mulw	=	s:	(128, 1).
mulu: 0.007 min, 0.996 max. 	mulw: 0.0 min, 0.0 max.

U:	(128, 5)	*	New Input:	(5, 38)		=	mulu:	(128, 1).
W:	(128, 128)	*	prev_s:		(128, 1)	=	mulw:	(128, 1).
mulu + mulw	=	s:	(128, 1).
mulu: 0.004 min, 0.984 max. 	mulw: 34.309 min, 44.698 max.

U:	(128, 5)	*	New Input:	(5, 38)		=	mulu:	(128, 1).
W:	(128, 128)	*	prev_s:		(128, 1)	=	mulw:	(128, 1).
mulu + mulw	=	s:	(128, 1).
mulu: 0.024 min, 0.986 max. 	mulw: 54.807 min, 71.483 max.

U:	(128, 5)	*	New Input:	(5, 38)		=	mulu:	(128, 1).
W:	(128, 128)	*	prev_s:		(128, 1)	=	mulw:	(128, 1).
mulu + mulw	=	s:	(128, 1).
mulu: 0.001 min, 0.999 max. 	mulw: 54.807 min, 71.483 max.

U:	(128, 5)	*	New Input:	(5, 38)		=	mulu:	(128, 1).
W:	(128, 128)	*	prev_s:		(128, 1)	=	mulw:	(128, 1).
mulu + mulw	=	s:	(128, 1).
mulu: 0.012 min, 1.0 max. 	mulw: 54.807 min, 71.483 max.

V:	(38, 128)	*	s:		(128, 1)	=	mulv:	(38, 1).	y: (38,

2046.1533556393242

In [None]:
### Train!

bptt_truncate = 5
min_clip_value = -10
max_clip_value = 10
learning_rate = .001

for epoch in range(25):
    # train model
    for i in range(x_train.shape[0]):
        x, y = x_train[i], y_train[i]

        layers = []
        prev_s = np.zeros((hidden_dim, 1))
        dU = np.zeros(U.shape)
        dV = np.zeros(V.shape)
        dW = np.zeros(W.shape)

        dU_t = np.zeros(U.shape)
        dV_t = np.zeros(V.shape)
        dW_t = np.zeros(W.shape)

        dU_i = np.zeros(U.shape)
        dW_i = np.zeros(W.shape)
        
        # forward pass
        for t in range(T):
            new_input = np.zeros(x.shape)
            new_input[t] = x[t]
            mulu = np.dot(U, new_input)
            mulu = np.expand_dims(mulu[:,np.argmax(new_input[t])], 1) # Is this right?
            mulw = np.dot(W, prev_s)
            add = mulw + mulu
            s = sigmoid(add)
            #print("\nU:\t{}\t*\tNew Input:\t{}\t\t=\tmulu:\t{}.".format(U.shape, new_input.shape, mulu.shape))
            #print("W:\t{}\t*\tprev_s:\t\t{}\t=\tmulw:\t{}.".format(W.shape, prev_s.shape, mulw.shape))
            #print("mulu + mulw\t=\ts:\t{}.".format(s.shape))
            #print("mulu: {} min, {} max. \tmulw: {} min, {} max.".format(
            #    round(mulu.min(), 3), 
            #    round(mulu.max(), 3), 
            #    round(mulw.min(), 3), 
            #    round(mulw.max(), 3)))
            layers.append({'s':s, 'prev_s':prev_s})
            prev_s = s
        mulv = np.dot(V, s)
        #print("\nV:\t{}\t*\ts:\t\t{}\t=\tmulv:\t{}.\ty: {}.".format(V.shape, s.shape, mulv.shape, y.shape))
        #print("mulv: {} min, {} max.".format(round(mulv.min(), 3), round(mulv.max(), 3)))
        # derivative of pred
        dmulv = (mulv.squeeze(1) - y)
        dmulv = np.expand_dims(dmulv, 1)
        #print(dmulv.shape, mulv.shape, y.shape)

        # backward pass
        for t in range(T):
            #print("dmulv: {}, other: {}".format(dmulv.shape, np.transpose(layers[t]['s']).shape))
            dV_t = np.dot(dmulv, np.transpose(layers[t]['s']))
            dsv = np.dot(np.transpose(V), dmulv)

            ds = dsv
            dadd = add * (1 - add) * ds

            dmulw = dadd * np.ones_like(mulw)

            dprev_s = np.dot(np.transpose(W), dmulw)


            for i in range(t-1, max(-1, t-bptt_truncate-1), -1):
                #print(i)
                ds = dsv + dprev_s
                dadd = add * (1 - add) * ds

                dmulw = dadd * np.ones_like(mulw)
                dmulu = dadd * np.ones_like(mulu)
                
                #print("dW_i BEFORE:", dW_i.shape)

                dW_i = np.dot(W, layers[t]['prev_s'])    # dW_i changes shape here!
                dprev_s = np.dot(np.transpose(W), dmulw)
                
                #print("dW_i AFTER:", dW_i.shape)
                #print("dU_i BEFORE:", dU_i.shape)

                new_input = np.zeros(x.shape)
                new_input[t] = x[t]
                dU_i = np.dot(U, new_input)              # dU_i changes shape here!
                dU_i = np.expand_dims(dU_i[:,np.argmax(new_input[t])], 1) # Is this right?
                dx = np.dot(np.transpose(U), dmulu)
                
                #print("dU_i AFTER:", dU_i.shape)
                
                dU_t += dU_i
                dW_t += dW_i
                
            dV += dV_t
            dU += dU_t
            dW += dW_t

            if dU.max() > max_clip_value:
                dU[dU > max_clip_value] = max_clip_value
            if dV.max() > max_clip_value:
                dV[dV > max_clip_value] = max_clip_value
            if dW.max() > max_clip_value:
                dW[dW > max_clip_value] = max_clip_value


            if dU.min() < min_clip_value:
                dU[dU < min_clip_value] = min_clip_value
            if dV.min() < min_clip_value:
                dV[dV < min_clip_value] = min_clip_value
            if dW.min() < min_clip_value:
                dW[dW < min_clip_value] = min_clip_value

        # update
        U -= learning_rate * dU
        V -= learning_rate * dV
        W -= learning_rate * dW
    
    loss = loser()
    val_loss = loser(True)
    print('Epoch: ', epoch + 1, ', Loss: ', loss, ', Val Loss: ', val_loss)
    
    print("\nLet's try it!")
    try_it(times = 1, length = 100)
    try_it_easier(length = 100)

Epoch:  1 , Loss:  1616.7112975429702 , Val Loss:  285.26411411998384

Let's try it!

Given:
	' wild'

predicted:
	' wild |                                                                                                     '

Given:
	'iece  | all round.  'but she must have a prize herself, you know,' said the mouse.  'of course,' the dodo re'

predicted:
	'iece  |                                                                                                     '
Epoch:  2 , Loss:  1658.220678419812 , Val Loss:  292.64576646044605

Let's try it!

Given:
	's a s'

predicted:
	's a s |                                                                                                     '

Given:
	'ure t | o ask the question?' said the lory.  alice replied eagerly, for she was always ready to talk about h'

predicted:
	'ure t |                                                                                                     '
Epoch:  3 , Loss:  1682.299772099967 , Val Loss:  296.8739678