This page introduces the RNN (recurrent neural network). The main difference between a CNN and an RNN is that a CNN is independent of time: it treats every input on its own. For example, we can use a CNN for animal recognition, but it is not suitable for video or text, because the content of a video or a text is contextual. An RNN is a kind of neural network that makes decisions based on both previous experience and the current input. The basic RNN structure is shown below:
<img src="RNN_Elman_model.png" width="70%">



The number of state neurons is the same as the number of neurons in the hidden layer (m).


Next, I will use TensorFlow to implement an RNN for sine sequence prediction.












In [44]:
import random
import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution()

def generateData(n, num_samples=2000, num_train=1500):
    """Build a random sine-wave dataset for next-value prediction.

    Every sample draws a phase ``k`` uniformly from [1, 50]. Its input is the
    sequence ``[sin(k), sin(k + 1), ..., sin(k + n - 1)]`` and its target is
    the following point, ``sin(k + n)``.

    Args:
        n: Number of consecutive sine points in each input sequence.
        num_samples: Total number of sequences to generate.
        num_train: Number of leading sequences placed in the training split;
            the remaining ones form the test split.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` of float64 NumPy
        arrays with shapes ``(num_train, n)``, ``(num_train, 1)``,
        ``(num_samples - num_train, n)`` and ``(num_samples - num_train, 1)``.

    Raises:
        ValueError: If ``num_train`` is not within ``[0, num_samples]``.
    """
    if not 0 <= num_train <= num_samples:
        raise ValueError(
            "num_train must be between 0 and num_samples, got "
            "num_train={} num_samples={}".format(num_train, num_samples)
        )

    # One random phase per sample, kept as a column so it broadcasts against
    # the time offsets below.
    phases = np.array(
        [random.uniform(1, 50) for _ in range(num_samples)], dtype=np.float64
    ).reshape(num_samples, 1)

    xs = np.sin(phases + np.arange(n))  # (num_samples, n)
    ys = np.sin(phases + n)             # (num_samples, 1)

    train_x, test_x = xs[:num_train], xs[num_train:]
    train_y, test_y = ys[:num_train], ys[num_train:]

    return train_x, train_y, test_x, test_y

class SINE_PREDICTION_RNN:
    """Simple (Elman-style) recurrent network that predicts the next sine value.

    One scalar is fed to the network per time step. The hidden state is
    updated as ``tanh(x_t @ W_input + state @ W_state + bias)`` and, once the
    whole sequence has been consumed, a linear layer maps the final state to
    a single predicted value.
    """

    def __init__(self, state_size, batch_size, train_x, train_y, test_x, test_y, init_state):
        """Create the trainable variables and store the datasets.

        Args:
            state_size: Number of hidden (and therefore state) neurons.
            batch_size: Number of sequences processed together.
            train_x: Training inputs; indexed as ``[sample, time_step]``.
            train_y: Training targets, one row per sample.
            test_x: Test inputs, same layout as ``train_x``.
            test_y: Test targets, same layout as ``train_y``.
            init_state: State every sequence starts from; it is multiplied
                with the ``[state_size, state_size]`` recurrent weights, so
                it is expected to be ``[batch_size, state_size]``.
        """
        self.state_size = state_size
        self.batch_size = batch_size

        # Hidden layer: input -> hidden is [1, state_size] and
        # state -> hidden is [state_size, state_size].
        # The input size is 1, not the sequence length: the points of a
        # sequence are time dependent, so they are fed one per time step and
        # the recurrent state carries the information between them.
        self.W_input = tf.get_variable(name="weight_input", shape=[1, self.state_size], dtype=tf.float32)
        self.W_state = tf.get_variable(name="weight_state", shape=[self.state_size, self.state_size], dtype=tf.float32)
        self.bias = tf.get_variable(name="bias_hidden_layer", shape=[1, self.state_size], dtype=tf.float32)

        # Output layer: hidden -> single predicted value.
        self.W2 = tf.get_variable(name="weight_output", shape=[self.state_size, 1], dtype=tf.float32)
        self.bias2 = tf.get_variable(name="bias_output_layer", shape=[1], dtype=tf.float32)

        # Keep the initial state so every batch can start from it again;
        # current_state holds the final state of the most recent forward pass.
        self.init_state = init_state
        self.current_state = init_state

        self.train_x = train_x
        self.train_y = train_y
        self.test_x = test_x
        self.test_y = test_y

    def rnn_model(self, batch_x):
        """Run the recurrent network over one batch of sequences.

        Args:
            batch_x: Batch of ``batch_size`` input sequences, indexed as
                ``[sample, time_step]``.

        Returns:
            Tensor with one predicted next value per sequence
            (``[batch_size, 1]``).
        """
        # The sequences of different batches are independent, so each forward
        # pass restarts from the initial state instead of inheriting the
        # state left behind by the previous batch.
        state = self.init_state

        for current_input in tf.unstack(batch_x, axis=1):
            # One scalar per sequence for this time step: [batch_size, 1].
            current_input = tf.reshape(current_input, [self.batch_size, 1])
            state = tf.tanh(
                tf.matmul(current_input, self.W_input)
                + tf.matmul(state, self.W_state)
                + self.bias
            )

        self.current_state = state

        # After the whole sequence has been consumed, predict one value.
        logits = tf.matmul(state, self.W2) + self.bias2
        return logits

    def square_loss(self, batch_x, batch_y):
        """Return the element-wise squared error between targets and predictions."""
        logits = self.rnn_model(batch_x)
        loss = tf.square(tf.subtract(batch_y, logits))

        return loss

    def cal_gradient(self, batch_x, batch_y):
        """Return ``(loss, grads_and_vars)`` of ``square_loss`` for one batch."""
        grad = tfe.implicit_value_and_gradients(self.square_loss)

        return grad(batch_x, batch_y)

    def train(self, epochs=10, learning_rate=1e-4):
        """Train on the stored training set with Adam, printing the loss per step.

        Samples that do not fill a complete batch at the end of the training
        set are skipped.

        Args:
            epochs: Number of passes over the training set.
            learning_rate: Learning rate passed to the Adam optimizer.
        """
        print("train rnn network!")

        # Build the optimizer once: Adam keeps running moment estimates, and
        # creating a new optimizer for every step would discard them.
        optimizer = tf.train.AdamOptimizer(learning_rate)
        batches_per_epoch = len(self.train_x) // self.batch_size

        for epoch_id in range(epochs):
            for batch_id in range(batches_per_epoch):
                begin = batch_id * self.batch_size
                end = begin + self.batch_size

                batch_x = self.train_x[begin:end]
                batch_y = self.train_y[begin:end]

                loss, grads_and_vars = self.cal_gradient(batch_x, batch_y)
                optimizer.apply_gradients(grads_and_vars)

                step = epoch_id * batches_per_epoch + batch_id
                print("step: {} loss: {} ".format(step, np.mean(loss.numpy())))

    def predict(self):
        """Print the predicted and the actual value for every test sample.

        Samples that do not fill a complete batch at the end of the test set
        are skipped.
        """
        for batch_id in range(len(self.test_x) // self.batch_size):
            begin = batch_id * self.batch_size
            end = begin + self.batch_size

            batch_x = self.test_x[begin:end]
            batch_y = self.test_y[begin:end]

            logits = self.rnn_model(batch_x)

            for predicted, actual in zip(logits, batch_y):
                print("predicted: {} actual: {}".format(predicted, actual))
            
            

        
        
if __name__ == '__main__':
    # Hyper-parameters of the experiment.
    truncated_backprop_length = 10  # number of sine points in each input sequence
    # The state is the output of the hidden layer, so both share this size.
    state_size = 20
    batch_size = 10  # number of sequences processed together

    # Build the dataset and cast all four arrays to float32 tensors.
    train_x, train_y, test_x, test_y = map(
        tf.to_float, generateData(truncated_backprop_length)
    )
    #print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

    # One zero-initialised state row per sequence in a batch.
    init_state = tf.to_float(np.zeros([batch_size, state_size]))

    sine_rnn = SINE_PREDICTION_RNN(
        state_size=state_size,
        batch_size=batch_size,
        train_x=train_x,
        train_y=train_y,
        test_x=test_x,
        test_y=test_y,
        init_state=init_state,
    )
    sine_rnn.train()
    sine_rnn.predict()
    
    
    

train rnn network!
step: 0 loss: 0.7190004587173462 
step: 1 loss: 1.388447880744934 
step: 2 loss: 0.6019634008407593 
step: 3 loss: 0.9055929183959961 
step: 4 loss: 1.1688721179962158 
step: 5 loss: 0.7631984353065491 
step: 6 loss: 1.2224540710449219 
step: 7 loss: 0.6127781867980957 
step: 8 loss: 0.6786105632781982 
step: 9 loss: 0.4962405562400818 
step: 10 loss: 0.8541634678840637 
step: 11 loss: 0.6780010461807251 
step: 12 loss: 0.5385524034500122 
step: 13 loss: 1.035314917564392 
step: 14 loss: 0.8187798261642456 
step: 15 loss: 1.139542579650879 
step: 16 loss: 0.753635585308075 
step: 17 loss: 0.716976523399353 
step: 18 loss: 0.8586071133613586 
step: 19 loss: 0.11498507112264633 
step: 20 loss: 0.8210633397102356 
step: 21 loss: 0.5490826964378357 
step: 22 loss: 0.5652639865875244 
step: 23 loss: 0.8229311108589172 
step: 24 loss: 0.5456200838088989 
step: 25 loss: 0.4421911835670471 
step: 26 loss: 0.9799153208732605 
step: 27 loss: 0.49566903710365295 
step: 28 loss:

step: 227 loss: 0.03692394495010376 
step: 228 loss: 0.04558062553405762 
step: 229 loss: 0.027381712570786476 
step: 230 loss: 0.03621988371014595 
step: 231 loss: 0.0231131874024868 
step: 232 loss: 0.033056922256946564 
step: 233 loss: 0.035627301782369614 
step: 234 loss: 0.024928351864218712 
step: 235 loss: 0.034772954881191254 
step: 236 loss: 0.023670004680752754 
step: 237 loss: 0.023494835942983627 
step: 238 loss: 0.03821585699915886 
step: 239 loss: 0.017277559265494347 
step: 240 loss: 0.02604086324572563 
step: 241 loss: 0.02429164946079254 
step: 242 loss: 0.028885379433631897 
step: 243 loss: 0.021298084408044815 
step: 244 loss: 0.027177339419722557 
step: 245 loss: 0.022723574191331863 
step: 246 loss: 0.016742516309022903 
step: 247 loss: 0.022038767114281654 
step: 248 loss: 0.016652822494506836 
step: 249 loss: 0.01763334684073925 
step: 250 loss: 0.020681986585259438 
step: 251 loss: 0.015351735055446625 
step: 252 loss: 0.017508454620838165 
step: 253 loss: 0.014

step: 442 loss: 0.0003747878654394299 
step: 443 loss: 0.0008183725876733661 
step: 444 loss: 0.0006772815249860287 
step: 445 loss: 0.0005766879767179489 
step: 446 loss: 0.0010870329570025206 
step: 447 loss: 0.0017742279451340437 
step: 448 loss: 0.0008471909095533192 
step: 449 loss: 0.002649905625730753 
step: 450 loss: 0.0007673726649954915 
step: 451 loss: 0.0006742222467437387 
step: 452 loss: 0.0006428946508094668 
step: 453 loss: 0.0009001254220493138 
step: 454 loss: 0.0008846133714541793 
step: 455 loss: 0.0011036701034754515 
step: 456 loss: 0.00041394648724235594 
step: 457 loss: 0.0011839175131171942 
step: 458 loss: 0.0005899512907490134 
step: 459 loss: 0.0009816198144108057 
step: 460 loss: 0.000978483585640788 
step: 461 loss: 0.0007665107259526849 
step: 462 loss: 0.0011040656827390194 
step: 463 loss: 0.0005054218927398324 
step: 464 loss: 0.0010124805849045515 
step: 465 loss: 0.0005407611606642604 
step: 466 loss: 0.0006510550738312304 
step: 467 loss: 0.00074738

step: 653 loss: 0.000685787876136601 
step: 654 loss: 0.0014766522217541933 
step: 655 loss: 0.00029287455254234374 
step: 656 loss: 0.0004080799990333617 
step: 657 loss: 0.0005843177204951644 
step: 658 loss: 0.00091121019795537 
step: 659 loss: 0.0011502497363835573 
step: 660 loss: 0.0005495001096278429 
step: 661 loss: 0.0005271611153148115 
step: 662 loss: 0.0004390339890960604 
step: 663 loss: 0.0005214704433456063 
step: 664 loss: 0.00039803452091291547 
step: 665 loss: 0.0001621716219233349 
step: 666 loss: 0.0007147775031626225 
step: 667 loss: 9.203827357850969e-05 
step: 668 loss: 0.0005169126088730991 
step: 669 loss: 0.0008223390323109925 
step: 670 loss: 0.0006799557595513761 
step: 671 loss: 0.0006792852655053139 
step: 672 loss: 0.0004424763028509915 
step: 673 loss: 0.0005773579468950629 
step: 674 loss: 0.00035552112967707217 
step: 675 loss: 0.0006714334595017135 
step: 676 loss: 0.0007957937195897102 
step: 677 loss: 0.0006134911673143506 
step: 678 loss: 0.0001855

step: 863 loss: 0.0001202555577037856 
step: 864 loss: 0.000138715302455239 
step: 865 loss: 0.0004492776351980865 
step: 866 loss: 0.0005491409101523459 
step: 867 loss: 0.00031835114350542426 
step: 868 loss: 0.0001873543078545481 
step: 869 loss: 0.0002856372739188373 
step: 870 loss: 0.00017328961985185742 
step: 871 loss: 0.00017380082863382995 
step: 872 loss: 0.00022046748199500144 
step: 873 loss: 0.0002937422541435808 
step: 874 loss: 0.0005305808153934777 
step: 875 loss: 0.00022320091375149786 
step: 876 loss: 0.00032349437242373824 
step: 877 loss: 0.00032957104849629104 
step: 878 loss: 0.0006637586047872901 
step: 879 loss: 0.00011066791921621189 
step: 880 loss: 0.0003479836159385741 
step: 881 loss: 9.252702147932723e-05 
step: 882 loss: 0.00010408012894913554 
step: 883 loss: 0.00032819603802636266 
step: 884 loss: 9.359245450468734e-05 
step: 885 loss: 0.00021224655210971832 
step: 886 loss: 8.597190026193857e-05 
step: 887 loss: 0.00031691844924353063 
step: 888 loss

step: 1069 loss: 0.0003769666363950819 
step: 1070 loss: 0.0001866289385361597 
step: 1071 loss: 0.0002828395226970315 
step: 1072 loss: 0.00035105994902551174 
step: 1073 loss: 0.00031671777833253145 
step: 1074 loss: 0.00012204505037516356 
step: 1075 loss: 0.0001922191440826282 
step: 1076 loss: 0.00010318154090782627 
step: 1077 loss: 0.0006895707338117063 
step: 1078 loss: 0.00034124814555980265 
step: 1079 loss: 0.0002576699189376086 
step: 1080 loss: 0.00017425410624127835 
step: 1081 loss: 0.00027752836467698216 
step: 1082 loss: 0.00043127857497893274 
step: 1083 loss: 0.00045654759742319584 
step: 1084 loss: 0.0002839879598468542 
step: 1085 loss: 0.0005171966040506959 
step: 1086 loss: 0.00034920027246698737 
step: 1087 loss: 0.000255426624789834 
step: 1088 loss: 0.00029006984550505877 
step: 1089 loss: 0.0003410454955883324 
step: 1090 loss: 0.00023606517061125487 
step: 1091 loss: 0.00031283876160159707 
step: 1092 loss: 0.00019808966317214072 
step: 1093 loss: 0.00016033

step: 1273 loss: 0.00018245425599161536 
step: 1274 loss: 0.00016088753181975335 
step: 1275 loss: 0.00038360414328053594 
step: 1276 loss: 0.0003098015731666237 
step: 1277 loss: 0.00018280012591276318 
step: 1278 loss: 0.00014250577078200877 
step: 1279 loss: 0.0002849462325684726 
step: 1280 loss: 0.000142566321301274 
step: 1281 loss: 0.0002453146444167942 
step: 1282 loss: 0.00017582463624421507 
step: 1283 loss: 0.0003243930987082422 
step: 1284 loss: 0.0002498929970897734 
step: 1285 loss: 0.0001775604614522308 
step: 1286 loss: 0.00022101509966887534 
step: 1287 loss: 0.0002257938467664644 
step: 1288 loss: 0.00022584658290725201 
step: 1289 loss: 0.0001691361248958856 
step: 1290 loss: 0.00015800476830918342 
step: 1291 loss: 0.00016092226724140346 
step: 1292 loss: 0.0002866210415959358 
step: 1293 loss: 0.00012399927072692662 
step: 1294 loss: 0.000165311066666618 
step: 1295 loss: 0.00010453021241119131 
step: 1296 loss: 0.00015627380344085395 
step: 1297 loss: 0.0001436713

step: 1477 loss: 0.00019235086801927537 
step: 1478 loss: 9.549099195282906e-05 
step: 1479 loss: 7.28691738913767e-05 
step: 1480 loss: 0.0002357076882617548 
step: 1481 loss: 2.956417483801488e-05 
step: 1482 loss: 2.2916907255421393e-05 
step: 1483 loss: 0.00017088624008465558 
step: 1484 loss: 5.178947321837768e-05 
step: 1485 loss: 0.00010373452823841944 
step: 1486 loss: 4.2059135012095794e-05 
step: 1487 loss: 0.00011572243238333613 
step: 1488 loss: 0.0001178438906208612 
step: 1489 loss: 0.00020034867338836193 
step: 1490 loss: 9.002632577903569e-05 
step: 1491 loss: 0.0001237224932992831 
step: 1492 loss: 5.599837822956033e-05 
step: 1493 loss: 0.00018128463125322014 
step: 1494 loss: 0.0001868188555818051 
step: 1495 loss: 7.760614244034514e-05 
step: 1496 loss: 0.00016168701404239982 
step: 1497 loss: 0.0003406548057682812 
step: 1498 loss: 9.762636909727007e-05 
step: 1499 loss: 0.0004350269737187773 
predicted: [-0.9991721] actual: [-0.99985725]
predicted: [-0.79091656] a

predicted: [0.8764876] actual: [0.85433036]
predicted: [-0.5099655] actual: [-0.5196744]
predicted: [0.7646984] actual: [0.76040584]
predicted: [-0.981187] actual: [-0.9840106]
predicted: [-0.6667417] actual: [-0.68283635]
predicted: [-0.2852126] actual: [-0.2687222]
predicted: [-0.5074318] actual: [-0.52539825]
predicted: [0.13142857] actual: [0.1286001]
predicted: [-0.41885504] actual: [-0.42143512]
predicted: [0.06333071] actual: [0.06752308]
predicted: [0.96250594] actual: [0.95575255]
predicted: [0.9766846] actual: [0.98347986]
predicted: [0.9672445] actual: [0.96003085]
predicted: [-0.38914812] actual: [-0.40259004]
predicted: [0.7031239] actual: [0.7012268]
predicted: [0.97770834] actual: [0.9853912]
predicted: [-0.9653095] actual: [-0.9559565]
predicted: [0.86741257] actual: [0.8613122]
predicted: [-0.15565982] actual: [-0.17463043]
predicted: [0.8301829] actual: [0.82338625]
predicted: [0.26109323] actual: [0.28385133]
predicted: [0.58804643] actual: [0.5999717]
predicted: [-1

The result seems OK. Interestingly, if I replace the AdamOptimizer in the above code with "optimizer = tf.train.GradientDescentOptimizer(0.5)", training does not work, because the loss becomes inf. I need to spend some time later to understand how the choice of optimizer affects training.