<a href="https://colab.research.google.com/github/Spinkk/Implementing-ANNs-with-Tensorflow/blob/main/09/RNN_Minseok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
%config Completer.use_jedi = False

  This is separate from the ipykernel package so we can avoid doing imports until


In [2]:
def data_gen_batch (max_t=10, batch_size=32):
    """
    Generate a batch of (input, label) pairs. Input is two query and one sequence digits for each timepoint.
    Output indicates which of two queries occur more frequently for each timepoint cumulatively.    
    """

    while True:
        t = np.random.randint(1, max_t+1) # fix time length of one sample
        
        x = np.random.randint(0, 10, (batch_size,t), dtype=np.int32) # actual input sequence
        c1 = np.random.randint(0, 10, (batch_size,t), dtype=np.int32)  # context 1
        c2 = np.random.randint(0, 10, (batch_size,t), dtype=np.int32)  # context 2

        def cumulative_occurence (seq, context):
            occ = np.zeros((batch_size,t))
            # occurence for a slice of sequence until time point i
            for i in range(t):
                # context digit at time i is broadcasted to match dim of seq
                cb = np.repeat(np.array([context[:,i]]).T, i+1, axis=1)
                # count how often digits match for each sample in a batch
                occ[:,i] = np.count_nonzero(seq[:, :i+1] == cb, axis=1)
            return occ
        
        count1 = cumulative_occurence(x,c1)
        count2 = cumulative_occurence(x,c2)
        label = np.array([count1 >= count2, count2 >= count1])  # (1,0): c1>c2, (0,1):c2>c1, (1,1):c1=c2
        label = np.transpose(label, (1,2,0))
        input = tf.concat([tf.one_hot(x,10), tf.one_hot(c1,10), tf.one_hot(c2,10)],-1)
        input = np.transpose(input, (1,0,2))
              
        yield (input,label)  # one-hot encoded then concatanated
        



ds = tf.data.Dataset.from_generator(generator=data_gen_batch,
                                    output_types=(tf.int32, tf.int32),
                                    # (batch,time,30), (2,batch,time,10)
                                    output_shapes=((None,None,30), (None,None,2))
                                    )

In [3]:
for input,label in ds.take(5):
    print(label.shape)

(32, 5, 2)
(32, 9, 2)
(32, 10, 2)
(32, 7, 2)
(32, 1, 2)


# Weight matrices, their meaning and required dimensionality

X means element wise product.

![image.png](attachment:image.png)


![image-2.png](attachment:image-2.png)

![image-3.png](attachment:image-3.png)

![image-4.png](attachment:image-4.png)

![image-5.png](attachment:image-5.png)

![image-6.png](attachment:image-6.png)

In [4]:
class LSTM_cell (tf.keras.layers.Layer):
    def __init__(self, hidden_dim = 100):
        super(LSTM_cell, self).__init__()
        self.h = hidden_dim


    def build(self, input_shape=100):        
        d = input_shape[1]  # This was the problem: input_shape is not a scalar
        h = self.h
        
        # dimension of output vector and weight matrix
        output_shape = (h,)  
        weight_shape = (h+d,h) 
        
        # forget gate
        self.w_f = self.add_weight(shape= weight_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        self.b_f = self.add_weight(shape=output_shape,
                                   # bias of forget gate is initially 1
                                   initializer=tf.keras.initializers.Constant(value=1.0),
                                   trainable=True)
        # input gate
        self.w_i = self.add_weight(shape=weight_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        self.b_i = self.add_weight(shape=output_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        # candidate layer
        self.w_c = self.add_weight(shape=weight_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        self.b_c = self.add_weight(shape=output_shape, 
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        # output gate
        self.w_o = self.add_weight(shape=weight_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        
        self.b_o = self.add_weight(shape=output_shape, 
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        

    def call(self, x_t, h_t, C_t):
        print('x,h,c', x_t.shape, h_t.shape, C_t.shape)
        # concatenate previous hidden_state and current input (shape is 20,1)
        concat = tf.concat([h_t, x_t], axis=1) # across axis with size 30 for input!
        print('concat shape', concat.shape)
        # calculate f_t (the forget gate)
        f_t = tf.nn.sigmoid( tf.linalg.matmul(concat,self.w_f) + self.b_f )   # weight shapes must be 20 in second dimension, 
                                                                               # otherwise matmul does not work
        # calculate the input gate i_t
        i_t = tf.nn.sigmoid( tf.linalg.matmul(concat,self.w_i) + self.b_i )
        
        # calculate the Cell state candidates (C_hat)
        C_hat_t = tf.nn.tanh( tf.linalg.matmul(concat,self.w_c) + self.b_c )
        
        # calculate the Cell state (C_t)
        C_t = f_t * C_t + i_t * C_hat_t
        
        # calculate the output state (o_t)
        o_t = tf.nn.sigmoid(tf.linalg.matmul(concat,self.w_o) + self.b_o )
        
        # calculate the new hidden state (h_t)
        h_t = o_t * tf.nn.tanh(C_t)
        
        return h_t, C_t

In [5]:
cell = LSTM_cell(hidden_dim=4)
cell(tf.constant([[1.0, 2, 3]]), tf.constant([[1, 1, 1.0, 1]]), tf.constant([[0.0]]))


x,h,c (1, 3) (1, 4) (1, 1)
concat shape (1, 7)


(<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
 array([[ 0.03137889, -0.01149101, -0.09742334,  0.0543483 ]],
       dtype=float32)>, <tf.Tensor: shape=(1, 4), dtype=float32, numpy=
 array([[ 0.06732637, -0.02697579, -0.1783053 ,  0.09609307]],
       dtype=float32)>)

In [6]:
class LSTM_net (tf.keras.Model):
    '''
    Build a LSTM net with a single recurrent node
    '''
    def __init__(self, hidden_dim=100, readin_dim=1000, batch_size=32):
        super(LSTM_net, self).__init__()
        self.h = hidden_dim
        self.readin = tf.keras.layers.Dense(readin_dim)
        self.recurrent = LSTM_cell(hidden_dim = self.h)
        self.readout = tf.keras.layers.Dense(2, activation = "sigmoid")
        
        # h_t and C_t of LSTM cell is saved externally
        # (batch_size, h) to have different states for each sample
        self.hidden_state = tf.Variable(tf.zeros((batch_size,self.h)), trainable=False) 
        self.cell_state = tf.Variable(tf.ones((batch_size,self.h)), trainable = False)
        
        
    @tf.function
    # TODO: for loop doesn't work with tf.function
    def call(self, x):            

        # GRAPH MODE: doesn't work as quite as expected?
        init_state = (0,  # time counter
                      tf.zeros((x.shape[1],self.h)),  # h(0)
                      tf.ones((x.shape[1],self.h)),  # c(0)
                      tf.TensorArray(dtype = tf.float32, size=0, dynamic_size=True))  # res array
        condition = lambda i,*_: i < x.shape[0]  # iterate over all time slice
        
        def body (i, h_t, c_t, ta):
            # forward computation
            digit = x[i]
            digit = self.readin(digit)
            h_t, c_t = self.recurrent(digit, h_t, c_t)
            digit = self.readout(h_t)
            ta = ta.write(i,digit)  # save results
            return i+1, h_t, c_t, ta  # counter+1

        _ = tf.while_loop(condition, body, init_state)
        return self.outputs


        # # EAGER TENSOR MODE
        # # reset hidden and cell state to an inital state
        # self.hidden_state.assign(tf.zeros((x.shape[1],self.h)))
        # self.cell_state.assign(tf.ones((x.shape[1],self.h)))
        # # array to save output sequences of the network
        # self.outputs = tf.TensorArray(dtype = tf.float32, size=0, dynamic_size=True)
        # # unrolling network by iterative forward computation
        # # x has dimension of (time, batch, 30)
        # for i,digit in enumerate(x):  
        #     digit = self.readin(digit)
        #     # update h_t and C_t to save externally
        #     self.hidden_state, self.cell_state = self.recurrent(digit, self.hidden_state, self.cell_state)
        #     digit = self.readout(self.hidden_state)
        #     self.outputs = self.outputs.write(i, digit)  # save output for time i
        # return self.outputs

In [7]:
lstm = LSTM_net()
for x,target in ds.take(1):
    res = lstm(x)
    print(res)

x,h,c (32, 1000) (32, 100) (32, 100)
concat shape (32, 1100)
x,h,c (32, 1000) (32, 100) (32, 100)
concat shape (32, 1100)
tf.Tensor(<unprintable>, shape=(), dtype=variant)
