In [None]:
import time
import datetime

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
 
import tensorflow as tf
import tensorflow_datasets as tfds 

# 1. Task
At every timestep, two query digits are given. The network must decide which of these two digits has appeared more often in the sequence up to the current timestep.

In [None]:
def generate_data_pair(max_length=100):
    """Generate one random (input, output) training pair.

    Args:
        max_length: Upper limit (inclusive) on the sequence length. The
            actual length is drawn uniformly from ``1..max_length``.

    Returns:
        A tuple ``(inputs, output)``:

        * ``inputs`` -- integer array of shape ``(3, length)`` with values in
          ``1..9``. Row 0 is treated as the digit sequence; rows 1 and 2 are
          the two query digits for each timestep.
        * ``output`` -- float array of shape ``(length,)``. ``output[t]`` is
          the most frequent digit in ``inputs[0, :t + 1]``, i.e. up to and
          including timestep ``t``. Ties resolve to the smallest digit
          (``argmax`` returns the first maximum).

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    length = np.random.randint(1, max_length + 1)  # decide sequence length

    # 1 digit + 2 queries per timestep
    inputs = np.random.randint(1, 10, (3, length))
    sequence = inputs[0]

    # TODO: the task description asks for a binary decision between the two
    # query digits; for now the target is the most frequent digit itself.
    output = np.zeros((length))
    for t in range(length):
        # Slice up to t + 1 so the current timestep is included and the
        # slice is never empty (bincount of an empty array has no argmax).
        output[t] = np.bincount(sequence[:t + 1]).argmax()

    return (inputs, output)





# TODO: what type the data should be? ((context,sequence), target digit)?
# currently I am thinking that input should be 3-dimensional, i.e. 2 context + 1 digit 
# and output should be 1-dim logistic decision
def _data_pair_stream():
    """Yield (input, output) pairs forever, one call to generate_data_pair each."""
    # from_generator needs a callable that returns an iterable of elements;
    # generate_data_pair returns a single pair, so wrap it in a generator.
    while True:
        yield generate_data_pair()


# The sequence length differs between samples, hence the `None` dimensions.
# Provisional layout (see TODO above): input is (3, length), target is (length,).
dataset = tf.data.Dataset.from_generator(
    _data_pair_stream,
    output_signature=(
        tf.TensorSpec(shape=(3, None), dtype=tf.int32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    ),
)

TypeError: ignored

# 2. Model
To unroll the network, consider placing copies of the network next to each other, one per timestep, and feeding each copy the input for its timestep. First do it using for loops, then change to graph mode.

In [None]:
class LSTM_cell (tf.keras.layers.Layer):
    """A single LSTM cell that processes one time slice per call.

    Uses a batch-first layout: the input is ``(batch, d)`` and both states
    are ``(batch, h)``. Each gate owns a weight matrix of shape ``(h, h + d)``
    applied to the concatenation ``[h_{t-1}, x_t]`` and a bias of shape
    ``(h,)``.

    After every call the new states are stored on ``self.hidden_state`` and
    ``self.cell_state`` so that an unrolling loop can feed them back in via
    the ``states`` argument of the next call.
    """
    def __init__(self, hidden_dim = 1):
        super(LSTM_cell, self).__init__()
        self.h = hidden_dim  # dimension of cell state and hidden state
        # Latest states produced by `call`; None until the first call.
        self.hidden_state = None
        self.cell_state = None


    def build(self, input_shape):
        """Create the gate weights once the input feature dimension is known."""
        # Only the last (feature) axis matters; input_shape is a shape object,
        # not an int, so it cannot be added to self.h directly.
        input_dim = int(input_shape[-1])
        gate_shape = (self.h, self.h + input_dim)  # dim (h, d+h) with d = input_dim

        # forget gate
        self.w_f = self.add_weight(shape=gate_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        self.b_f = self.add_weight(shape=(self.h,),
                                   # bias of forget gate is initially 1
                                   initializer=tf.keras.initializers.Constant(value=1.0),
                                   trainable=True)
        # input gate
        self.w_i = self.add_weight(shape=gate_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        self.b_i = self.add_weight(shape=(self.h,),
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        # candidate layer
        self.w_c = self.add_weight(shape=gate_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        self.b_c = self.add_weight(shape=(self.h,),
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        # output gate
        self.w_o = self.add_weight(shape=gate_shape,
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)
        self.b_o = self.add_weight(shape=(self.h,),
                                   initializer=tf.random_normal_initializer(),
                                   trainable=True)


    def call(self, input, states=None):
        """Advance the cell by one timestep.

        Args:
            input: Tensor of shape ``(batch, d)`` holding the current time slice.
            states: Optional ``(hidden_state, cell_state)`` pair, each of shape
                ``(batch, h)``. When omitted, both states start at zero.

        Returns:
            The new hidden state ``h_t`` of shape ``(batch, h)``. The new cell
            state is available afterwards as ``self.cell_state``.
        """
        if states is None:
            batch_size = tf.shape(input)[0]
            hidden_state = tf.zeros((batch_size, self.h), dtype=input.dtype)
            cell_state = tf.zeros((batch_size, self.h), dtype=input.dtype)
        else:
            hidden_state, cell_state = states

        # [h_{t-1}, x_t] along the feature axis: (batch, h + d)
        concat_input = tf.concat([hidden_state, input], axis=-1)

        def affine(w, b):
            # (batch, h+d) @ (h+d, h) + (h,) -> (batch, h); with the batch
            # axis first, the bias broadcasts across samples as intended.
            return tf.linalg.matmul(concat_input, w, transpose_b=True) + b

        # forget gate: f_t = sigmoid( w_f @ [h_t-1, x_t] + b_f )
        f_t = tf.keras.activations.sigmoid(affine(self.w_f, self.b_f))
        # input gate
        i_t = tf.keras.activations.sigmoid(affine(self.w_i, self.b_i))
        # candidates for new cell states, use tanh instead of sigmoid
        c_tilde_t = tf.keras.activations.tanh(affine(self.w_c, self.b_c))
        # update cell states: C_t = f_t * C_t-1 + i_t * C_tilde_t
        self.cell_state = tf.math.multiply(f_t, cell_state) + tf.math.multiply(i_t, c_tilde_t)
        # output gate
        o_t = tf.keras.activations.sigmoid(affine(self.w_o, self.b_o))
        # h_t = o_t * tanh(C_t)
        self.hidden_state = tf.math.multiply(o_t, tf.keras.activations.tanh(self.cell_state))

        return self.hidden_state

In [None]:
class LSTM_net (tf.keras.Model):
    '''
    Build a LSTM net with a single recurrent node

    The model chains three stages: a dense read-in layer (100 ReLU units),
    one LSTM_cell with `hidden_dim` state units, and a dense read-out layer
    with a single sigmoid unit for the logistic decision.
    '''
    def __init__(self, hidden_dim=1):
        super(LSTM_net, self).__init__()
        # readin layer dim is subject to change depending on data structure
        self.readin = tf.keras.layers.Dense(100, activation='relu', input_shape=(3,))
        self.recurrent = LSTM_cell(hidden_dim)
        # logistic classification
        self.readout = tf.keras.layers.Dense(1, activation='sigmoid')

    
    def call(self, x):
        '''
        Run a single time slice through read-in, recurrent cell and read-out.

        x is presumably of shape (batch, 3), matching the read-in layer's
        declared input_shape -- TODO confirm once the data layout is fixed.
        Returns the output of the sigmoid read-out layer.
        '''
        x = self.readin(x)
        x = self.recurrent(x)
        x = self.readout(x)
        return x

    # TODO: unroll the network