#  Order Invariant Sequences Using LSTMs


### **Purpose:** 
The purpose of this program is to perform meaningful calculations on a sequence of data using a Long Short-Term Memory (LSTM) network. In particular, the program takes in sets of 10 images of handwritten digits and tries to count how many of each digit each set contains. This is a variation on a simple classification problem. The code is a simple implementation of the paper "Order Matters: Sequence to Sequence for Sets" (Vinyals et al., 2015b).

### **Setup:** 
This program requires the MNIST training data to be in the same folder as this code.


#### Configure parameters for this network:
`
#HyperParameters

seq_batch_size = 3

num_epochs = 100

num_cycles = 2000000  # iterations of the training loop

learning_rate = 0.0015

stride = 1  # handed to the conv-layer helper for every convolution


#Parameters


layer1_output_nodes = 32  # first conv layer of the embedding net and of the decoder

layer2_output_nodes = 64  # second conv layer of the embedding net

dense_layer1_output_nodes = 64  # NOTE(review): the dense layers shown use literal widths, not this value

dense_layer2_output_nodes = 128  # NOTE(review): same as above - confirm whether it is still needed

img_width = 28  # image placeholder shape: width

img_height = 28  # image placeholder shape: height

img_color_dim = 1  # image placeholder shape: colour channels

seq_length = 10  # images per input set

T_steps = 5  # presumably the step count given to process_block - confirm at the call site

lamda = 0.001

mem_size = 64  # width of one memory/query vector; the recurrent state is twice this

save_rate=5000`

#### Instantiate placeholders

In [None]:
X = tf.placeholder(
    dtype=tf.float32,
    shape=[None, img_width, img_height, img_color_dim],  # batch of images
)
X_seq = tf.placeholder(
    dtype=tf.float32,
    shape=[None],  # rank-1 input of unspecified length
)
X_Train, Y_Train = data.import_mnist()
Qt = tf.placeholder(
    dtype=tf.float32,
    shape=[None],  # fed with the query state in the training loop
)
Ct = tf.placeholder(
    dtype=tf.float32,
    shape=[None],  # fed with the cell state in the training loop
)


   ### Create Embedding network which is just a CNN
    
`def conv_net(X, mem_size,reuse=True):
    with tf.variable_scope("Convnet", reuse=tf.AUTO_REUSE):
        layer1 = nnUtils.create_new_conv_layer(X, layer1_output_nodes, [5, 5], [1, 1], stride, "layer1")
        layer2 = nnUtils.create_new_conv_layer(layer1, layer2_output_nodes, [5, 5], [1, 1], stride, "layer2")
        flattened = tf.contrib.layers.flatten(layer2)
        dense_layer1 = tf.layers.dense(flattened,128, activation=tf.nn.relu)
        dense_layer2=tf.layers.dense(dense_layer1,256, activation=tf.nn.relu)
        mem_vector = tf.layers.dense(dense_layer2, mem_size)
    return mem_vector`

### Function creates a memory cell

In [None]:
def q_t_LSTM(q_t_p,ct,reuse=False):
    """Advance the memory cell one step and project its output to a query.

    Args:
        q_t_p: previous state fed to the cell (2 * mem_size values).
        ct: previous cell state (2 * mem_size values).
        reuse: forwarded to the "Memory/Cell" variable scope; pass True on
            every call after the one that created the variables.

    Returns:
        (qt, ct): the query, a dense projection of the cell output down to
        mem_size units, and the updated cell state.
    """
    state_width = mem_size * 2
    with tf.variable_scope("Memory/Cell", reuse=reuse):
        hidden, cell = nnUtils.lstm(q_t_p, ct, state_width)
        query = tf.layers.dense(hidden, mem_size)
    return query, cell


### Process images
#Achieves order invariance by using addition, which is commutative

`def process_block(T_steps,X,X_seq,m, q_t_p,ct, first_call=False):
    for t in range(T_steps):
        if first_call:
            q_t, ct =q_t_LSTM(q_t_p, ct,reuse=(t!=0))
        else:
            q_t, ct =q_t_LSTM(q_t_p, ct, reuse=True)
        r_t=[]
        e_j=[]
        x=[]
        qtt=tf.transpose(q_t)
        e_j=tf.matmul(m,qtt)
        a_j=tf.nn.softmax(e_j)
        for i in range(seq_length):
            r_t.append(a_j[i]*m[i])
        r_t=tf.reduce_sum(r_t,0)
        for i in range(mem_size):
            x.append(q_t[0][i])
        for i in range(mem_size):
            x.append(r_t[i])
        q_t_p=x
    return tf.concat([q_t[0],r_t],0), ct`


### Calculate Output Guess
`Performs the calculation for the guesses on how many of each digit there are
def write_block(qtp, reuse=True):
    with tf.variable_scope("Decoder",reuse=reuse):
        x=tf.transpose(tf.expand_dims(tf.expand_dims(tf.expand_dims(qtp, 1), 1),1))
        layer1 = nnUtils.create_new_conv_layer(x, layer1_output_nodes, [5, 5], [1, 1], stride, "layer1")
        flattened = tf.contrib.layers.flatten(layer1)
        dense_layer1 = tf.layers.dense(flattened,64, activation=tf.nn.relu)
        dense_layer2 = tf.layers.dense(dense_layer1,seq_length)
        mem_vector=tf.reshape(dense_layer2,[-1,seq_length])`

### Run Calculations and train network



In [None]:
with tf.Session() as sess:
    # Resume from the checkpoint when one can be restored; otherwise start
    # from freshly initialised variables.
    try:
        saver.restore(sess, full_path)
        sess.run(increment_iter)
        print("Loaded Checkpoint!")
    except Exception as err:
        # Exception rather than a bare except, so Ctrl-C and SystemExit are
        # not mistaken for a missing checkpoint. The reason is reported so
        # that a broken checkpoint does not silently restart training.
        print("Checkpoint not restored (%s); initializing variables."
              % type(err).__name__)
        sess.run(init)
    writer.add_graph(sess.graph)
    total_loss=0
    recent_loss=0
    summ=tf.summary.merge_all()
    qt=[]
    ct=[]
    out_2=[]
    for i in range(num_cycles):
        xseqbatch,xbatch=data.generate_variable_sequence(X_Train,Y_Train,seq_length)
        if i==0:
            # The first cycle starts from random states and uses the graph
            # ops that create the variables.
            q_t_p=np.random.rand(mem_size*2).astype(np.float32)
            ct=np.random.rand(mem_size*2).astype(np.float32)
            qt=q_t_p
            process_op, write_op = Process_init, Write_init
        else:
            # Later cycles continue from the states left by the previous
            # cycle and use the variable-reusing ops.
            process_op, write_op = Process, Write
        qt, ct= sess.run(process_op,{X: xbatch, X_seq: xseqbatch, Qt: qt, Ct: ct})
        qt=np.transpose(qt)
        ct=np.transpose(ct[0])
        mem=sess.run(m,{X: xbatch})
        out=sess.run(write_op,{m: mem, Qt: qt, Ct: ct})
        out_2 = point_alloc=get_points(out[0])
        #out_2=np.reshape(out_2,[1,-1])
        # The decoder output just computed is fed back in for the Write
        # tensor during the optimisation step.
        _, loss = sess.run([optimizer, write_loss], {
                       Write: out, X_seq: xseqbatch, X: xbatch, Qt: qt, Ct: ct})
        total_loss+=loss
        recent_loss+=loss

### Memory Cell block

In [None]:
def lstm(ht,ct_old,num_layers):
    """Apply one step of an input-free LSTM cell.

    All four gates are dense layers over the previous output ht alone; no
    separate input vector is involved.

    Args:
        ht: previous cell output; reshaped to [-1, num_layers].
        ct_old: previous cell state; reshaped to [-1, num_layers].
        num_layers: width of the state and of every gate.

    Returns:
        (ht_new, ct): the new cell output and the new cell state.
    """
    hidden = tf.reshape(ht, [-1, num_layers])
    prev_cell = tf.reshape(ct_old, [-1, num_layers])

    # Dense layers are created in a fixed order (forget, input, candidate,
    # output) so that their automatically generated variable names stay
    # stable.
    forget_gate = tf.layers.dense(hidden, num_layers, activation=tf.nn.sigmoid)
    input_gate = tf.layers.dense(hidden, num_layers, activation=tf.nn.sigmoid)
    candidate = tf.layers.dense(hidden, num_layers, activation=tf.nn.tanh)

    # New state: the part of the old state that is kept plus the newly
    # written candidate values.
    kept = tf.multiply(forget_gate, prev_cell)
    written = tf.multiply(input_gate, candidate)
    ct = tf.add(kept, written)

    output_gate = tf.layers.dense(hidden, num_layers, activation=tf.nn.sigmoid)
    ht_new = output_gate * tf.nn.tanh(ct)
    return ht_new, ct
