In [1]:
import tensorflow as tf
import numpy as np

In [2]:
import Batch_Focusing

In [3]:
import Batch_RWV_Generation

In [33]:
class NTMCell(tf.keras.layers.AbstractRNNCell):
    """Neural Turing Machine cell: a controller RNN cell coupled to an external memory.

    At every step the controller receives the current input concatenated with
    the read vectors produced at the previous step. Its output is projected
    into per-head parameters (``PMG_Layer``) and into the cell output
    (``NTM_ouput_gen_layer``). The first ``num_read_heads`` heads read from
    memory; the remaining ``num_write_heads`` heads write to it.

    The recurrent state is a dict with the keys 'controller_state',
    'All_Read_vectors', 'All_Weight_vectors' and 'Memory_Matrix' (see ``call``).

    NOTE(review): ``state_size`` / ``output_size`` are not defined on this
    class, so it is presumably meant to be stepped manually -- confirm before
    wrapping it in ``tf.keras.layers.RNN``.
    """

    def __init__(
        self,
        rnn_size,
        memory_rows,
        memory_columns,
        num_read_heads,
        num_write_heads,
        num_bits_per_output_vector,
        controller=tf.keras.layers.LSTMCell,
        addressing_type='LOC',
        shift_range=tf.range(-1, 2),
        **kwargs
    ):
        """Build the controller and the two dense projection layers.

        Args:
            rnn_size: number of units handed to ``controller``.
            memory_rows: the "N" or "size of memory" in literature.
            memory_columns: the "M" or "memory vector's dimension" in literature.
            num_read_heads: number of read heads.
            num_write_heads: number of write heads.
            num_bits_per_output_vector: feature size of both the cell input and
                the cell output.
            controller: callable invoked as ``controller(rnn_size)`` to create
                the controller cell.
            addressing_type: 'LOC' for location based focusing or 'CONT' for
                content based focusing.
            shift_range: sequence of allowed shifts; only its length is used in
                this class (one shift-weight logit per entry).
            **kwargs: forwarded to the Keras base class.

        Raises:
            ValueError: if ``addressing_type`` is neither 'LOC' nor 'CONT'.
        """
        super().__init__(**kwargs)

        if addressing_type not in ('LOC', 'CONT'):
            raise ValueError('Incorrect Addressing Type: Allowed values are "LOC" for Location based Focusing and "CONT" for Content based Focusing.')

        self.rnn_size = rnn_size
        self.memory_rows = memory_rows            # N
        self.memory_columns = memory_columns      # M
        self.num_read_heads = num_read_heads
        self.num_write_heads = num_write_heads
        self.num_bits_per_output_vector = num_bits_per_output_vector
        self.addressing_type = addressing_type
        self.shift_range = shift_range

        self.total_num_heads = self.num_read_heads + self.num_write_heads

        self.controller = controller(self.rnn_size)

        self.output_dim = self.num_bits_per_output_vector  # vector_dim

        # Per head: k_t (M) + beta_t (1) + g_t (1) + s_t (len(shift_range))
        # + gamma_t (1) + a_t (M) + e_t (M) = 3*M + 3 + len(shift_range).
        self.total_parameters = (3 * self.memory_columns + 3 + len(self.shift_range)) * self.total_num_heads

        # PMG_Layer = Parameter Matrix Generating Layer.
        # TODO: check initialisation of parameters for improvement.
        self.PMG_Layer = tf.keras.layers.Dense(units=self.total_parameters, use_bias=True)

        # Maps the controller output to the cell's output vector.
        self.NTM_ouput_gen_layer = tf.keras.layers.Dense(units=self.output_dim, use_bias=True)

    def _head_parameters(self, Head_PM):
        """Split one head's raw parameter slice and squash each part into its range.

        Args:
            Head_PM: tensor of shape [Batch_size, 3*M + 3 + len(shift_range)].

        Returns:
            Tuple ``(k_t, beta_t, g_t, s_t, gamma_t, a_t, e_t)``.
        """
        k_t, beta_t, g_t, s_t, gamma_t, a_t, e_t = tf.split(
            Head_PM,
            [self.memory_columns, 1, 1, len(self.shift_range), 1, self.memory_columns, self.memory_columns],
            axis=1,
        )

        # EXPERIMENT WITH OTHER VALID COMBINATIONS OF THE BELOW USED ACTIVATIONS
        k_t = tf.tanh(k_t)            # in (-1, 1)
        beta_t = tf.sigmoid(beta_t)   # in (0, 1)
        g_t = tf.sigmoid(g_t)         # in (0, 1)
        # Softmax is invariant to adding a constant to every logit, so no
        # epsilon is needed. s_t is one of the points where we can improve.
        s_t = tf.nn.softmax(s_t)
        # softplus(x) + 1 == log(exp(x) + 1) + 1, but tf.nn.softplus does not
        # overflow to inf for large x the way an explicit exp() does.
        gamma_t = tf.nn.softplus(gamma_t) + 1
        a_t = tf.tanh(a_t)            # in (-1, 1)
        e_t = tf.sigmoid(e_t)         # in (0, 1)

        return k_t, beta_t, g_t, s_t, gamma_t, a_t, e_t

    def call(self, inputs, previous_states):
        '''
        Run one step of the cell.

        inputs: shape = (Batch_size, input_features) where input_features is equal to num_bits_per_output_vector.
        previous_states: dictionary, contains:
            1. 'controller_state': state passed to the controller (for the default LSTMCell, a list of
               two matrices, both of size [Batch_size, RNN_size])
            2. 'All_Read_vectors' of size [Num_Read_Heads, Batch_size, Memory_dim(M)]
            3. 'All_Weight_vectors' of size [Num_ALL_Heads, Batch_size, Memory_size(N)]
            4. 'Memory_Matrix' of size (Batch_size, Memory_size(N), Memory_dim(M))

        Returns (NTM_output, current_states): NTM_output has num_bits_per_output_vector features per
        sample; current_states is a dict with the same four keys as previous_states.
        '''

        All_prev_read_vectors = previous_states['All_Read_vectors']
        prev_controller_state = previous_states['controller_state']
        memory = previous_states['Memory_Matrix']
        w_prev = previous_states['All_Weight_vectors']
        # ^Of shape [num_total_heads, batch_size, N]

        assert inputs.shape[1] == self.num_bits_per_output_vector, \
            'inputs must have num_bits_per_output_vector features'

        # The controller expects [Batch_size, features], so the previous read
        # vectors of every read head are laid side by side with the input:
        # features_for_controller = Num_Read_Heads * M + input_features.
        controller_input = [All_prev_read_vectors[i] for i in range(All_prev_read_vectors.shape[0])]
        controller_input.append(inputs)
        controller_input = tf.concat(controller_input, axis=1)

        assert controller_input.shape[1] == self.num_read_heads * self.memory_columns + inputs.shape[1], \
            'previous read vectors do not match num_read_heads * memory_columns'

        controller_output, controller_state = self.controller(controller_input, prev_controller_state)
        # For the default LSTMCell controller, controller_output is [Batch_size, rnn_size]
        # (NOT the shape of controller_input).

        Parameter_Matrix = self.PMG_Layer(controller_output)
        # Parameter_Matrix is of shape [Batch_size, self.total_parameters]

        # One equally sized slice per head; read heads first, then write heads.
        Each_Heads_PM_list = tf.split(Parameter_Matrix, self.total_num_heads, axis=1)

        All_Heads_W_list = []
        All_Heads_R_list = []

        for i, Head_PM in enumerate(Each_Heads_PM_list):

            k_t, beta_t, g_t, s_t, gamma_t, a_t, e_t = self._head_parameters(Head_PM)

            if self.addressing_type == 'LOC':
                Heads_w_t = Batch_Focusing.LocationFocusing(k_t, memory, beta_t, g_t, w_prev[i], s_t, gamma_t, K=None)
            else:  # 'CONT' -- __init__ rejects every other value
                Heads_w_t = Batch_Focusing.ContentFocusing(k_t, memory, beta_t, K=None)
            # ^Should be of shape [batch_size, N]

            if i < self.num_read_heads:
                # Read heads run before any write head, so they see the previous memory.
                r_t = Batch_RWV_Generation.ReadVector(memory, Heads_w_t)
                All_Heads_R_list.append(r_t)
            else:
                # Write heads update the memory one after another; a later
                # write head addresses the memory already modified by earlier ones.
                memory = Batch_RWV_Generation.WriteOnMemory(memory, Heads_w_t, e_t, a_t)

            All_Heads_W_list.append(Heads_w_t)

        # At this point `memory` holds the updated memory matrix.

        All_W_Matrix = tf.convert_to_tensor(All_Heads_W_list)  # W for Weights
        # ^Of shape [num_total_heads, batch_size, N]

        All_R_Matrix = tf.convert_to_tensor(All_Heads_R_list)  # R for Read
        # ^Of Shape [num_Read_Heads, batch_size, M]

        # TODO:: COMPLETE THE CONVOLUTION OPERATION IN FOCUSING AND THEN COMPLETE THIS CLASS

        NTM_output = self.NTM_ouput_gen_layer(controller_output)

        current_states = {
            'All_Read_vectors': All_R_Matrix,
            'controller_state': controller_state,
            'Memory_Matrix': memory,
            'All_Weight_vectors': All_W_Matrix,
        }

        return NTM_output, current_states

    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        """Create a random initial state dict (normal noise, stddev 0.05).

        Args:
            inputs: optional tensor whose leading dimension is the batch size;
                only consulted when ``batch_size`` is None.
            batch_size: number of samples in the batch.
            dtype: dtype of the generated tensors; float32 when None.

        Returns:
            Dict with the keys 'controller_state', 'All_Read_vectors',
            'All_Weight_vectors' and 'Memory_Matrix'.

        Raises:
            ValueError: if neither ``inputs`` nor ``batch_size`` is given.
        """
        if batch_size is None:
            if inputs is None:
                raise ValueError('Either `inputs` or `batch_size` must be provided to build the initial state.')
            batch_size = tf.shape(inputs)[0]
        if dtype is None:
            dtype = tf.float32

        def noise(*shape):
            return tf.random.normal(shape, stddev=0.05, dtype=dtype)

        # NOTE(review): the initial head weights are plain Gaussian noise, i.e.
        # not a normalised, non-negative weighting over memory rows -- confirm
        # the focusing ops tolerate that.
        initial_state = {
            # Two [batch, rnn_size] tensors, as consumed by the default LSTMCell controller.
            'controller_state': [noise(batch_size, self.rnn_size), noise(batch_size, self.rnn_size)],
            'All_Read_vectors': noise(self.num_read_heads, batch_size, self.memory_columns),
            'All_Weight_vectors': noise(self.total_num_heads, batch_size, self.memory_rows),
            'Memory_Matrix': noise(batch_size, self.memory_rows, self.memory_columns),
        }
        return initial_state

# TESTING!!!

In [34]:
# Small NTM used for the smoke test below: 120x10 memory, 2 read + 2 write heads.
cell = NTMCell(
    rnn_size=100,
    memory_rows=120,
    memory_columns=10,
    num_read_heads=2,
    num_write_heads=2,
    num_bits_per_output_vector=8,
)

In [35]:
# Random initial state dict (controller state, read vectors, head weights, memory) for a batch of 30.
init_states = cell.get_initial_state(batch_size=30)

In [36]:
# One time step of random input: 30 samples x 8 features (8 == num_bits_per_output_vector of `cell`).
inputs = tf.random.uniform((30,8))

In [37]:
# Run a single step. The cell returns (output, new_state_dict); keep only the updated memory matrix.
MM = cell(inputs,init_states)[1]['Memory_Matrix']

In [38]:
# Memory matrix of the last sample in the batch after one step.
MM[-1]

<tf.Tensor: id=25978, shape=(120, 10), dtype=float32, numpy=
array([[-0.02890344, -0.03132596, -0.00637361, ..., -0.00243981,
         0.0699063 , -0.0854373 ],
       [-0.10745039,  0.09775566, -0.02284391, ..., -0.04651744,
        -0.02132764, -0.09054115],
       [-0.08107531,  0.04050905,  0.05728373, ...,  0.08315361,
        -0.10347494,  0.05334125],
       ...,
       [ 0.03123587,  0.01511593,  0.05789856, ...,  0.01908722,
         0.14911002,  0.02152878],
       [-0.02521733,  0.04013935, -0.04678963, ..., -0.06287666,
         0.01030562, -0.01379851],
       [ 0.04001869, -0.02702504, -0.00220842, ...,  0.10473741,
        -0.07951589, -0.01692304]], dtype=float32)>

In [30]:
# Initial memory matrix of the last sample, for comparison with MM[-1].
# NOTE(review): this cell's execution count (30) is lower than that of the cell that
# created the current `init_states` (35), so the output shown here may belong to an
# older random init_states than the one fed to `cell`. Re-run top to bottom before
# drawing conclusions from the comparison.
init_states['Memory_Matrix'][-1]

<tf.Tensor: id=21766, shape=(120, 10), dtype=float32, numpy=
array([[ 0.11861812,  0.02786397,  0.03258739, ..., -0.07295991,
        -0.02765901,  0.06532305],
       [-0.04241162, -0.01079567, -0.03769175, ..., -0.06559222,
        -0.02021819, -0.00472933],
       [-0.02901254,  0.05621299, -0.02097247, ...,  0.02361124,
        -0.00780815, -0.02890104],
       ...,
       [-0.04769608, -0.03623661,  0.00921166, ...,  0.02488117,
        -0.01424608,  0.04344051],
       [ 0.06635999,  0.02696202, -0.02606978, ...,  0.05525872,
         0.0602299 ,  0.03645503],
       [ 0.09451155,  0.03362701,  0.01271943, ..., -0.09557761,
         0.04518897, -0.0020069 ]], dtype=float32)>

## It works! A single forward step runs end to end and returns a new state.

## Rough Work below

In [31]:
# IPython help lookup (trailing `?`) for the TF1 dynamic_rnn API; exploratory only.
tf.compat.v1.nn.dynamic_rnn?

Signature:
tf.compat.v1.nn.dynamic_rnn(
    cell,
    inputs,
    sequence_length=None,
    initial_state=None,
    dtype=None,
    parallel_iterations=None,
    swap_memory=False,
    time_major=False,
    scope=None,
)
Docstring:
Creates a recurrent neural network specified by RNNCell `cell`. (depr

## Rough Work

In [62]:
# Toy dimensions for the rough-work experiments below.
batch_size, features = 54, 20   # samples per batch, input features per sample
n_RH, n_WH = 2, 2               # number of read heads / write heads
N, M = 120, 28                  # memory rows / memory columns

# Random input batch of shape (batch_size, features).
inputs = tf.random.uniform((batch_size, features))

In [63]:
# A single (unbatched) random memory matrix of shape (N, M).
M_prev = tf.random.uniform((N,M))

In [64]:
# Random read vectors laid out as [read_head, batch, M], the layout NTMCell keeps in its state.
All_read_vectors = tf.random.uniform([n_RH,batch_size,M])

In [65]:
# Display the random input batch.
inputs

<tf.Tensor: id=889, shape=(54, 20), dtype=float32, numpy=
array([[0.50705016, 0.5538057 , 0.35466897, ..., 0.2511592 , 0.5219631 ,
        0.883775  ],
       [0.68511164, 0.5268614 , 0.03507745, ..., 0.1325407 , 0.8668597 ,
        0.0868988 ],
       [0.6197177 , 0.01878619, 0.05746126, ..., 0.85158   , 0.6232103 ,
        0.5296048 ],
       ...,
       [0.27210355, 0.27517486, 0.78952754, ..., 0.49804616, 0.36897254,
        0.9983697 ],
       [0.31133854, 0.36371803, 0.90826654, ..., 0.9750415 , 0.7073227 ,
        0.24617815],
       [0.4307872 , 0.87308276, 0.2203461 , ..., 0.92326057, 0.451658  ,
        0.18218124]], dtype=float32)>

In [66]:
# Check whether a plain reshape to (batch_size, n_RH*M) = (54, 56) could build the controller input.
# It cannot: as the outputs of this cell and the next one show, row 0 of the result is head 0's
# vectors for samples 0 and 1 laid end to end, not sample 0's vectors from both heads.
tf.reshape(All_read_vectors, (54,56) )

<tf.Tensor: id=905, shape=(54, 56), dtype=float32, numpy=
array([[0.23105407, 0.11242044, 0.7150446 , ..., 0.29172194, 0.5120884 ,
        0.9246371 ],
       [0.65020096, 0.23843086, 0.9909525 , ..., 0.32537663, 0.94632316,
        0.02677977],
       [0.42424202, 0.43033516, 0.64849985, ..., 0.725616  , 0.46795988,
        0.00755632],
       ...,
       [0.4037726 , 0.03265584, 0.7736981 , ..., 0.6615019 , 0.7029762 ,
        0.9707041 ],
       [0.06914246, 0.05134463, 0.19207633, ..., 0.76326644, 0.14683652,
        0.66239476],
       [0.5249901 , 0.45983613, 0.7040168 , ..., 0.3792597 , 0.05599809,
        0.08590281]], dtype=float32)>

In [67]:
# Read vectors of head 0 for the whole batch, shape (batch_size, M).
All_read_vectors[0]

<tf.Tensor: id=909, shape=(54, 28), dtype=float32, numpy=
array([[0.23105407, 0.11242044, 0.7150446 , ..., 0.36795807, 0.87961614,
        0.3911537 ],
       [0.34743667, 0.46957827, 0.95620465, ..., 0.29172194, 0.5120884 ,
        0.9246371 ],
       [0.65020096, 0.23843086, 0.9909525 , ..., 0.82961166, 0.68320835,
        0.85916567],
       ...,
       [0.6298901 , 0.84703267, 0.7298449 , ..., 0.7918333 , 0.8833169 ,
        0.4757222 ],
       [0.984496  , 0.18993235, 0.6181967 , ..., 0.11975944, 0.10704279,
        0.23925197],
       [0.45074725, 0.19454968, 0.11405873, ..., 0.67184734, 0.640313  ,
        0.9980916 ]], dtype=float32)>

In [68]:
# Read vectors of head 1 for the whole batch, shape (batch_size, M).
All_read_vectors[1]

<tf.Tensor: id=913, shape=(54, 28), dtype=float32, numpy=
array([[0.47772777, 0.5548208 , 0.4386599 , ..., 0.37769878, 0.14366198,
        0.7036071 ],
       [0.32530928, 0.5653039 , 0.11393523, ..., 0.83864987, 0.65681624,
        0.9910178 ],
       [0.80309534, 0.90419984, 0.20029187, ..., 0.771958  , 0.5983671 ,
        0.06970429],
       ...,
       [0.55551815, 0.7331928 , 0.19058478, ..., 0.76326644, 0.14683652,
        0.66239476],
       [0.5249901 , 0.45983613, 0.7040168 , ..., 0.9855584 , 0.92810225,
        0.11906552],
       [0.26077294, 0.33528268, 0.55360246, ..., 0.3792597 , 0.05599809,
        0.08590281]], dtype=float32)>

In [69]:
# Unstack the heads into a list of (batch_size, M) tensors, one per read head.
a = [All_read_vectors[i] for i in range(All_read_vectors.shape[0])]

In [70]:
# Add the input batch as the last block. Not idempotent: re-running this cell appends `inputs` again.
a.append(inputs)

In [71]:
# Concatenate along the feature axis: each row is [head-0 vector | head-1 vector | input],
# giving n_RH*M + features = 76 columns per sample.
tf.concat(a,axis=1)

<tf.Tensor: id=923, shape=(54, 76), dtype=float32, numpy=
array([[0.23105407, 0.11242044, 0.7150446 , ..., 0.2511592 , 0.5219631 ,
        0.883775  ],
       [0.34743667, 0.46957827, 0.95620465, ..., 0.1325407 , 0.8668597 ,
        0.0868988 ],
       [0.65020096, 0.23843086, 0.9909525 , ..., 0.85158   , 0.6232103 ,
        0.5296048 ],
       ...,
       [0.6298901 , 0.84703267, 0.7298449 , ..., 0.49804616, 0.36897254,
        0.9983697 ],
       [0.984496  , 0.18993235, 0.6181967 , ..., 0.9750415 , 0.7073227 ,
        0.24617815],
       [0.45074725, 0.19454968, 0.11405873, ..., 0.92326057, 0.451658  ,
        0.18218124]], dtype=float32)>

### Checking PMG Layer

In [73]:
# Stand-alone Dense layer mimicking NTMCell.PMG_Layer; 402 units matches the 100 + 200 + 102
# split sizes tried a few cells further down (presumably an arbitrary test size).
PMG_example = tf.keras.layers.Dense(402, use_bias=True)

In [74]:
# Random stand-in for a controller output: batch_size rows x 68 features.
sample_mat = tf.random.uniform((batch_size,68))

In [75]:
# Applying the layer maps (batch_size, 68) to (batch_size, 402).
PMG_example(sample_mat)

<tf.Tensor: id=956, shape=(54, 402), dtype=float32, numpy=
array([[-0.07402603, -0.23214918, -0.3221025 , ..., -0.2199063 ,
        -0.00714236, -0.10826056],
       [ 0.08625412, -0.09742586, -0.14603518, ...,  0.15982783,
         0.20689332, -0.08133702],
       [ 0.03982217, -0.06957905,  0.0901875 , ..., -0.09754752,
        -0.23741679,  0.0245591 ],
       ...,
       [ 0.11666788, -0.445926  , -0.17246212, ...,  0.03891951,
         0.15199912, -0.20190841],
       [-0.1753942 , -0.05898023, -0.21369556, ..., -0.06838971,
         0.07516391, -0.10743837],
       [-0.03807025, -0.25750196, -0.15965056, ...,  0.06720576,
         0.06254517, -0.02822962]], dtype=float32)>

In [76]:
# tf.split with a list of sizes cuts axis 1 into unequal pieces (100, 200 and 102 columns);
# this is the mechanism NTMCell uses to carve each head's parameters out of one Dense output.
tf.split(PMG_example(sample_mat), [100,200,102], axis = 1)

[<tf.Tensor: id=963, shape=(54, 100), dtype=float32, numpy=
 array([[-0.07402603, -0.23214918, -0.3221025 , ..., -0.02718471,
         -0.2287883 , -0.22531593],
        [ 0.08625412, -0.09742586, -0.14603518, ..., -0.13002236,
          0.12938936, -0.28014222],
        [ 0.03982217, -0.06957905,  0.0901875 , ...,  0.08692341,
         -0.11802785, -0.01594916],
        ...,
        [ 0.11666788, -0.445926  , -0.17246212, ...,  0.01403001,
         -0.04853438, -0.23401162],
        [-0.1753942 , -0.05898023, -0.21369556, ..., -0.05899006,
          0.01151963, -0.11807346],
        [-0.03807025, -0.25750196, -0.15965056, ..., -0.20590141,
          0.11449695, -0.08281406]], dtype=float32)>,
 <tf.Tensor: id=964, shape=(54, 200), dtype=float32, numpy=
 array([[ 0.10847281, -0.3400519 , -0.3329449 , ..., -0.34308356,
         -0.13200209, -0.01572341],
        [ 0.0518838 , -0.501912  , -0.04026916, ..., -0.47047797,
         -0.51460177,  0.14040598],
        [-0.26951241, -0.63644165

## For $s_t$

In [77]:
# Softmax along axis 1 normalises every row independently, so each row sums to 1.
tf.nn.softmax(tf.random.uniform([50,2]), axis = 1)

<tf.Tensor: id=973, shape=(50, 2), dtype=float32, numpy=
array([[0.61756116, 0.38243884],
       [0.3762202 , 0.6237798 ],
       [0.41960782, 0.5803921 ],
       [0.40284595, 0.597154  ],
       [0.46406233, 0.53593767],
       [0.3400293 , 0.6599707 ],
       [0.46241152, 0.5375884 ],
       [0.54658407, 0.453416  ],
       [0.53350854, 0.46649146],
       [0.5239202 , 0.47607982],
       [0.42002782, 0.57997215],
       [0.5280102 , 0.47198975],
       [0.37605307, 0.623947  ],
       [0.5272042 , 0.47279578],
       [0.37759548, 0.6224045 ],
       [0.5847877 , 0.41521224],
       [0.59797734, 0.4020227 ],
       [0.53322417, 0.4667759 ],
       [0.5547479 , 0.4452521 ],
       [0.58802724, 0.4119728 ],
       [0.49093118, 0.5090687 ],
       [0.43030855, 0.5696915 ],
       [0.6254882 , 0.3745117 ],
       [0.47968352, 0.5203165 ],
       [0.41906884, 0.5809312 ],
       [0.5961334 , 0.40386656],
       [0.6589406 , 0.34105942],
       [0.4659075 , 0.53409255],
       [0.65774953,

## Inner for Loop sample test for each Head's Parameter Matrix

In [82]:
# Random stand-in for one head's parameter slice: 3*M + 3 scalar parameters + 3 shift logits per sample.
Head_PM = tf.random.uniform((batch_size,(3*M + 3 + 3)))

In [83]:
# Split one head's raw parameters into their parts (sizes M, 1, 1, 3, 1, M, M)
# and squash each part into its range, mirroring the inner loop of NTMCell.call.
k_t, beta_t, g_t, s_t, gamma_t, a_t, e_t = tf.split(Head_PM, [M, 1, 1, 3, 1, M, M], axis = 1)

#For k_t:- squashed into (-1, 1)
k_t = tf.tanh(k_t)
#For beta_t:- squashed into (0, 1)
beta_t = tf.sigmoid(beta_t)
#For g_t:- squashed into (0, 1)
g_t = tf.sigmoid(g_t)
#For s_t:- each row normalised to sum to 1
s_t = tf.nn.softmax(s_t)
#The above s_t is one of the points where we can improve
#For gamma_t:- softplus(x) + 1 equals log(exp(x) + 1) + 1 but does not overflow for large x
gamma_t = tf.nn.softplus(gamma_t) + 1
#For a_t:- squashed into (-1, 1)
a_t = tf.tanh(a_t)
#For e_t:- squashed into (0, 1)
e_t = tf.sigmoid(e_t)

## Testing speed of Iteration through each input method

In [58]:
import Focusing

In [85]:
# Random previous weights, one length-N vector per sample; not used by the content-focusing loop in the next cell.
w_prev = tf.random.uniform((batch_size,N))

In [92]:
# Per-sample baseline: call the unbatched Focusing.ContentFocusing once for each of the
# batch_size samples against the shared (N, M) memory and collect the resulting weight vectors.
Head_Batches_W = []
for i in range(batch_size):
    w_t = Focusing.ContentFocusing(k_t[i], M_prev, beta_t[i]) #Note that Cosine Similarity is used
    Head_Batches_W.append(w_t)

In [95]:
# Display the collected weight vectors: one length-N tensor per sample.
Head_Batches_W

[<tf.Tensor: id=748137, shape=(120,), dtype=float32, numpy=
 array([0.0083496 , 0.00844226, 0.00837953, 0.00799721, 0.00809602,
        0.00815315, 0.00852315, 0.00802606, 0.00826572, 0.00851275,
        0.00875784, 0.00836179, 0.00793282, 0.00789693, 0.00836176,
        0.00805509, 0.00890856, 0.00789917, 0.00857033, 0.00820378,
        0.0083775 , 0.00873767, 0.00765469, 0.00803013, 0.00848346,
        0.00765481, 0.00847592, 0.00815333, 0.00798906, 0.00831225,
        0.00890363, 0.00834179, 0.00820031, 0.00872109, 0.008254  ,
        0.00857174, 0.00851068, 0.00806984, 0.0075203 , 0.00811863,
        0.00839786, 0.00807636, 0.00802825, 0.00899021, 0.00825039,
        0.00869264, 0.00861082, 0.0081123 , 0.00859944, 0.00821041,
        0.0081565 , 0.00825115, 0.00858121, 0.00871716, 0.00865266,
        0.00818608, 0.00855487, 0.00805925, 0.00805491, 0.00830091,
        0.00878169, 0.00816013, 0.00841228, 0.00842103, 0.00844912,
        0.00817155, 0.00861884, 0.00809527, 0.00863089, 

#### Result: Incredibly (infeasibly) slow, and we have not even tested LocationFocusing!

#### Thus, we had to make a Vectorized Op for Batch Handling