# Neural Turing Machine

## Focusing and Weights Production

### 1. Focusing By Content 

In [1]:
import tensorflow as tf
import numpy as np

In [34]:
def ContentFocusing(k_t, M_t, b_t, K = None):
    
    '''
    Content-based addressing: score every memory row against the key with the
    similarity measure K, scale the scores by the key strength and normalise
    them with a softmax:
    
        w_ct[i] = exp(b_t * K(M_t[i], k_t)) / sum_j exp(b_t * K(M_t[j], k_t))
    
    k_t : (M,), Key Vector generated by EITHER HEAD (in whichever HEAD this function is used in for addressing)
    M_t : (N,M), Memory Matrix at time t.
    b_t : Scalar, Key Strength; multiplies the similarities before the softmax.
    K : Function, Similarity Measure, if None, Cosine Similarity will be used.
        A custom K is called once per memory row as K(row, key), where row is a
        length-M NumPy row of M_t and key is k_t reshaped to (1,M). It must
        return a single value per row.
    
    RETURNS:
    
    w_ct : (N,), Weighting after Content Focusing (a tf.Tensor whose entries sum to 1).
    '''
    
    N,M = M_t.shape
    
    assert k_t.shape == (M,), "k_t must have shape (M,), matching the columns of M_t"
    
    if K is None:
        # Cosine similarity of every row with the key in one matrix product
        # instead of one Python callback per row.
        memory = np.asarray(M_t)
        key = np.asarray(k_t).reshape(-1)
        dots = memory @ key
        norms = np.linalg.norm(memory, axis=1) * np.linalg.norm(key)
        # An all-zero row (or key) has no direction: give it similarity 0
        # rather than letting 0/0 = NaN poison the whole weighting.
        similarities = np.zeros(dots.shape, dtype=np.result_type(dots.dtype, norms.dtype))
        np.divide(dots, norms, out=similarities, where=norms != 0)
    else:
        # Same calling convention as before for user-supplied measures.
        similarities = np.apply_along_axis(K, 1, M_t, tf.reshape(k_t, (1, M)))
    
    # Flattening to (N,) also checks that K produced exactly one value per row.
    logits = tf.reshape(b_t * similarities, (N,))
    
    # Softmax is invariant to subtracting a constant; removing the maximum keeps
    # exp() from overflowing when b_t is large.
    exp_of_logits = tf.exp(logits - tf.reduce_max(logits))
    w_ct = exp_of_logits / tf.reduce_sum(exp_of_logits)
    
    assert w_ct.shape == (N,)
    
    return w_ct

#### Testing

In [7]:
# Memory dimensions used by the test cells: N rows (locations), M columns (word size).
N, M = 10, 5

In [11]:
# Sample inputs for the content-focusing check: a fixed key strength, then a
# random key of length M followed by a random N x M memory matrix.
b_t = 1
k_t = tf.random.uniform(shape=(M,))
M_t = tf.random.uniform(shape=(N, M))

In [13]:
# Display the randomly generated (N, M) memory matrix.
M_t

<tf.Tensor: id=23, shape=(10, 5), dtype=float32, numpy=
array([[0.15168643, 0.5299089 , 0.42401373, 0.8561846 , 0.02457011],
       [0.75537217, 0.8290411 , 0.14860976, 0.79608643, 0.41153967],
       [0.9069629 , 0.89466023, 0.4181559 , 0.29921603, 0.1102289 ],
       [0.68157077, 0.07642555, 0.7616246 , 0.52278054, 0.7089381 ],
       [0.71060276, 0.20000505, 0.753319  , 0.6242901 , 0.7561432 ],
       [0.85267484, 0.46078706, 0.18122256, 0.8119124 , 0.91077876],
       [0.30243218, 0.82506585, 0.6806989 , 0.52212954, 0.58182204],
       [0.9636531 , 0.73690915, 0.04979634, 0.8495326 , 0.7736132 ],
       [0.10018981, 0.17872894, 0.42052138, 0.64439833, 0.2032851 ],
       [0.69255817, 0.20251226, 0.1986072 , 0.4592415 , 0.598354  ]],
      dtype=float32)>

In [14]:
# Display the randomly generated (M,) key vector.
k_t

<tf.Tensor: id=16, shape=(5,), dtype=float32, numpy=
array([0.2839321 , 0.66500413, 0.9848646 , 0.6066334 , 0.02315331],
      dtype=float32)>

In [24]:
# Run content focusing on the sample inputs with the default cosine similarity;
# the result is a length-N weighting over the memory rows.
ContentFocusing(k_t, M_t, b_t)

<tf.Tensor: id=97, shape=(10,), dtype=float32, numpy=
array([0.11594453, 0.09759745, 0.10331585, 0.09827673, 0.10120873,
       0.08538678, 0.11552462, 0.08628917, 0.11229491, 0.0841612 ],
      dtype=float32)>

### 2. Focusing By Location

In [51]:
def LocationFocusing( k_t, M_t, b_t,    g_t, w_prev, s_t, gamma_t,   K = None,):
    
    '''
    Full addressing pipeline of a head: content focusing, interpolation with
    the previous weighting, circular convolutional shift, then sharpening.
    
    k_t, M_t, b_t, K : SAME AS IN CONTENT FOCUSING
    g_t : Scalar, Interpolation Gate in the range (0,1) emitted by HEAD IN USE.
    w_prev : (N,), Weight Vector produced by the HEAD IN USE at the previous time step.
    s_t : (N,), The weights emitted by the HEAD IN USE that defines the normalized distribution over the allowed integer shifts (which is shift_range object)
                NOTE: s_t is supposed to be padded by zeroes for all the elements not in the shift_range, thus making it a length N vector from length len(shift_range) vector.
    gamma_t : Scalar, Sharpening Factor >= 1    
    
    RETURNS:
    
    w_t : (N,), Final Weight Vector (float64 tf.Tensor, normalised to sum to 1)
    '''
    
    N = int(M_t.shape[0])
    
    assert w_prev.shape == (N,)
    
    # The shift below is computed in float64, as the original accumulator was.
    shift_weights = np.asarray(s_t).astype(np.float64)
    # A wrong-length s_t would wrap around with the wrong period and silently
    # give a meaningless shift, so reject it up front.
    assert shift_weights.shape == (N,), "s_t must be zero-padded to shape (N,)"
    
    w_ct = ContentFocusing(k_t, M_t, b_t, K)
    
    #Interpolation
    w_gt = g_t * w_ct + (1 - g_t) * w_prev
    
    #Convolutional Shift
    # w_hat_t[i] = sum_j w_gt[j] * s_t[(i - j) mod N], written as a single
    # circulant-matrix product instead of N*N element-wise tensor look-ups.
    gated = np.asarray(w_gt).astype(np.float64)
    positions = np.arange(N)
    circulant = shift_weights[(positions[:, None] - positions[None, :]) % N]
    w_hat_t = circulant @ gated
    
    #Sharpening
    powered = tf.pow(w_hat_t, gamma_t)
    w_t = powered / tf.reduce_sum(powered)
    
    return w_t

#### Testing

In [55]:
# Scalar controls for the location-focusing check: interpolation gate and sharpening factor.
g_t, gamma_t = 0.8, 2.4423
# Random stand-ins for the previous weighting and the shift weighting.
# NOTE: neither is normalised here; LocationFocusing renormalises its final output.
w_prev = tf.random.uniform(shape=(N,))
s_t = tf.random.uniform(shape=(N,))

In [56]:
# Run the full addressing pipeline on the sample inputs; the result is the
# final length-N weight vector.
LocationFocusing( k_t, M_t, b_t,    g_t, w_prev, s_t, gamma_t)

<tf.Tensor: id=62863, shape=(10,), dtype=float64, numpy=
array([0.11293619, 0.09017039, 0.11173104, 0.08184061, 0.07534759,
       0.1112131 , 0.0743389 , 0.11282455, 0.1379482 , 0.09164943])>