# Batch Focusing

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def ContentFocusing(k_t, M_prev, beta_t, K = None):
    
    '''
    Computes the Write Vector for whole batch through Content Attentioning in one go.
    
    k_t : (Batch_size,M), Key Vector generated by EITHER HEAD (in whichever HEAD this function is used in for addressing)
    M_prev : (Batch_size,N,M), Memory Matrix at time t.
    beta_t : (Batch_size,1), Key Strength hyperparameter
    K : Function, Similarity Measure, if None, Cosine Similarity will be used.
    
    RETURNS:
    
    w_ct : (Batch_size,N), Weighting after Content Focusing. 
    '''
    
    batch_size,N,M = M_prev.shape
    

    
    if K != None : 
            
        K_kt_Mprev = K(k_t,M_prev)
    
    else:
        K_kt_Mprev = tf.reduce_sum(tf.reshape(k_t,(batch_size,1,M))*M_prev, axis = 2) /  tf.multiply(tf.reshape(tf.linalg.norm(k_t,axis =1),(batch_size,1)),tf.linalg.norm(M_prev,axis = 2)) 
        #^ Of shape [batch_size, N]
    exp_vals = tf.exp(beta_t*K_kt_Mprev)
    w_ct = exp_vals / tf.reshape(tf.reduce_sum(exp_vals,axis = 1),(-1,1)) 
    
    assert w_ct.shape == (batch_size,N)
    
    return w_ct




## Rough Work

In [267]:
batch_size = 61
features = 10
inputs = tf.random.uniform((batch_size,features))
n_RH = 1
n_WH = 2
N = 90
M = 10
M_prev = tf.random.uniform((batch_size,N,M))
w_t = tf.random.uniform((batch_size,N))
a_t = tf.random.uniform((batch_size,M))
e_t = tf.random.uniform((batch_size,M))
k_t = tf.random.uniform((batch_size,M))
beta_t = tf.random.uniform((batch_size,1))
w_prev = tf.random.uniform((batch_size,N))
g_t = tf.random.uniform((batch_size,1))
s_t = tf.random.uniform((batch_size,3)) #let len(shift_range = 3)
gamma_t = tf.random.uniform((batch_size,1))

In [4]:
tf.multiply(k_t[0],M_prev[0])

<tf.Tensor: id=78, shape=(90, 10), dtype=float32, numpy=
array([[2.05493674e-01, 4.30796772e-01, 8.00636530e-01, 2.90890127e-01,
        2.03438140e-02, 2.84723818e-01, 5.93787171e-02, 2.68620905e-02,
        6.08717464e-03, 4.67993557e-01],
       [3.08563024e-01, 6.22952104e-01, 7.58889139e-01, 7.00889945e-01,
        1.17800213e-01, 2.61898994e-01, 1.19392000e-01, 4.17016819e-02,
        5.51945472e-04, 3.33412975e-01],
       [2.39747286e-01, 3.85779083e-01, 5.71780741e-01, 4.23119813e-01,
        1.22678958e-01, 1.49898782e-01, 2.11717144e-01, 1.55228311e-02,
        2.56567192e-03, 2.37495601e-01],
       [1.07483469e-01, 2.44085670e-01, 2.66609997e-01, 6.59644008e-01,
        9.81792659e-02, 3.70828599e-01, 1.59274265e-01, 4.75123860e-02,
        6.68983033e-04, 3.83947700e-01],
       [2.50789434e-01, 6.78528070e-01, 8.51687312e-01, 3.63416970e-02,
        1.65671006e-01, 3.57668638e-01, 1.20984100e-01, 2.11320650e-02,
        3.30919935e-03, 7.62967467e-02],
       [6.71993494

In [5]:
k_t[0]

<tf.Tensor: id=82, shape=(10,), dtype=float32, numpy=
array([0.32541203, 0.87419105, 0.9244362 , 0.82293713, 0.16608787,
       0.41414547, 0.5219244 , 0.06328118, 0.00619519, 0.48116386],
      dtype=float32)>

In [6]:
M_prev[0,0]

<tf.Tensor: id=86, shape=(10,), dtype=float32, numpy=
array([0.6314876 , 0.49279475, 0.8660809 , 0.35347795, 0.12248826,
       0.68749714, 0.11376882, 0.42448783, 0.98256505, 0.97262824],
      dtype=float32)>

In [7]:
a = np.array([tf.reduce_sum(tf.multiply(k_t[i],M_prev[i]), axis = 1) for i in range(batch_size)])

In [8]:
a.shape 

(61, 90)

In [9]:
b = tf.linalg.norm

In [10]:
b = tf.linalg.norm

In [11]:
b = tf.linalg.norm

In [12]:
c = tf.linalg.norm(M_prev,axis = 2)
c

<tf.Tensor: id=22722, shape=(61, 90), dtype=float32, numpy=
array([[2.027041 , 2.1642258, 1.6434479, ..., 1.723338 , 1.8016926,
        1.9084893],
       [1.9956269, 1.5014703, 1.7618327, ..., 2.1334832, 1.6714257,
        1.5563629],
       [1.4762534, 1.9331743, 1.9384431, ..., 1.8611721, 1.3271677,
        2.3665528],
       ...,
       [1.8282665, 1.8271585, 1.8608762, ..., 2.1229274, 1.3248271,
        2.0175529],
       [1.7941804, 1.4642141, 1.7083813, ..., 2.0990129, 2.5573628,
        2.1121614],
       [1.7976902, 1.6338319, 1.7932703, ..., 2.1311028, 1.9314992,
        1.5543519]], dtype=float32)>

In [13]:
d = tf.linalg.norm(k_t,axis =1)
d

<tf.Tensor: id=22727, shape=(61,), dtype=float32, numpy=
array([1.7632298, 2.3290856, 1.5850703, 1.7012767, 1.7760475, 2.0075939,
       2.1443708, 2.1399894, 2.07307  , 1.92934  , 1.5302684, 1.6068015,
       1.6128647, 2.058905 , 1.7653611, 1.7906436, 1.747843 , 2.129794 ,
       1.2911484, 1.3325299, 2.0616665, 1.939443 , 1.7552632, 1.6806258,
       2.0048482, 1.9816246, 2.1701431, 1.4865725, 1.7067194, 1.5858262,
       2.0027452, 1.5277145, 1.837927 , 2.0219283, 2.1345253, 2.025736 ,
       2.3264446, 1.796912 , 1.6987953, 1.8551291, 2.0930135, 1.9070629,
       1.8816838, 2.0237234, 1.5859659, 1.9742703, 1.7763777, 1.4644916,
       1.278213 , 2.1491597, 1.7826204, 1.724457 , 1.7122903, 2.0279088,
       1.9660336, 2.1259933, 1.7772762, 1.4175928, 2.370463 , 1.4534798,
       2.1097548], dtype=float32)>

In [14]:
tf.multiply(tf.reshape(tf.linalg.norm(k_t,axis =1),(batch_size,1)),tf.linalg.norm(M_prev,axis = 2)) #This is the denominator multiplied norm value

<tf.Tensor: id=22740, shape=(61, 90), dtype=float32, numpy=
array([[3.574139 , 3.8160276, 2.8977764, ..., 3.038641 , 3.176798 ,
        3.3651054],
       [4.647986 , 3.497053 , 4.1034594, ..., 4.9690647, 3.8928936,
        3.6249022],
       [2.3399653, 3.064217 , 3.0725684, ..., 2.9500885, 2.1036541,
        3.7511525],
       ...,
       [4.333838 , 4.331211 , 4.411138 , ..., 5.0323205, 3.1404533,
        4.782534 ],
       [2.607805 , 2.1282055, 2.4830976, ..., 3.0508728, 3.717075 ,
        3.069984 ],
       [3.7926855, 3.4469845, 3.7833607, ..., 4.4961042, 4.07499  ,
        3.2793014]], dtype=float32)>

K[.,.] should be of shape [68,100]

In [15]:
tf.reduce_sum(tf.reshape(k_t,(batch_size,1,M))*M_prev, axis = 2) #This is the dot product

<tf.Tensor: id=22745, shape=(61, 90), dtype=float32, numpy=
array([[2.5932064, 3.2660518, 2.3603058, ..., 2.4679694, 2.3849885,
        2.8541474],
       [4.0530243, 2.8802643, 3.647146 , ..., 4.4283714, 3.0079472,
        3.3189452],
       [1.5377616, 2.8815222, 2.4059458, ..., 2.471018 , 1.5910689,
        2.978387 ],
       ...,
       [3.7147613, 3.2942076, 3.3360345, ..., 4.4191713, 2.6649046,
        3.3275197],
       [2.228442 , 1.5200226, 1.9804798, ..., 1.9989941, 2.4690008,
        2.1676097],
       [3.3294933, 2.7179575, 2.5504918, ..., 3.2386963, 3.2300858,
        2.6896815]], dtype=float32)>

In [16]:
res = tf.reduce_sum(tf.reshape(k_t,(batch_size,1,M))*M_prev, axis = 2) /  tf.multiply(tf.reshape(tf.linalg.norm(k_t,axis =1),(batch_size,1)),tf.linalg.norm(M_prev,axis = 2)) 

In [17]:
res

<tf.Tensor: id=22764, shape=(61, 90), dtype=float32, numpy=
array([[0.72554713, 0.8558774 , 0.8145231 , ..., 0.8121951 , 0.75075233,
        0.8481599 ],
       [0.8719958 , 0.82362616, 0.8887979 , ..., 0.89118814, 0.77267647,
        0.91559577],
       [0.6571728 , 0.94037795, 0.7830406 , ..., 0.8376081 , 0.75633574,
        0.7939925 ],
       ...,
       [0.85715276, 0.7605742 , 0.75627524, ..., 0.8781578 , 0.8485732 ,
        0.69576496],
       [0.85452783, 0.7142274 , 0.7975844 , ..., 0.6552204 , 0.66423213,
        0.7060655 ],
       [0.8778722 , 0.78850293, 0.6741339 , ..., 0.72033393, 0.7926611 ,
        0.82019955]], dtype=float32)>

In [18]:
beta_t

<tf.Tensor: id=48, shape=(61, 1), dtype=float32, numpy=
array([[0.85407066],
       [0.04710746],
       [0.6117557 ],
       [0.08028185],
       [0.04349375],
       [0.911795  ],
       [0.73365855],
       [0.90934324],
       [0.63897693],
       [0.3613155 ],
       [0.77798855],
       [0.11972249],
       [0.62352276],
       [0.85664487],
       [0.3572116 ],
       [0.5357834 ],
       [0.53576124],
       [0.08292627],
       [0.82766294],
       [0.45116186],
       [0.6374458 ],
       [0.53476226],
       [0.60202   ],
       [0.00594819],
       [0.7454233 ],
       [0.31553423],
       [0.1317898 ],
       [0.7465235 ],
       [0.03908086],
       [0.34705806],
       [0.4855107 ],
       [0.12419808],
       [0.16215038],
       [0.9797547 ],
       [0.44559407],
       [0.6437837 ],
       [0.4934603 ],
       [0.30058002],
       [0.1892041 ],
       [0.8537741 ],
       [0.537444  ],
       [0.42458856],
       [0.0050317 ],
       [0.6613847 ],
       [0.5910659 ],

In [19]:
exp_vals = tf.exp(beta_t*res)
exp_vals / tf.reshape(tf.reduce_sum(exp_vals,axis = 1),(-1,1)) 

<tf.Tensor: id=22771, shape=(61, 90), dtype=float32, numpy=
array([[0.01111906, 0.01242825, 0.01199695, ..., 0.01197312, 0.01136101,
        0.0123466 ],
       [0.01112092, 0.0110956 , 0.01112972, ..., 0.01113097, 0.01106901,
        0.01114378],
       [0.01035664, 0.01231577, 0.01118561, ..., 0.01156531, 0.01100436,
        0.01126081],
       ...,
       [0.01113098, 0.01106075, 0.01105764, ..., 0.01114631, 0.01112472,
        0.01101388],
       [0.01238194, 0.01093216, 0.01177167, ..., 0.01037433, 0.01045764,
        0.01085324],
       [0.01126071, 0.0110672 , 0.01082439, ..., 0.01092183, 0.01107613,
        0.01113545]], dtype=float32)>

## Testing

In [20]:
ContentFocusing(k_t,M_prev,beta_t)

<tf.Tensor: id=22797, shape=(61, 90), dtype=float32, numpy=
array([[0.01111906, 0.01242825, 0.01199695, ..., 0.01197312, 0.01136101,
        0.0123466 ],
       [0.01112092, 0.0110956 , 0.01112972, ..., 0.01113097, 0.01106901,
        0.01114378],
       [0.01035664, 0.01231577, 0.01118561, ..., 0.01156531, 0.01100436,
        0.01126081],
       ...,
       [0.01113098, 0.01106075, 0.01105764, ..., 0.01114631, 0.01112472,
        0.01101388],
       [0.01238194, 0.01093216, 0.01177167, ..., 0.01037433, 0.01045764,
        0.01085324],
       [0.01126071, 0.0110672 , 0.01082439, ..., 0.01092183, 0.01107613,
        0.01113545]], dtype=float32)>

## Location Focusing

In [263]:
def LocationFocusing( k_t, M_prev, beta_t,    g_t, w_prev, s_t, gamma_t,   K = None):
    
    '''
    Computes the Write Vector for whole batch through Location Attentioning in one go.
    
    k_t, M_prev, b_t, K : SAME AS IN CONTENT FOCUSING
    g_t : (batch_size,1), Interpolation Gate in the range (0,1) emitted by HEAD IN USE.
    w_prev : (batch_size,N), Weight Vector produced by the HEAD IN USE at the previous time step.
    s_t : (batch_size, len(shift_range)), The weights emitted by the HEAD IN USE that defines the normalized distribution over the allowed integer shifts (which is shift_range object)
                
    gamma_t : (batch_size,1), Sharpening Factor >= 1    
    
    RETURNS:
    
    w_t : (batch_size,N), Final Weight Vector through Location Attentioning
    '''
    
    w_ct = ContentFocusing(k_t, M_prev, beta_t, K)
    
    batch_size,N,M = M_prev.shape
    
    #assert w_prev.shape == (N,)
    
    #Interpolation
    w_gt = g_t * w_ct + (1 - g_t) * w_prev
    
    #Convolutional Shift
       
    
        
        #TODO
        #The main Hurdle!!
    w_hat_t = tf.concat([Convolution(s_t[i],w_gt[i]) for i in range(batch_size)],axis = 0)
    #^Of shape [batch_size, N]
    
    
    #Sharpening
    powered = tf.pow(w_hat_t, g_t)
    w_t = powered / tf.reshape(tf.reduce_sum(powered, axis = 1),(-1,1))
    
    return w_t

In [32]:
w_gt = g_t * ContentFocusing(k_t, M_prev, beta_t) + (1 - g_t) * w_prev

In [33]:
w_gt

<tf.Tensor: id=22872, shape=(61, 90), dtype=float32, numpy=
array([[0.35533756, 0.6894588 , 0.80479175, ..., 0.6349884 , 0.55541885,
        0.24350293],
       [0.02859625, 0.02614178, 0.03149249, ..., 0.04450782, 0.01449552,
        0.0345098 ],
       [0.19266987, 0.04535276, 0.01200723, ..., 0.15658332, 0.25237033,
        0.05255016],
       ...,
       [0.13885812, 0.13401778, 0.36846238, ..., 0.05631729, 0.13022679,
        0.01310542],
       [0.0923833 , 0.13903259, 0.31079543, ..., 0.20548564, 0.32254213,
        0.0272678 ],
       [0.1789712 , 0.3346631 , 0.2509143 , ..., 0.12973386, 0.2217105 ,
        0.04943626]], dtype=float32)>

In [23]:
res = tf.pow(w_prev,g_t)

In [24]:
res / tf.reshape(tf.reduce_sum(res, axis = 1),(-1,1))

<tf.Tensor: id=22834, shape=(61, 90), dtype=float32, numpy=
array([[0.0111473 , 0.01182031, 0.01198312, ..., 0.01173466, 0.01159663,
        0.01077986],
       [0.01064577, 0.00924526, 0.0123014 , ..., 0.01967518, 0.00240065,
        0.01401496],
       [0.01539079, 0.00458834, 0.00084535, ..., 0.01304287, 0.01891349,
        0.00531914],
       ...,
       [0.00844763, 0.00830226, 0.01356698, ..., 0.00535579, 0.00818466,
        0.00221195],
       [0.00703477, 0.00917124, 0.01505221, ..., 0.01170263, 0.01542002,
        0.00303403],
       [0.01280224, 0.01976956, 0.016215  , ..., 0.01020945, 0.0148739 ,
        0.00497202]], dtype=float32)>

In [25]:
a = []
a.append(w_prev)
a.append(w_t)
tf.convert_to_tensor(a,dtype = tf.float32)

<tf.Tensor: id=22835, shape=(2, 61, 90), dtype=float32, numpy=
array([[[0.38866937, 0.7550179 , 0.8815607 , ..., 0.69531703,
         0.6081017 , 0.26588655],
        [0.50779283, 0.43872762, 0.5898665 , ..., 0.95974493,
         0.10845494, 0.67523694],
        [0.7172704 , 0.14041567, 0.0143714 , ..., 0.57386804,
         0.9468932 , 0.17135894],
        ...,
        [0.25378418, 0.2446518 , 0.69004726, ..., 0.09696114,
         0.23739219, 0.01498735],
        [0.21098924, 0.32894778, 0.75411284, ..., 0.49474776,
         0.78522277, 0.0516032 ],
        [0.52254856, 0.99759245, 0.74277055, ..., 0.37313616,
         0.65322316, 0.1279006 ]],

       [[0.7121161 , 0.924096  , 0.10493314, ..., 0.41378474,
         0.60936666, 0.74778235],
        [0.8670503 , 0.00983989, 0.00741589, ..., 0.6879052 ,
         0.16938496, 0.49485922],
        [0.55171156, 0.9524673 , 0.13141167, ..., 0.13219249,
         0.2656126 , 0.17225206],
        ...,
        [0.0515666 , 0.8248035 , 0.7360097 , 

# Tackling Convolution

In [28]:
s_t = tf.nn.softmax(s_t,axis = 1)

In [29]:
s_t

<tf.Tensor: id=22837, shape=(61, 3), dtype=float32, numpy=
array([[0.3508521 , 0.35592723, 0.2932207 ],
       [0.29765448, 0.2027476 , 0.49959788],
       [0.3778228 , 0.19784515, 0.42433208],
       [0.26167667, 0.37978873, 0.3585346 ],
       [0.25392237, 0.3116903 , 0.4343873 ],
       [0.37675145, 0.29659584, 0.32665262],
       [0.4504265 , 0.22946675, 0.32010677],
       [0.4685757 , 0.28320727, 0.24821702],
       [0.27434602, 0.49796346, 0.22769052],
       [0.3014842 , 0.4085322 , 0.28998363],
       [0.31298846, 0.2481137 , 0.43889785],
       [0.3437103 , 0.35235316, 0.30393663],
       [0.3123308 , 0.33182576, 0.35584342],
       [0.34683648, 0.36951542, 0.2836481 ],
       [0.2955191 , 0.25643033, 0.44805053],
       [0.42602712, 0.25265813, 0.3213147 ],
       [0.46359792, 0.28226194, 0.25414014],
       [0.24860227, 0.31895885, 0.43243885],
       [0.19122706, 0.42353   , 0.38524297],
       [0.32554874, 0.30551052, 0.3689407 ],
       [0.41641364, 0.25902247, 0.3245638

In [30]:
s = s_t[0]

In [31]:
s

<tf.Tensor: id=22841, shape=(3,), dtype=float32, numpy=array([0.3508521 , 0.35592723, 0.2932207 ], dtype=float32)>

In [37]:
w = w_gt[0]

In [38]:
w

<tf.Tensor: id=22884, shape=(90,), dtype=float32, numpy=
array([0.35533756, 0.6894588 , 0.80479175, 0.08821831, 0.51927435,
       0.67886406, 0.03825376, 0.6502476 , 0.41987446, 0.02311977,
       0.25427988, 0.24537571, 0.8367311 , 0.08455674, 0.75147593,
       0.5446072 , 0.02639749, 0.21034698, 0.6594261 , 0.44869885,
       0.14429632, 0.3806105 , 0.20494777, 0.8730608 , 0.11782522,
       0.22068135, 0.38754243, 0.8381731 , 0.25545573, 0.44642666,
       0.08851904, 0.65640014, 0.39040038, 0.7380051 , 0.43247446,
       0.08930185, 0.8780038 , 0.15075652, 0.24732804, 0.3504592 ,
       0.4211876 , 0.72341764, 0.00113347, 0.5999049 , 0.14582388,
       0.86452305, 0.55796105, 0.788396  , 0.40471986, 0.29379642,
       0.05899411, 0.3212133 , 0.800506  , 0.5228989 , 0.16447848,
       0.14476994, 0.12447245, 0.65464777, 0.26551908, 0.43885332,
       0.29979315, 0.7983517 , 0.18054599, 0.07841583, 0.8827569 ,
       0.4798715 , 0.6829637 , 0.9067673 , 0.8259656 , 0.8829714 ,
     

We want circular convolution between $s$ and $w$.

In [39]:
convolution_matrix = tf.random.uniform((w.shape[0],w.shape[0]))

In [40]:
convolution_matrix

<tf.Tensor: id=22891, shape=(90, 90), dtype=float32, numpy=
array([[0.63636076, 0.8686743 , 0.39349306, ..., 0.9716097 , 0.71206   ,
        0.36838627],
       [0.66289926, 0.8393798 , 0.16666293, ..., 0.52293646, 0.0539335 ,
        0.24759817],
       [0.42545938, 0.8948518 , 0.47508836, ..., 0.25450087, 0.4378693 ,
        0.77868295],
       ...,
       [0.14270055, 0.4661255 , 0.15323901, ..., 0.6755384 , 0.5056366 ,
        0.6806333 ],
       [0.4419961 , 0.59564686, 0.45988333, ..., 0.2602501 , 0.7185241 ,
        0.32736146],
       [0.80200136, 0.73216176, 0.53214264, ..., 0.25621212, 0.58739364,
        0.8871223 ]], dtype=float32)>

In [43]:
a = tf.convert_to_tensor([1,3,5,7,9])

In [62]:
b = a%3
#interpret 0 as 3

In [63]:
b

<tf.Tensor: id=23274, shape=(5,), dtype=int32, numpy=array([1, 0, 2, 1, 0], dtype=int32)>

Odd | Even | Odd
1      2     3
3      4     5
5      6     7 
7      8     9
9     10     11

len(shift_range) == 3, thus, above three columns will repeat untill count reaches upto w.shape[0]

### 1.1. To Make the Repeating Columns' Matrix

In [86]:
#This will be of size [N, len(shift_range)]
test = ([i for i in range(1,N*2,2)], [i for i in range(2,2*N+1,2)], [i for i in range(3, 2*N + 2, 2)])

In [87]:
repeat_matrix = tf.transpose(tf.convert_to_tensor(test))

In [89]:
first_mat = tf.concat([repeat_matrix for _ in range(int(N/3))],axis = 1)

In [90]:
first_mat

<tf.Tensor: id=23296, shape=(90, 90), dtype=int32, numpy=
array([[  1,   2,   3, ...,   1,   2,   3],
       [  3,   4,   5, ...,   3,   4,   5],
       [  5,   6,   7, ...,   5,   6,   7],
       ...,
       [175, 176, 177, ..., 175, 176, 177],
       [177, 178, 179, ..., 177, 178, 179],
       [179, 180, 181, ..., 179, 180, 181]], dtype=int32)>

In [92]:
N_p = 92

In [93]:
N_p%3

2

In [102]:
repeat_matrix

<tf.Tensor: id=23292, shape=(90, 3), dtype=int32, numpy=
array([[  1,   2,   3],
       [  3,   4,   5],
       [  5,   6,   7],
       [  7,   8,   9],
       [  9,  10,  11],
       [ 11,  12,  13],
       [ 13,  14,  15],
       [ 15,  16,  17],
       [ 17,  18,  19],
       [ 19,  20,  21],
       [ 21,  22,  23],
       [ 23,  24,  25],
       [ 25,  26,  27],
       [ 27,  28,  29],
       [ 29,  30,  31],
       [ 31,  32,  33],
       [ 33,  34,  35],
       [ 35,  36,  37],
       [ 37,  38,  39],
       [ 39,  40,  41],
       [ 41,  42,  43],
       [ 43,  44,  45],
       [ 45,  46,  47],
       [ 47,  48,  49],
       [ 49,  50,  51],
       [ 51,  52,  53],
       [ 53,  54,  55],
       [ 55,  56,  57],
       [ 57,  58,  59],
       [ 59,  60,  61],
       [ 61,  62,  63],
       [ 63,  64,  65],
       [ 65,  66,  67],
       [ 67,  68,  69],
       [ 69,  70,  71],
       [ 71,  72,  73],
       [ 73,  74,  75],
       [ 75,  76,  77],
       [ 77,  78,  79],
       

In [242]:
s = tf.nn.softmax(tf.random.uniform((5,)))
s

<tf.Tensor: id=23866, shape=(5,), dtype=float32, numpy=
array([0.2350838 , 0.20497374, 0.1416037 , 0.23723076, 0.18110798],
      dtype=float32)>

In [250]:
w

<tf.Tensor: id=22884, shape=(90,), dtype=float32, numpy=
array([0.35533756, 0.6894588 , 0.80479175, 0.08821831, 0.51927435,
       0.67886406, 0.03825376, 0.6502476 , 0.41987446, 0.02311977,
       0.25427988, 0.24537571, 0.8367311 , 0.08455674, 0.75147593,
       0.5446072 , 0.02639749, 0.21034698, 0.6594261 , 0.44869885,
       0.14429632, 0.3806105 , 0.20494777, 0.8730608 , 0.11782522,
       0.22068135, 0.38754243, 0.8381731 , 0.25545573, 0.44642666,
       0.08851904, 0.65640014, 0.39040038, 0.7380051 , 0.43247446,
       0.08930185, 0.8780038 , 0.15075652, 0.24732804, 0.3504592 ,
       0.4211876 , 0.72341764, 0.00113347, 0.5999049 , 0.14582388,
       0.86452305, 0.55796105, 0.788396  , 0.40471986, 0.29379642,
       0.05899411, 0.3212133 , 0.800506  , 0.5228989 , 0.16447848,
       0.14476994, 0.12447245, 0.65464777, 0.26551908, 0.43885332,
       0.29979315, 0.7983517 , 0.18054599, 0.07841583, 0.8827569 ,
       0.4798715 , 0.6829637 , 0.9067673 , 0.8259656 , 0.8829714 ,
     

## Final Function for Convolution

In [262]:
def Convolution(s_t, w_gt):
    '''
    s_t: (len(shift_range),) ; Shift Weighting of the particualar input
    w_gt: (N,) ; W_gt Vector for the particualar input as calculated in the Focusing function
    
    RETURNS:
    
    w_hat_t: (1,N) ; Vector found after Circular Convolution of s_t on w_gt
    '''
    LSR = len(s_t) #length of shift range
    test = []
    for i in range(LSR):
        test.append([j for j in range(i+1, 2*N + i, 2)])
    repeat_matrix = tf.transpose(tf.convert_to_tensor(test))
    if N%LSR != 0:
        repeat_matrix = tf.concat([repeat_matrix,tf.transpose(test[:N%LSR])],axis = 1)

    index_mat = np.array(repeat_matrix%LSR,dtype = np.float32)

    for i in range(LSR):
        index_mat[index_mat == (i+1)%LSR] = s[i]

    res = tf.matmul(tf.reshape(w_gt,(1,-1)),index_mat)

    final_result = tf.concat([res for _ in range(int(N/LSR))], axis = 1)
    if N%LSR != 0:
        final_result = tf.concat([final_result,res[:,:N%LSR]], axis = 1)
        
    return final_result

In [254]:
s

<tf.Tensor: id=23866, shape=(5,), dtype=float32, numpy=
array([0.2350838 , 0.20497374, 0.1416037 , 0.23723076, 0.18110798],
      dtype=float32)>

In [248]:
final_result

<tf.Tensor: id=23897, shape=(1, 90), dtype=float32, numpy=
array([[8.0356655, 8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655,
        8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655, 8.2795   ,
        8.207101 , 8.076358 , 8.281372 , 8.0356655, 8.2795   , 8.207101 ,
        8.076358 , 8.281372 , 8.0356655, 8.2795   , 8.207101 , 8.076358 ,
        8.281372 , 8.0356655, 8.2795   , 8.207101 , 8.076358 , 8.281372 ,
        8.0356655, 8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655,
        8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655, 8.2795   ,
        8.207101 , 8.076358 , 8.281372 , 8.0356655, 8.2795   , 8.207101 ,
        8.076358 , 8.281372 , 8.0356655, 8.2795   , 8.207101 , 8.076358 ,
        8.281372 , 8.0356655, 8.2795   , 8.207101 , 8.076358 , 8.281372 ,
        8.0356655, 8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655,
        8.2795   , 8.207101 , 8.076358 , 8.281372 , 8.0356655, 8.2795   ,
        8.207101 , 8.076358 , 8.281372 , 8.0356655, 8

In [224]:
LSR = 3
test = []
for i in range(LSR):
    test.append([j for j in range(i+1, 2*N + i, 2)])
repeat_matrix = tf.transpose(tf.convert_to_tensor(test))

In [229]:
for i in range(LSR):
    index_mat[index_mat == (i+1)%LSR] = s[i]

## Convolution on Whole Batch

In [255]:
w_gt

<tf.Tensor: id=22872, shape=(61, 90), dtype=float32, numpy=
array([[0.35533756, 0.6894588 , 0.80479175, ..., 0.6349884 , 0.55541885,
        0.24350293],
       [0.02859625, 0.02614178, 0.03149249, ..., 0.04450782, 0.01449552,
        0.0345098 ],
       [0.19266987, 0.04535276, 0.01200723, ..., 0.15658332, 0.25237033,
        0.05255016],
       ...,
       [0.13885812, 0.13401778, 0.36846238, ..., 0.05631729, 0.13022679,
        0.01310542],
       [0.0923833 , 0.13903259, 0.31079543, ..., 0.20548564, 0.32254213,
        0.0272678 ],
       [0.1789712 , 0.3346631 , 0.2509143 , ..., 0.12973386, 0.2217105 ,
        0.04943626]], dtype=float32)>

In [256]:
s_t

<tf.Tensor: id=22837, shape=(61, 3), dtype=float32, numpy=
array([[0.3508521 , 0.35592723, 0.2932207 ],
       [0.29765448, 0.2027476 , 0.49959788],
       [0.3778228 , 0.19784515, 0.42433208],
       [0.26167667, 0.37978873, 0.3585346 ],
       [0.25392237, 0.3116903 , 0.4343873 ],
       [0.37675145, 0.29659584, 0.32665262],
       [0.4504265 , 0.22946675, 0.32010677],
       [0.4685757 , 0.28320727, 0.24821702],
       [0.27434602, 0.49796346, 0.22769052],
       [0.3014842 , 0.4085322 , 0.28998363],
       [0.31298846, 0.2481137 , 0.43889785],
       [0.3437103 , 0.35235316, 0.30393663],
       [0.3123308 , 0.33182576, 0.35584342],
       [0.34683648, 0.36951542, 0.2836481 ],
       [0.2955191 , 0.25643033, 0.44805053],
       [0.42602712, 0.25265813, 0.3213147 ],
       [0.46359792, 0.28226194, 0.25414014],
       [0.24860227, 0.31895885, 0.43243885],
       [0.19122706, 0.42353   , 0.38524297],
       [0.32554874, 0.30551052, 0.3689407 ],
       [0.41641364, 0.25902247, 0.3245638

In [261]:
tf.concat([Convolution(s_t[i],w_gt[i]) for i in range(batch_size)],axis = 0)

<tf.Tensor: id=95035, shape=(61, 90), dtype=float32, numpy=
array([[7.871441  , 7.974221  , 7.932649  , ..., 7.871441  , 7.974221  ,
        7.932649  ],
       [0.5158037 , 0.510467  , 0.51830107, ..., 0.5158037 , 0.510467  ,
        0.51830107],
       [2.3188183 , 2.3532133 , 2.3042262 , ..., 2.3188183 , 2.3532133 ,
        2.3042262 ],
       ...,
       [4.7748456 , 4.8092613 , 4.919135  , ..., 4.7748456 , 4.8092613 ,
        4.919135  ],
       [3.6825395 , 3.713816  , 3.6298757 , ..., 3.6825395 , 3.713816  ,
        3.6298757 ],
       [2.7274227 , 2.8051114 , 2.7644281 , ..., 2.7274227 , 2.8051114 ,
        2.7644281 ]], dtype=float32)>

# Testing Location Focusing

In [271]:
LocationFocusing( k_t, M_prev, beta_t,    g_t, w_prev, s_t, gamma_t,   K = None)

<tf.Tensor: id=102902, shape=(61, 90), dtype=float32, numpy=
array([[0.01110817, 0.01110876, 0.01111641, ..., 0.01110817, 0.01110876,
        0.01111641],
       [0.01099981, 0.01120573, 0.0111278 , ..., 0.01099981, 0.01120573,
        0.0111278 ],
       [0.01116099, 0.01097047, 0.01120188, ..., 0.01116099, 0.01097047,
        0.01120188],
       ...,
       [0.01121498, 0.01105435, 0.011064  , ..., 0.01121498, 0.01105435,
        0.011064  ],
       [0.01111365, 0.01110223, 0.01111746, ..., 0.01111365, 0.01110223,
        0.01111746],
       [0.01106231, 0.01121623, 0.01105479, ..., 0.01106231, 0.01121623,
        0.01105479]], dtype=float32)>

# Final Function

In [273]:
def LocationFocusing( k_t, M_prev, beta_t,    g_t, w_prev, s_t, gamma_t,   K = None):
    
    '''
    Computes the Write Vector for whole batch through Location Attentioning in one go.
    
    k_t, M_prev, b_t, K : SAME AS IN CONTENT FOCUSING
    g_t : (batch_size,1), Interpolation Gate in the range (0,1) emitted by HEAD IN USE.
    w_prev : (batch_size,N), Weight Vector produced by the HEAD IN USE at the previous time step.
    s_t : (batch_size, len(shift_range)), The weights emitted by the HEAD IN USE that defines the normalized distribution over the allowed integer shifts (which is shift_range object)
                
    gamma_t : (batch_size,1), Sharpening Factor >= 1    
    
    RETURNS:
    
    w_t : (batch_size,N), Final Weight Vector through Location Attentioning
    '''
    
    w_ct = ContentFocusing(k_t, M_prev, beta_t, K)
    
    batch_size,N,M = M_prev.shape
    
    #assert w_prev.shape == (N,)
    
    #Interpolation
    w_gt = g_t * w_ct + (1 - g_t) * w_prev
    
    #Convolutional Shift
        #The main Hurdle!!
    w_hat_t = tf.concat([Convolution(s_t[i],w_gt[i]) for i in range(batch_size)],axis = 0)
    #^Of shape [batch_size, N]
    
    
    #Sharpening
    powered = tf.pow(w_hat_t, g_t)
    w_t = powered / tf.reshape(tf.reduce_sum(powered, axis = 1),(-1,1))
    
    return w_t


def Convolution(s_t, w_gt):
    '''
    The Circular Convolutor.
    
    s_t: (len(shift_range),) ; Shift Weighting of the particualar input
    w_gt: (N,) ; W_gt Vector for the particualar input as calculated in the Focusing function
    
    RETURNS:
    
    w_hat_t: (1,N) ; Vector found after Circular Convolution of s_t on w_gt
    '''
    LSR = len(s_t) #length of shift range
    test = []
    for i in range(LSR):
        test.append([j for j in range(i+1, 2*N + i, 2)])
    repeat_matrix = tf.transpose(tf.convert_to_tensor(test))
    if N%LSR != 0:
        repeat_matrix = tf.concat([repeat_matrix,tf.transpose(test[:N%LSR])],axis = 1)

    index_mat = np.array(repeat_matrix%LSR,dtype = np.float32)

    for i in range(LSR):
        index_mat[index_mat == (i+1)%LSR] = s[i]

    res = tf.matmul(tf.reshape(w_gt,(1,-1)),index_mat)

    final_result = tf.concat([res for _ in range(int(N/LSR))], axis = 1)
    if N%LSR != 0:
        final_result = tf.concat([final_result,res[:,:N%LSR]], axis = 1)
        
    return final_result