# Custom Implementation of Positional Encoding

In [34]:
import numpy as np

In [35]:
def getPositionEncoding(seq_len, d, n=10000):
    """Build the sinusoidal positional-encoding matrix.

    Entry ``P[k, 2*i]`` is ``sin(k / n**(2*i/d))`` and entry ``P[k, 2*i+1]``
    is ``cos(k / n**(2*i/d))``.

    Args:
        seq_len: Number of positions (rows of the result).
        d: Encoding dimension (columns of the result). May be odd; the last
            column is then the sine term for ``2*i == d - 1`` instead of
            being left as zeros.
        n: Base of the frequency progression.

    Returns:
        A ``numpy.ndarray`` of shape ``(seq_len, d)``.
    """
    P = np.zeros((seq_len, d))
    # Column vector of positions so it broadcasts against the frequency row.
    positions = np.arange(seq_len)[:, np.newaxis]
    # One exponent 2*i/d per even column index 2*i (includes d-1 when d is odd).
    exponents = np.arange(0, d, 2) / d
    angles = positions / np.power(n, exponents)
    P[:, 0::2] = np.sin(angles)
    # There are only d // 2 odd columns, so drop the extra angle when d is odd.
    P[:, 1::2] = np.cos(angles[:, : d // 2])
    return P

In [36]:
# Sanity check: a small 4x4 encoding with base n=100 so the values are easy to read.
P = getPositionEncoding(seq_len=4, d=4, n=100)
print(P)

[[ 0.          1.          0.          1.        ]
 [ 0.84147098  0.54030231  0.09983342  0.99500417]
 [ 0.90929743 -0.41614684  0.19866933  0.98006658]
 [ 0.14112001 -0.9899925   0.29552021  0.95533649]]


## Positional Encoding Layer in Keras

In [37]:
import tensorflow as tf
from tensorflow import convert_to_tensor, string
from tensorflow.keras.layers import TextVectorization, Embedding, Layer
from tensorflow.data import Dataset

import numpy as np
import matplotlib.pyplot as plt

In [38]:
# Length of every vectorized sentence (passed to TextVectorization below).
output_sequence_length = 5
# Upper bound on the vocabulary size (passed as max_tokens below).
vocab_size = 10
# Two toy sentences, each wrapped in its own single-element list.
sentences = [['i am a robot'],['you too robot']]
sentence_data = Dataset.from_tensor_slices(sentences)

# Create the text vectorization layer
vectorize_layer = TextVectorization(output_sequence_length=output_sequence_length,
                                   max_tokens=vocab_size)

# Fit the layer on the sentences so it builds its vocabulary
vectorize_layer.adapt(sentence_data)

# Convert all sentences to a string tensor
word_tensors = convert_to_tensor(sentences,dtype=tf.string)

# Use the word tensors to get the vectorized (integer-encoded) phrases
vectorized_words = vectorize_layer(word_tensors)

print("Vocabulary: ", vectorize_layer.get_vocabulary()) 
print("Vectorized words: ", vectorized_words)

Vocabulary:  ['', '[UNK]', 'robot', 'you', 'too', 'i', 'am', 'a']
Vectorized words:  tf.Tensor(
[[5 6 7 2 0]
 [3 4 2 0 0]], shape=(2, 5), dtype=int64)


#### Word Embedding

In [39]:
# Size of each embedding vector.
output_length = 6
# Embedding table with vocab_size rows and output_length columns.
# NOTE(review): no seed or initializer is set, so the printed values
# presumably differ from run to run -- confirm if reproducibility matters.
word_embedding_layer = Embedding(vocab_size, output_length)
# Look up one vector per token id in vectorized_words.
embedded_words = word_embedding_layer(vectorized_words)
print(embedded_words)

tf.Tensor(
[[[-0.01465261 -0.02443929 -0.02567568  0.01612092 -0.0180501
    0.01221377]
  [ 0.04108001 -0.0425668   0.03379712  0.02587621  0.04580636
    0.01414463]
  [ 0.03896799  0.03274581 -0.01577972  0.01959071  0.03781739
   -0.0205436 ]
  [ 0.04765031  0.04046229  0.03540636 -0.04586948 -0.04644794
   -0.03318375]
  [-0.037299   -0.03110769  0.02678451 -0.00517948  0.04170302
   -0.02068119]]

 [[ 0.02332988  0.036721   -0.00412661 -0.00083592  0.0074178
   -0.02291187]
  [-0.01510976 -0.03449613  0.04011663  0.01848065 -0.00095644
   -0.00769093]
  [ 0.04765031  0.04046229  0.03540636 -0.04586948 -0.04644794
   -0.03318375]
  [-0.037299   -0.03110769  0.02678451 -0.00517948  0.04170302
   -0.02068119]
  [-0.037299   -0.03110769  0.02678451 -0.00517948  0.04170302
   -0.02068119]]], shape=(2, 5, 6), dtype=float32)


#### Positional Embedding

In [40]:
# Embedding table with one row per position index 0 .. output_sequence_length-1.
position_embedding_layer = Embedding(output_sequence_length, output_length)
position_indices = tf.range(output_sequence_length)
# Look up the vector for every position index.
embedded_indices = position_embedding_layer(position_indices)
print(embedded_indices)

tf.Tensor(
[[ 0.02561091 -0.03799593 -0.01213596 -0.03664383  0.0425745  -0.03410308]
 [ 0.03241188  0.03786621 -0.04308493 -0.02314249  0.03154607 -0.0133027 ]
 [-0.02864889 -0.00165879  0.01828745  0.01672513  0.03201916  0.00918214]
 [-0.01671242  0.04078529 -0.02116981 -0.02729063 -0.03995778 -0.03068408]
 [-0.01710922 -0.0359491  -0.02147167  0.04926025 -0.03485603  0.02914684]], shape=(5, 6), dtype=float32)


#### Output of Positional Encoding Layer in Transformers
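Positional encoding output = word embedding + positional embedding (added element-wise; the positional embedding is broadcast across every sentence in the batch).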

In [41]:
# The position embeddings are added to the word embeddings of each sentence;
# the sum relies on broadcasting over the leading (sentence) axis.
final_output_embedding = embedded_words + embedded_indices
print("Final output: ", final_output_embedding)

Final output:  tf.Tensor(
[[[ 0.0109583  -0.06243522 -0.03781164 -0.0205229   0.0245244
   -0.02188932]
  [ 0.07349189 -0.00470059 -0.00928781  0.00273372  0.07735243
    0.00084194]
  [ 0.0103191   0.03108703  0.00250772  0.03631584  0.06983655
   -0.01136146]
  [ 0.0309379   0.08124758  0.01423656 -0.07316011 -0.08640572
   -0.06386783]
  [-0.05440821 -0.06705679  0.00531284  0.04408077  0.00684699
    0.00846565]]

 [[ 0.04894079 -0.00127493 -0.01626257 -0.03747974  0.0499923
   -0.05701496]
  [ 0.01730212  0.00337008 -0.0029683  -0.00466185  0.03058963
   -0.02099362]
  [ 0.01900142  0.0388035   0.05369381 -0.02914435 -0.01442878
   -0.02400161]
  [-0.05401142  0.0096776   0.0056147  -0.03247011  0.00174524
   -0.05136527]
  [-0.05440821 -0.06705679  0.00531284  0.04408077  0.00684699
    0.00846565]]], shape=(2, 5, 6), dtype=float32)


### Subclassing the Keras `Layer` Class to Build a Position Embedding Layer

In [42]:
class PositionEmbeddingLayer(Layer):
    """Add a position embedding to a token embedding.

    Both embedding tables are ordinary ``Embedding`` layers created with
    their default settings.

    Args:
        seq_length: Number of rows in the position-embedding table. The
            last axis of the inputs is used to generate position indices,
            so it should not exceed this value.
        vocab_size: Number of rows in the token-embedding table.
        output_dim: Size of each embedding vector.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, seq_length, vocab_size, output_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can reproduce the constructor arguments.
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.output_dim = output_dim
        self.word_embedding_layer = Embedding(
                                    input_dim=vocab_size,
                                    output_dim=output_dim)
        self.position_embedding_layer = Embedding(
                                    input_dim=seq_length,
                                    output_dim=output_dim)

    def call(self, inputs):
        """Return token embeddings plus position embeddings.

        Args:
            inputs: Integer token ids; the size of the last axis determines
                how many position indices are generated.

        Returns:
            The sum of the token embeddings and the position embeddings.
        """
        # Positions 0 .. (length of last axis - 1), computed at call time.
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding_layer(inputs)
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_words + embedded_indices

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "seq_length": self.seq_length,
            "vocab_size": self.vocab_size,
            "output_dim": self.output_dim,
        })
        return config

In [43]:
# Build the combined word + position embedding layer and apply it to the
# vectorized sentences from the earlier cell.
my_embedding_layer = PositionEmbeddingLayer(output_sequence_length,
                                           vocab_size, output_length)
embedded_layer_output = my_embedding_layer(vectorized_words)
print("Output from my_embedded_layer:",embedded_layer_output)

Output from my_embedded_layer: tf.Tensor(
[[[ 0.00637855  0.00673728 -0.03841537  0.07975792 -0.04048951
   -0.04499073]
  [-0.00443989  0.01852877  0.02897206 -0.01387301 -0.00669345
   -0.08556506]
  [-0.03428832  0.02473707  0.00638186  0.00301592  0.03876719
    0.02305264]
  [-0.02576433 -0.00288253  0.05967318  0.04707334 -0.00611119
    0.05196183]
  [ 0.0235363  -0.04792032  0.03497241 -0.00968364 -0.04204199
    0.00966486]]

 [[-0.00708105 -0.03494656 -0.06759453  0.05501465  0.04467877
   -0.02714159]
  [-0.0053491   0.0260518  -0.01416695  0.04234953  0.05477338
   -0.02255105]
  [-0.05188236 -0.03578881  0.00754474  0.01513701 -0.05179217
    0.08367383]
  [-0.02195654  0.00105239  0.06624406  0.01884438 -0.00712731
    0.02803523]
  [ 0.0235363  -0.04792032  0.03497241 -0.00968364 -0.04204199
    0.00966486]]], shape=(2, 5, 6), dtype=float32)


### Positional Encoding in Transformers

In [60]:
class PositionEmbeddingFixedWeights(Layer):
    """Token plus position embedding with fixed sinusoidal weights.

    Both embedding tables are initialised from ``get_position_encoding`` and
    created with ``trainable=False``. Note that the token table is filled
    with the same sinusoidal pattern, indexed by token id.

    Args:
        seq_length: Number of rows in the position-embedding table. The
            last axis of the inputs is used to generate position indices,
            so it should not exceed this value.
        vocab_size: Number of rows in the token-embedding table.
        output_dim: Size of each embedding vector.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, seq_length, vocab_size, output_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can reproduce the constructor arguments.
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.output_dim = output_dim

        word_embedding_matrix = self.get_position_encoding(vocab_size, output_dim)
        pos_embedding_matrix = self.get_position_encoding(seq_length, output_dim)

        self.word_embedding_layer = Embedding(
                                        input_dim = vocab_size,
                                        output_dim = output_dim,
                                        weights = [word_embedding_matrix],
                                        trainable=False)
        self.position_embedding_layer = Embedding(
                                        input_dim = seq_length,
                                        output_dim = output_dim,
                                        weights = [pos_embedding_matrix],
                                        trainable=False)

    def get_position_encoding(self, seq_len, d, n=10000):
        """Build the sinusoidal encoding matrix of shape ``(seq_len, d)``.

        Entry ``P[k, 2*i]`` is ``sin(k / n**(2*i/d))`` and entry
        ``P[k, 2*i+1]`` is ``cos(k / n**(2*i/d))``. When ``d`` is odd the
        last column is the sine term for ``2*i == d - 1`` instead of being
        left as zeros.

        Args:
            seq_len: Number of rows (positions).
            d: Number of columns (encoding dimension).
            n: Base of the frequency progression.

        Returns:
            A ``numpy.ndarray`` of shape ``(seq_len, d)``.
        """
        P = np.zeros((seq_len, d))
        # Column vector of positions so it broadcasts against the frequency row.
        positions = np.arange(seq_len)[:, np.newaxis]
        # One exponent 2*i/d per even column index 2*i.
        exponents = np.arange(0, d, 2) / d
        angles = positions / np.power(n, exponents)
        P[:, 0::2] = np.sin(angles)
        # There are only d // 2 odd columns, so drop the extra angle when d is odd.
        P[:, 1::2] = np.cos(angles[:, : d // 2])
        return P

    def call(self,inputs):
        """Return fixed token embeddings plus fixed position embeddings.

        Args:
            inputs: Integer token ids; the size of the last axis determines
                how many position indices are generated.

        Returns:
            The sum of the token embeddings and the position embeddings.
        """
        # Positions 0 .. (length of last axis - 1), computed at call time.
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding_layer(inputs)
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_words + embedded_indices

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "seq_length": self.seq_length,
            "vocab_size": self.vocab_size,
            "output_dim": self.output_dim,
        })
        return config
        
        
        

In [64]:
# Build the fixed-weight (sinusoidal) embedding layer and apply it to the
# vectorized sentences from the earlier cell.
attnisallyouneed_embedding = PositionEmbeddingFixedWeights(output_sequence_length,
                                         vocab_size, output_length)
attnisallyouneed_output = attnisallyouneed_embedding(vectorized_words)
# NOTE(review): the label below still says "my_embedded_layer", but the tensor
# printed here comes from attnisallyouneed_embedding -- consider renaming it.
print("Output from my_embedded_layer: ", attnisallyouneed_output)

Output from my_embedded_layer:  tf.Tensor(
[[[-0.9589243   1.2836622   0.23000172  1.9731903   0.01077196
    1.9999421 ]
  [ 0.56205547  1.5004725   0.3213085   1.9603932   0.01508068
    1.9999142 ]
  [ 1.566284    0.3377554   0.41192317  1.9433732   0.01938933
    1.999877  ]
  [ 1.0504174  -1.4061394   0.2314966   1.9860148   0.01077211
    1.9999698 ]
  [-0.7568025   0.3463564   0.18459873  1.982814    0.00861763
    1.9999628 ]]

 [[ 0.14112     0.0100075   0.1387981   1.9903207   0.00646326
    1.9999791 ]
  [ 0.08466846 -0.11334133  0.23099795  1.9817369   0.01077207
    1.9999605 ]
  [ 1.8185948  -0.8322937   0.185397    1.9913884   0.00861771
    1.9999814 ]
  [ 0.14112     0.0100075   0.1387981   1.9903207   0.00646326
    1.9999791 ]
  [-0.7568025   0.3463564   0.18459873  1.982814    0.00861763
    1.9999628 ]]], shape=(2, 5, 6), dtype=float32)
