In [1]:
import tensorflow as tf
import numpy as np

In [2]:
tf.__version__

'2.9.1'

In [13]:
input_embeddings = [[
    "Salut", "comment", "ca", "va", "?",
]]

output_embeddings = [[
    "<START>", "Hi", "how", "are", "you", "?",
]]
print(input_embeddings)
print(output_embeddings)

[['Salut', 'comment', 'ca', 'va', '?']]
[['<START>', 'Hi', 'how', 'are', 'you', '?']]


In [14]:
def get_vocabulary(sequences):

    token_to_info = {}

    for sequence in sequences:
        for word in sequence:
            if word not in token_to_info:
                token_to_info[word] = len(token_to_info)
    return token_to_info

input_voc = get_vocabulary(input_embeddings)
output_voc = get_vocabulary(output_embeddings)

input_voc["<START>"] = len(input_voc)
input_voc["<END>"] = len(input_voc)
input_voc["<PAD>"] = len(input_voc)

output_voc["<END>"] = len(output_voc)
output_voc["<PAD>"] = len(output_voc)

print(input_voc)
print(output_voc)

{'Salut': 0, 'comment': 1, 'ca': 2, 'va': 3, '?': 4, '<START>': 5, '<END>': 6, '<PAD>': 7}
{'<START>': 0, 'Hi': 1, 'how': 2, 'are': 3, 'you': 4, '?': 5, '<END>': 6, '<PAD>': 7}


In [15]:
def sequences_to_int(sequences, voc):
    for sequence in sequences:
        for s, word in enumerate(sequence):
            sequence[s] = voc[word]
    return(np.array(sequences))

input_seq = sequences_to_int(input_embeddings, input_voc)
output_seq = sequences_to_int(output_embeddings, output_voc)

print(input_seq)
print(output_seq)

[[0 1 2 3 4]]
[[0 1 2 3 4 5]]


In [49]:
class EmbeddingLayer(tf.keras.layers.Layer):

    def __init__(self, nb_token, **kwargs):
        self.nb_token = nb_token
        super(**kwargs).__init__()

    def build(self, input_shape):
        self.word_embedding = tf.keras.layers.Embedding(
            self.nb_token, 256,
        )
        super().build(input_shape)

    def call(self, x):
        embed = self.word_embedding(x)
        return embed


class ScaledDotProductAttention(tf.keras.layers.Layer):

    def __init__(self, **kwargs):
        super(**kwargs).__init__()

    def build(self, input_shape):
        self.query_layer = tf.keras.layers.Dense(256)
        self.value_layer = tf.keras.layers.Dense(256)
        self.key_layer = tf.keras.layers.Dense(256)
        super().build(input_shape)

    def call(self, x):
        Q = self.query_layer(x)
        K = self.key_layer(x)
        V = self.value_layer(x)
        QK = tf.matmul(Q, K, transpose_b=True)
        QK = QK / tf.math.sqrt(256.)
        softmax_QK = tf.nn.softmax(QK, axis=-1)
        attention = tf.matmul(softmax_QK, V)
        return attention

def test():
    layer_input = tf.keras.Input(shape=(5))
    embedding = EmbeddingLayer(nb_token=5)(layer_input)
    attention = ScaledDotProductAttention()(embedding)
    model = tf.keras.Model(layer_input, attention)
    model.summary()
    return model

m_test = test()
out = m_test(input_seq)
print(out.shape)

Model: "model_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_35 (InputLayer)       [(None, 5)]               0         
                                                                 
 embedding_layer_34 (Embeddi  (None, 5, 256)           1280      
 ngLayer)                                                        
                                                                 
 scaled_dot_product_attentio  (None, 5, 256)           197376    
 n_34 (ScaledDotProductAtten                                     
 tion)                                                           
                                                                 
Total params: 198,656
Trainable params: 198,656
Non-trainable params: 0
_________________________________________________________________
(1, 5, 256)


In [59]:
class EncoderLayer(tf.keras.layers.Layer):

    def __init__(self, **kwargs):
        super(**kwargs).__init__()

    def build(self, input_shape):
        self.scaled_dot_product_attention = ScaledDotProductAttention()
        self.norm = tf.keras.layers.LayerNormalization()
        self.dense_out = tf.keras.layers.Dense(256)
        super().build(input_shape)

    def call(self, x):
        attention = self.scaled_dot_product_attention(x)
        post_attention = self.norm(x + attention)
        x = self.dense_out(post_attention)
        enc_output = self.norm(x + post_attention)
        return enc_output

def test():
    layer_input = tf.keras.Input(shape=(5))
    embedding = EmbeddingLayer(nb_token=5)(layer_input)
    enc_output = EncoderLayer()(embedding)
    model = tf.keras.Model(layer_input, enc_output)
    model.summary()
    return model

m_test = test()
out = m_test(input_seq)
print(out.shape)

Model: "model_33"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_44 (InputLayer)       [(None, 5)]               0         
                                                                 
 embedding_layer_43 (Embeddi  (None, 5, 256)           1280      
 ngLayer)                                                        
                                                                 
 encoder_layer_8 (EncoderLay  (None, 5, 256)           263680    
 er)                                                             
                                                                 
Total params: 264,960
Trainable params: 264,960
Non-trainable params: 0
_________________________________________________________________
(1, 5, 256)


In [63]:
class MultiHeadAttention(tf.keras.layers.Layer):

    def __init__(self, dim=256, nb_head=8, **kwargs):
        self.head_dim = 256 // 8
        print("head_dim", self.head_dim)
        super(**kwargs).__init__()

    def build(self, input_shape):
        self.query_layer = tf.keras.layers.Dense(256)
        self.value_layer = tf.keras.layers.Dense(256)
        self.key_layer = tf.keras.layers.Dense(256)
        super().build(input_shape)

    def call(self, x):
        return x
        return attention

def test():
    layer_input = tf.keras.Input(shape=(5))
    embedding = EmbeddingLayer(nb_token=5)(layer_input)
    multi_attention = MultiHeadAttention()(embedding)
    model = tf.keras.Model(layer_input, multi_attention)
    model.summary()
    return model

m_test = test()
out = m_test(input_seq)
print(out.shape)

head_dim 32
Model: "model_37"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_48 (InputLayer)       [(None, 5)]               0         
                                                                 
 embedding_layer_47 (Embeddi  (None, 5, 256)           1280      
 ngLayer)                                                        
                                                                 
 multi_head_attention (Multi  (None, 5, 256)           0         
 HeadAttention)                                                  
                                                                 
Total params: 1,280
Trainable params: 1,280
Non-trainable params: 0
_________________________________________________________________
(1, 5, 256)
