<a href="https://colab.research.google.com/github/Nathan-Mekuria-Solomon/ML-practice/blob/main/natural_language_processing_edited/Transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [2]:
# sinusoidal positional encoding layer
class PositionalEncoding(keras.layers.Layer):
  """Adds a fixed sinusoidal positional encoding to its inputs.

  For position ``t`` and pair index ``k`` the precomputed table holds
  ``sin(t / 10000 ** (2 * k / max_dims))`` in feature ``2 * k`` and the
  matching cosine in feature ``2 * k + 1``.

  Args:
    max_steps: number of positions stored in the table.
    max_dims: number of features stored per position (an odd value is
      rounded up to the next even number).
    dtype: dtype of the stored table.
    **kwargs: forwarded to ``keras.layers.Layer``.
  """

  def __init__(self, max_steps, max_dims, dtype= tf.float32, **kwargs):
    super().__init__(dtype= dtype, **kwargs)
    # Features are written as (sin, cos) pairs, so the table width must be
    # even; `call` slices the table back down to the real input width.
    if max_dims % 2 == 1: max_dims += 1
    # p[k, t] = t (position) and i[k, t] = k (pair index);
    # both have shape (max_dims // 2, max_steps).
    p, i = np.meshgrid(np.arange(max_steps), np.arange(max_dims // 2))
    angles = (p / 10000 ** (2 * i / max_dims)).T  # (max_steps, max_dims // 2)
    pos_embed = np.empty((1, max_steps, max_dims))
    # Interleave along the LAST axis: even features get sin, odd features get cos.
    pos_embed[0, :, ::2] = np.sin(angles)
    pos_embed[0, :, 1::2] = np.cos(angles)
    self.positional_embedding = tf.constant(pos_embed.astype(self.dtype))

  def call(self, inputs):
    """Returns `inputs` plus the table cropped to the inputs' last two dimensions.

    The table's leading axis has size 1, so the sum broadcasts over the
    leading (batch) axis of `inputs`.
    """
    shape = tf.shape(inputs)
    return inputs + self.positional_embedding[:, :shape[-2], :shape[-1]]


In [3]:
# Input stage of the transformer: token embeddings plus positional encodings.
embed_size = 512    # width of every token embedding vector
max_steps = 500     # number of positions held by the positional-encoding table
vocab_size = 10000  # number of rows in the embedding table

# Variable-length sequences of int32 token ids, one input per side of the model.
encoder_inputs = keras.layers.Input(shape= [None], dtype= np.int32)
decoder_inputs = keras.layers.Input(shape= [None], dtype= np.int32)

# Both layers are created once and applied to both inputs.
embeddings = keras.layers.Embedding(vocab_size, embed_size)
positional_encoding = PositionalEncoding(max_steps, max_dims= embed_size)

# Encoder side and decoder side: embed the token ids.
encoder_embeddings = embeddings(encoder_inputs)
decoder_embeddings = embeddings(decoder_inputs)

# Then add the positional encoding to each embedded sequence.
encoder_in = positional_encoding(encoder_embeddings)
decoder_in = positional_encoding(decoder_embeddings)

In [15]:
# implementing a transformer (attention layers only)
num_layers = 6  # how many attention blocks are stacked in the encoder and in the decoder

# Encoder: a stack of self-attention layers (query and value are both Z).
Z = encoder_in
for N in range(num_layers):
  Z = keras.layers.Attention(use_scale= True)([Z, Z])

encoder_outputs = Z

# Decoder: each block is self-attention followed by attention over the encoder outputs.
Z = decoder_in
for N in range(num_layers):
  # The causal mask stops position t from attending to positions after t, so the
  # decoder cannot look at the tokens it is being trained to predict.
  # NOTE: `use_causal_mask` is a call argument in TF >= 2.10 / Keras 3; older
  # TF releases take `causal= True` in the layer constructor instead.
  Z = keras.layers.Attention(use_scale= True)([Z, Z], use_causal_mask= True)
  Z = keras.layers.Attention(use_scale= True)([Z, encoder_outputs])

# Per-time-step softmax over the vocabulary.
outputs = keras.layers.TimeDistributed(keras.layers.Dense(vocab_size, activation= "softmax"))(Z)