<a href="https://colab.research.google.com/github/SalvatoreAdalberto/mlapp/blob/main/LAB8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np


In [47]:
class PositionalEncoding(tf.keras.layers.Layer):
  """Add fixed sinusoidal positional encodings to a sequence tensor.

  The encoding table has shape ``(1, length, channels)``: even channel
  indices hold ``sin`` of the position angle and odd channel indices hold
  ``cos``. The table is not trainable and is built once at construction.

  Args:
    length: Maximum number of positions (rows of the table).
    channels: Number of feature channels (columns of the table).
  """

  def __init__(self, length, channels) -> None:
    super(PositionalEncoding, self).__init__()
    self.l = length
    self.f = channels
    # The table depends only on (length, channels), so compute it a single
    # time here instead of on every forward pass.
    self._pos_encoding = self._build_encoding()

  def get_angles(self, pos, i, d_model):
    """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Args:
      pos: Position indices (called with shape ``(length, 1)``).
      i: Channel indices (called with shape ``(1, channels)``).
      d_model: Total number of channels used to scale the exponent.

    Returns:
      The broadcast product of ``pos`` and the per-channel angle rates.
    """
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

  def _build_encoding(self):
    """Build the ``(1, length, channels)`` float32 encoding table."""
    angle_rads = self.get_angles(np.arange(self.l)[:, np.newaxis],
                                 np.arange(self.f)[np.newaxis, :],
                                 self.f)

    # apply sin to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # apply cos to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    return angle_rads[np.newaxis, ...].astype(np.float32)

  def call(self, x):
    """Return ``x`` plus the positional encoding.

    Args:
      x: Input whose last two axes are (positions, channels). Inputs with
        fewer than ``length`` positions receive the first rows of the table.

    Returns:
      A tensor with the broadcast shape of ``x`` and the encoding table.
    """
    table = self._pos_encoding
    # Use the static sequence length when it is known so that inputs shorter
    # than `length` still line up with the table; otherwise keep the full
    # table and rely on broadcasting exactly as before.
    seq_len = x.shape[-2] if len(x.shape) >= 2 else None
    if seq_len is not None:
      table = table[:, :seq_len, :]

    # Match the input dtype so non-float32 inputs do not fail on the add.
    pos_encoding = tf.cast(table, dtype=x.dtype)

    return pos_encoding + x

In [None]:
# Smoke-test the positional encoding. `call` requires an input tensor, so
# feed an all-zero batch of shape (1, 100, 10) through the layer; the result
# is then the encoding table itself.
pe = PositionalEncoding(100, 10)
encoded = pe(tf.zeros((1, 100, 10)))
print(encoded.shape)

In [40]:
class AttentionLayer(tf.keras.layers.Layer):
  """Single-head scaled dot-product self-attention.

  The input is projected by three bias-free dense layers into queries, keys
  and values; the output is ``softmax(Q K^T / sqrt(d_k)) V``.

  Args:
    dim: Output width of each of the three projections.
  """

  def __init__(self, dim) -> None:
    super(AttentionLayer, self).__init__()
    # Projections are stored in the order: query, key, value.
    projections = []
    for _ in range(3):
      projections.append(tf.keras.layers.Dense(dim, use_bias=False))
    self.dense_layers = projections

  def call(self, x):
    """Attend over the second-to-last axis of ``x`` and return the result."""
    query_proj, key_proj, value_proj = self.dense_layers
    queries = query_proj(x)
    keys = key_proj(x)
    values = value_proj(x)

    # Scale the similarity scores by sqrt(d_k), the width of the keys.
    depth = tf.cast(tf.shape(keys)[-1], tf.float32)
    scores = tf.linalg.matmul(queries, keys, transpose_b=True)
    scores = scores / tf.math.sqrt(depth)

    # Normalise over the key axis, then mix the values accordingly.
    attention = tf.nn.softmax(scores, axis=-1)
    return tf.linalg.matmul(attention, values)

In [41]:
# Smoke test: run a random array of shape (1, 100, 10) through an
# AttentionLayer built with dim=10 and print the shape of the result.
x = np.random.rand(1, 100, 10)
a = AttentionLayer(10)
v = a(x)
print(v.shape)

(1, 100, 10)


In [45]:
class TransformerLayer(tf.keras.layers.Layer):
  """One encoder block: self-attention and a dense layer, each with Add & Norm.

  Args:
    n_samples: Sequence length. Accepted for interface compatibility; it is
      not used by this layer.
    dim: Feature width used by the attention projections and the dense layer.
  """

  def __init__(self, n_samples, dim) -> None:
    super(TransformerLayer, self).__init__()
    self.attention = AttentionLayer(dim)

    # LayerNormalization normalises each position over its features, which is
    # what the residual "Add & Norm" step needs. `Normalization` is a
    # preprocessing layer that standardises with dataset statistics (from
    # `adapt()` or passed-in mean/variance), none of which were ever
    # provided here.
    self.norm1 = tf.keras.layers.LayerNormalization()
    self.norm2 = tf.keras.layers.LayerNormalization()
    # NOTE(review): this is a single linear projection; the reference
    # Transformer uses a two-layer feed-forward network with a non-linearity.
    self.feed_forward = tf.keras.layers.Dense(dim)

  def call(self, x):
    """Apply attention and the dense layer, each followed by residual + norm."""
    # Residual connection around self-attention, then normalise.
    y = self.attention(x)
    x = self.norm1(y + x)

    # Residual connection around the dense layer, then normalise.
    y = self.feed_forward(x)
    x = self.norm2(y + x)

    return x
    

class Transformer(tf.keras.Model):
  """Positional encoding followed by a stack of ``N`` TransformerLayer blocks.

  Args:
    n_samples: Sequence length passed to the positional encoding and to each
      block.
    dim: Feature width passed to the positional encoding and to each block.
    N: Number of TransformerLayer blocks in the stack.
  """

  def __init__(self, n_samples, dim, N) -> None:
    super(Transformer, self).__init__()
    self.pos_enc = PositionalEncoding(n_samples, dim)

    stack = []
    for _ in range(N):
      stack.append(TransformerLayer(n_samples, dim))
    self.tf_layers = stack

  def call(self, x):
    """Add positional encodings to ``x`` and run it through every block in order."""
    hidden = self.pos_enc(x)
    for block in self.tf_layers:
      hidden = block(hidden)

    return hidden





In [48]:
# Smoke test: run a random array of shape (1, 100, 10) through a Transformer
# built with n_samples=100, dim=10 and N=4 blocks, then print the output shape.
x = np.random.rand(1, 100, 10)
a = Transformer(100, 10, 4)
v = a(x)
print(v.shape)

(1, 100, 10)
