# Perceiver Block 

![](https://miro.medium.com/max/4188/1*41GYOpmCItZMxO4V7U4FGw.jpeg)

## Import modules 

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model

## Perceiver 2D

In [2]:
class Perceiver2D(keras.Model):
    """Perceiver block operating on a 2D (elements x features) data array.

    A learned latent array queries the input data through a cross-attention
    module and is then refined by a stack of self-attention Transformer
    blocks. This pair of modules is applied ``num_iterations`` times; the
    same module objects (and therefore the same weights) are reused on every
    iteration.

    Parameters
    ----------
    data_dim: int
        Number of elements in the input data array, i.e. the size of the
        second axis of the tensor passed to ``call``.
    latent_dim: int
        Number of elements (rows) in the learned latent array.
    projection_dim: int
        Embedding size of each element in the data and latent arrays. Also
        used as the width of the query/key/value projections and as the
        ``key_dim`` of the multi-head attention layers.
    num_heads: int
        Number of multihead attention units for transformer
    num_transformer_blocks: int
        Number of transformer blocks
    ffn_units: list(int)
        Widths of the Dense layers in every feedforward network. All but the
        last layer use GELU; the last one is linear and followed by dropout.
        Because the feedforward output is added back to its input, the last
        entry must equal ``projection_dim``.
    dropout_rate: float
        Rate for the dropout applied at the end of every feedforward network.
        The attention layers use a fixed dropout of 0.1.
    num_iterations: int
        Number of times to pass input through perceiver

    Returns
    -------
    tf.Tensor
        Calling the model on a ``[batch_size, data_dim, projection_dim]``
        tensor returns the refined latent array of shape
        ``[batch_size, latent_dim, projection_dim]``.
    """

    def __init__(
        self,
        data_dim,
        latent_dim,
        projection_dim,
        num_heads,
        num_transformer_blocks,
        ffn_units,
        dropout_rate,
        num_iterations,
    ):
        super().__init__()

        # Only hyperparameters are stored here; weights and sub-models are
        # created lazily in build().
        self.latent_dim = latent_dim
        self.data_dim = data_dim
        self.projection_dim = projection_dim
        self.num_heads = num_heads
        self.num_transformer_blocks = num_transformer_blocks
        self.ffn_units = ffn_units
        self.dropout_rate = dropout_rate
        self.num_iterations = num_iterations

    def create_ffn(self, hidden_units, dropout_rate):
        """Build a feedforward network as a ``keras.Sequential``.

        Parameters
        ----------
        hidden_units: list(int)
            Dense layer widths; every layer except the last uses GELU.
        dropout_rate: float
            Rate of the dropout layer appended after the last Dense layer.

        Returns
        -------
        keras.Sequential
            Dense(GELU) x (len(hidden_units) - 1) -> Dense -> Dropout.
        """
        ffn_layers = [
            layers.Dense(units, activation=tf.nn.gelu)
            for units in hidden_units[:-1]
        ]
        # Final projection has no activation so it can feed a residual add.
        ffn_layers.append(layers.Dense(units=hidden_units[-1]))
        ffn_layers.append(layers.Dropout(dropout_rate))

        return keras.Sequential(ffn_layers)

    def create_cross_attention_module(
        self, latent_dim, data_dim, projection_dim, ffn_units, dropout_rate
    ):
        """Build the cross-attention module as a functional Keras model.

        The model takes a dict with keys ``"latent_array"`` and
        ``"data_array"``. Queries are projected from the latent array, keys
        and values from the data array; attention and the feedforward network
        are each followed by a skip connection.

        Returns
        -------
        keras.Model
            Maps the input dict to a tensor of shape
            [batch_size, latent_dim, projection_dim].
        """
        inputs = {
            # Receive the latent array as an input of shape [1, latent_dim, projection_dim].
            "latent_array": layers.Input(shape=(latent_dim, projection_dim)),
            # Receive the data_array (encoded image) as an input of shape [batch_size, data_dim, projection_dim].
            "data_array": layers.Input(shape=(data_dim, projection_dim)),
        }

        # Apply layer norm to the inputs
        latent_array = layers.LayerNormalization(epsilon=1e-6)(inputs["latent_array"])
        data_array = layers.LayerNormalization(epsilon=1e-6)(inputs["data_array"])

        # Create query tensor: [1, latent_dim, projection_dim].
        query = layers.Dense(units=projection_dim)(latent_array)
        # Create key tensor: [batch_size, data_dim, projection_dim].
        key = layers.Dense(units=projection_dim)(data_array)
        # Create value tensor: [batch_size, data_dim, projection_dim].
        value = layers.Dense(units=projection_dim)(data_array)

        # Generate cross-attention outputs: [batch_size, latent_dim, projection_dim].
        attention_output = layers.Attention(use_scale=True, dropout=0.1)(
            [query, key, value], return_attention_scores=False
        )
        # Skip connection 1 (adds the layer-normalized latent array).
        attention_output = layers.Add()([attention_output, latent_array])

        # Apply layer norm.
        attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output)
        # Apply Feedforward network.
        ffn = self.create_ffn(hidden_units=ffn_units, dropout_rate=dropout_rate)
        outputs = ffn(attention_output)
        # Skip connection 2.
        outputs = layers.Add()([outputs, attention_output])

        # Create the Keras model.
        return keras.Model(inputs=inputs, outputs=outputs)

    def create_transformer_module(
        self,
        latent_dim,
        projection_dim,
        num_heads,
        num_transformer_blocks,
        ffn_units,
        dropout_rate,
    ):
        """Build the latent self-attention Transformer as a functional model.

        Stacks ``num_transformer_blocks`` blocks; each block is layer norm ->
        multi-head self-attention -> skip connection -> layer norm ->
        feedforward network -> skip connection.

        Returns
        -------
        keras.Model
            Maps a [batch, latent_dim, projection_dim] tensor to a tensor of
            the same shape.
        """
        # input_shape: [1, latent_dim, projection_dim]
        inputs = layers.Input(shape=(latent_dim, projection_dim))

        x0 = inputs
        # Create multiple layers of the Transformer block.
        for _ in range(num_transformer_blocks):
            # Apply layer normalization 1.
            x1 = layers.LayerNormalization(epsilon=1e-6)(x0)
            # Create a multi-head self-attention layer.
            attention_output = layers.MultiHeadAttention(
                num_heads=num_heads, key_dim=projection_dim, dropout=0.1
            )(x1, x1)
            # Skip connection 1.
            x2 = layers.Add()([attention_output, x0])
            # Apply layer normalization 2.
            x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
            # Apply Feedforward network.
            ffn = self.create_ffn(hidden_units=ffn_units, dropout_rate=dropout_rate)
            x3 = ffn(x3)
            # Skip connection 2.
            x0 = layers.Add()([x3, x2])

        # Create the Keras model.
        return keras.Model(inputs=inputs, outputs=x0)

    def build(self, input_shape):
        """Create the trainable latent array and the two sub-models."""
        # Create latent array.
        self.latent_array = self.add_weight(
            shape=(self.latent_dim, self.projection_dim),
            initializer="random_normal",
            trainable=True,
        )

        # Create cross-attention module.
        self.cross_attention = self.create_cross_attention_module(
            self.latent_dim,
            self.data_dim,
            self.projection_dim,
            self.ffn_units,
            self.dropout_rate,
        )

        # Create Transformer module.
        self.transformer = self.create_transformer_module(
            self.latent_dim,
            self.projection_dim,
            self.num_heads,
            self.num_transformer_blocks,
            self.ffn_units,
            self.dropout_rate,
        )

        super(Perceiver2D, self).build(input_shape)

    def call(self, inputs):
        """Run the data array through the Perceiver iterations.

        Parameters
        ----------
        inputs: tensor
            Data array of shape [batch_size, data_dim, projection_dim].

        Returns
        -------
        tensor
            Latent array after the last iteration. If ``num_iterations`` is
            0 the initial latent array, expanded to
            [1, latent_dim, projection_dim], is returned unchanged.
        """
        # Start from the learned latent array with a leading batch axis of 1.
        latent_array = tf.expand_dims(self.latent_array, 0)
        # Prepare cross-attention inputs.
        cross_attention_inputs = {
            "latent_array": latent_array,
            "data_array": inputs,
        }
        # Apply the cross-attention and the Transformer modules iteratively.
        # Use the instance's own setting rather than a module-level global.
        for _ in range(self.num_iterations):
            # Apply cross-attention from the latent array to the data array.
            latent_array = self.cross_attention(cross_attention_inputs)
            # Apply self-attention Transformer to the latent array.
            latent_array = self.transformer(latent_array)
            # Set the latent array of the next iteration.
            cross_attention_inputs["latent_array"] = latent_array

        return latent_array

In [3]:
# Optimizer/regularization hyperparameters. learning_rate and weight_decay
# are defined here but not consumed in this cell.
learning_rate = 0.001
weight_decay = 0.0001
dropout_rate = 0.2

data_dim = 512  # Number of elements in the input data array.
latent_dim = 256  # Size of the latent array.
projection_dim = 256  # Embedding size of each element in the data and latent arrays.
num_heads = 8  # Number of Transformer heads.

ffn_units = [
    projection_dim,
    projection_dim,
]  # Dim of the Feedforward network.

num_transformer_blocks = 4
num_iterations = 3

model = Perceiver2D(
    data_dim,
    latent_dim,
    projection_dim,
    num_heads,
    num_transformer_blocks,
    ffn_units,
    dropout_rate,
    num_iterations,
)
# Derive the build shape from the hyperparameters above so the two cannot
# drift apart: [batch_size, data_dim, projection_dim].
model.build((None, data_dim, projection_dim))
model.summary()

Model: "perceiver2d"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Functional)           (None, 256, 256)          330497    
_________________________________________________________________
model_1 (Functional)         (None, 256, 256)          8944640   
Total params: 9,340,673
Trainable params: 9,340,673
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Smoke test: push one random sample of shape (1, 512, 256) through the
# model and check that the latent output comes back as (1, 256, 256).
x = np.random.uniform(low=0, high=1, size=(1, 512, 256))
yh = model(x)
if yh.shape == (1, 256, 256):
    print("Perciever 2D operation test passed!!!")
else:
    raise ValueError('Perciever 2D output is the wrong size!!!')

Perciever 2D operation test passed!!!
