# Custom LayerNormalization

In [1]:
import tensorflow as tf
from tensorflow import keras
# Fix TensorFlow's global random seed so any random operations are repeatable across runs.
tf.random.set_seed(42)

## Load a dataset to test whether the layer produces valid output

In [2]:
# Load the MNIST dataset bundled with Keras as (train, test) pairs of (inputs, targets).
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
# Show the shape of the training inputs as a quick sanity check.
print(X_train.shape)

(60000, 28, 28)


## Keras' LayerNorm

In [3]:
# Reference pipeline: flatten each 28x28 input, then apply Keras' built-in
# layer normalization.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.LayerNormalization())

model.compile(loss='mean_squared_error', optimizer='sgd')

# Normalized outputs for the first three training samples.
model.predict(X_train[:3])

2021-12-20 17:42:24.752305: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-20 17:42:25.224237: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3072 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 960, pci bus id: 0000:01:00.0, compute capability: 5.2
2021-12-20 17:42:25.656696: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100


array([[-0.4407901 , -0.4407901 , -0.4407901 , ..., -0.4407901 ,
        -0.4407901 , -0.4407901 ],
       [-0.47280163, -0.47280163, -0.47280163, ..., -0.47280163,
        -0.47280163, -0.47280163],
       [-0.37816158, -0.37816158, -0.37816158, ..., -0.37816158,
        -0.37816158, -0.37816158]], dtype=float32)

## Custom LayerNorm

In [4]:
class LayerNorm(keras.layers.Layer):
    """Layer normalization over the last axis with a learnable scale and shift.

    Each sample is centred and scaled using the mean and standard deviation
    computed along its last axis:

        alpha * (X - mean) / (stddev + epsilon) + beta

    ``epsilon`` is added to the standard deviation itself, not to the variance
    inside the square root.

    Args:
        epsilon: Small constant added to the standard deviation so the
            division never hits zero. Defaults to 0.001.
        **kwargs: Forwarded to ``keras.layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, epsilon=0.001, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        """Create the per-feature scale (``alpha``) and shift (``beta``) weights."""
        # One scale and one shift value per entry of the last axis.
        feature_shape = input_shape[-1:]
        # No explicit dtype: the weights follow the layer's own dtype, so a
        # layer created with e.g. dtype='float64' does not mix dtypes in call().
        self.alpha = self.add_weight(name='alpha', shape=feature_shape,
                                     initializer=tf.ones_initializer())
        self.beta = self.add_weight(name='beta', shape=feature_shape,
                                    initializer=tf.zeros_initializer())
        super().build(input_shape)

    def compute_output_shape(self, batch_input_shape):
        """Normalization is element-wise, so the output shape equals the input shape."""
        return batch_input_shape

    def call(self, X):
        """Return ``X`` normalized along its last axis, then scaled and shifted."""
        # keepdims=True keeps the reduced axis so the statistics broadcast against X.
        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
        stddev = tf.sqrt(variance)
        return tf.multiply(self.alpha, X - mean) / (stddev + self.epsilon) + self.beta

    def get_config(self):
        """Return the layer configuration, including ``epsilon``, for serialization."""
        config = super().get_config()
        config['epsilon'] = self.epsilon
        return config

In [5]:
# Same pipeline as the Keras reference model, with the custom layer swapped in.
custom_model = keras.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    LayerNorm()
])

custom_model.compile(optimizer='sgd', loss='mean_squared_error')

custom_preds = custom_model.predict(X_train[:3])

# Compare every output value against the built-in layer instead of eyeballing
# the truncated array repr. The two recorded outputs differ around the fifth
# decimal place, so an absolute tolerance is used rather than exact equality.
tf.debugging.assert_near(custom_preds, model.predict(X_train[:3]), atol=1e-3)

custom_preds

array([[-0.4407846 , -0.4407846 , -0.4407846 , ..., -0.4407846 ,
        -0.4407846 , -0.4407846 ],
       [-0.47279605, -0.47279605, -0.47279605, ..., -0.47279605,
        -0.47279605, -0.47279605],
       [-0.37815583, -0.37815583, -0.37815583, ..., -0.37815583,
        -0.37815583, -0.37815583]], dtype=float32)