### 1. Dense Layer.

In [2]:
import tensorflow as tf
from tensorflow import keras

### The `mnist` dataset.

In [3]:
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [12]:
X_train_tensors = tf.convert_to_tensor(X_train.reshape(-1, 28*28)/255.0, dtype='float32')
X_test_tensors = tf.convert_to_tensor(X_test.reshape(-1, 28*28)/255.0, dtype='float32')

y_test_tensors = tf.convert_to_tensor(y_test)
y_train_tensors = tf.convert_to_tensor(y_train)
y_train_tensors [:5]

<tf.Tensor: shape=(5,), dtype=uint8, numpy=array([5, 0, 4, 1, 9], dtype=uint8)>

> Let's build a simple `FFNN` for this dataset

In [5]:
X_train_tensors[0].shape

TensorShape([28, 28])

In [13]:
model_1 = keras.Sequential([
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])
model_1.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_1.fit(X_train_tensors, y_train_tensors, epochs=2, batch_size=64, validation_data=(X_test_tensors, y_test_tensors))
model_1.summary()

Epoch 1/2
Epoch 2/2
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_7 (Dense)              (None, 10)                650       
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


## Creating a custom `model` using subclassing from `Keras`

In [16]:
class Net(keras.Model):
    def __init__(self, num_classes=10):
        super(Net, self).__init__()
        self.dense1 = keras.layers.Dense(64)
        self.dense2 = keras.layers.Dense(num_classes)
    def call(self, x):
        x = tf.nn.relu(self.dense1(x))
        return self.dense2(x)
    def model(self):
      x = keras.layers.Input((784, ))
      return keras.Model(inputs=[x], outputs=self.call(x))


In [17]:
model_2 = Net()

model_2.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_2.fit(X_train_tensors, y_train_tensors, epochs=2, batch_size=64, validation_data=(X_test_tensors, y_test_tensors))
model_2.model().summary()

Epoch 1/2
Epoch 2/2
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_10 (Dense)             (None, 64)                50240     
_________________________________________________________________
tf.nn.relu (TFOpLambda)      (None, 64)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 10)                650       
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


Now let's build our custom `Dense` Layer

In [18]:
class Dense(keras.layers.Layer):
  def __init__(self, units, input_dim):
    super().__init__()
    self.weights_ =  self.add_weight(
        name="weights_",
        shape=(input_dim, units),
        initializer="random_normal",
        trainable=True,
    )
    self.bias_ = self.add_weight(
        name="bias_",
        shape=(units,),
        initializer="zeros",
        trainable=True
    )

  def call(self, inputs):
    return tf.matmul(inputs, self.weights_) + self.bias_

  """
  def call(self, inputs):
    return inputs @ self.weights_ + self.bias_
  """


> Creating the model using our custom `Dense` layer.

In [21]:
class Net(keras.Model):
  def __init__(self):
    super().__init__()
    self.dense2 = Dense(64, 784)
    self.dense3 = Dense(10, 64)

  def call(self, x):
    x = keras.activations.relu(self.dense2(x))
    return keras.activations.softmax(self.dense3(x))

  def model(self):
      x = keras.layers.Input((784, ))
      return keras.Model(inputs=[x], outputs=self.call(x))

In [22]:
model_3 = Net()

model_3.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_3.fit(X_train_tensors, y_train_tensors, epochs=2, batch_size=64, validation_data=(X_test_tensors, y_test_tensors))
model_3.model().summary()

Epoch 1/2
Epoch 2/2
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_14 (Dense)             (None, 64)                50240     
_________________________________________________________________
tf.nn.relu_1 (TFOpLambda)    (None, 64)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                650       
_________________________________________________________________
tf.compat.v1.nn.softmax (TFO (None, 10)                0         
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


Observations: As we can see in the `Dense` layer we have passed units and outputs like :
```python
self.dense2 = Dense(64, 784)
```

Of with in a normal dense layer from keras we just need to pass `units` like:

```python
 self.dense2 = Dense(64)
```

So to solve this we will implement the ``build`` method during the creation of our layer as follows:

In [61]:
class Dense(keras.layers.Layer):
  def __init__(self, units):
    super(Dense, self).__init__()
    self.units = units

  def build(self, input_shape):
      self.weights_ = self.add_weight(
            name="weights_",
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
      self.bias_ = self.add_weight(
          name="bias_",
          shape=(self.units,),
          initializer="zeros",
          trainable=True
      )

  def call(self, inputs):
    return inputs @ self.weights_ + self.bias_


> Trainning our `model` on the new  custom ``Dense`` layer.

In [62]:
class Net(keras.Model):
  def __init__(self):
    super().__init__()
    self.dense2 = Dense(64)
    self.dense3 = Dense(10)

  def call(self, x):
    x = keras.activations.relu(self.dense2(x))
    return keras.activations.softmax(self.dense3(x))

  def model(self):
      x = keras.layers.Input((784, ))
      return keras.Model(inputs=[x], outputs=self.call(x))


In [63]:
model_4 = Net()

model_4.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_4.fit(X_train_tensors, y_train_tensors, epochs=2, batch_size=64, validation_data=(X_test_tensors, y_test_tensors))
model_4.model().summary()

Epoch 1/2


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/2
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_44 (Dense)             (None, 64)                50240     
_________________________________________________________________
tf.nn.relu_5 (TFOpLambda)    (None, 64)                0         
_________________________________________________________________
dense_45 (Dense)             (None, 10)                650       
_________________________________________________________________
tf.compat.v1.nn.softmax_4 (T (None, 10)                0         
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


#### Bonus: Creating a `RelU` activation function.

In [77]:
class ReLU:
  def __call__(self, x):
    return tf.math.maximum(x, 0)

#### OR -------------

class ReLU2(keras.layers.Layer):
  def __init__self():
    super().__init__()

  def call(self, x):
    return tf.maximum(x, 0)
   

> Trainning the model using custom `Relu`

In [78]:
class Net(keras.Model):
  def __init__(self):
    super().__init__()
    self.dense2 = Dense(64)
    self.dense3 = Dense(10)
    self.relu = ReLU2() # or self.relu = ReLU()

  def call(self, x):
    x = self.relu(self.dense2(x))
    return keras.activations.softmax(self.dense3(x))

  def model(self):
      x = keras.layers.Input((784, ))
      return keras.Model(inputs=[x], outputs=self.call(x))


In [79]:
model_5 = Net()

model_5.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_5.fit(X_train_tensors, y_train_tensors, epochs=2, batch_size=64, validation_data=(X_test_tensors, y_test_tensors))
model_5.model().summary()

Epoch 1/2


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


Epoch 2/2
Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_52 (Dense)             (None, 64)                50240     
_________________________________________________________________
re_l_u2 (ReLU2)              (None, 64)                0         
_________________________________________________________________
dense_53 (Dense)             (None, 10)                650       
_________________________________________________________________
tf.compat.v1.nn.softmax_6 (T (None, 10)                0         
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________
