# 12.

In [None]:
import tensorflow as tf
from tensorflow import keras


class MyNormalization(keras.layers.Layer):
    """Custom layer normalization computed over the last axis of the input.

    Every slice along the last axis is centered on its own mean and divided
    by its own standard deviation plus ``epsilon``. The result is then scaled
    element-wise by the trainable weight ``alpha`` (initialized to ones) and
    shifted by the trainable weight ``beta`` (initialized to zeros).

    ``epsilon`` is added to the standard deviation itself, not to the variance
    under the square root, so the output can differ slightly from
    implementations that compute ``sqrt(variance + epsilon)``.

    Args:
        epsilon: Small positive constant added to the standard deviation to
            avoid division by zero. Defaults to 0.001.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, epsilon=0.001, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, batch_input_shape):
        """Create ``alpha`` and ``beta``, each shaped like the last input axis."""
        self.alpha = self.add_weight(name='alpha', shape=batch_input_shape[-1:],
                                     initializer='ones', dtype=tf.float32)
        self.beta = self.add_weight(name='beta', shape=batch_input_shape[-1:],
                                     initializer='zeros', dtype=tf.float32)
        super().build(batch_input_shape)

    def call(self, X):
        """Return ``alpha * (X - mean) / (std_dev + epsilon) + beta``."""
        # keepdims=True keeps a trailing axis of size 1 so that the statistics
        # broadcast against X.
        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
        # NOTE(review): the derivative of sqrt is unbounded at zero variance.
        # If this layer is ever fed by trainable layers, confirm that constant
        # slices cannot occur or gradients may become NaN.
        std_dev = tf.sqrt(variance)
        return self.alpha * (X - mean) / (std_dev + self.epsilon) + self.beta

    def get_config(self):
        """Return the layer configuration, including ``epsilon``."""
        base_config = super().get_config()
        return {**base_config, "epsilon": self.epsilon}

In [None]:
from tensorflow.keras import layers


# Sanity check: run the custom layer and Keras' built-in LayerNormalization on
# the same random batch and print the mean squared difference of the outputs.
tf.random.set_seed(42)

num_samples, num_features = 100, 3
X = tf.random.normal(shape=(num_samples, num_features))

custom_layer = MyNormalization()
my_output = custom_layer(X)
reference_layer = layers.LayerNormalization()
test_output = reference_layer(X)

squared_difference = tf.math.square(test_output - my_output)
print(tf.math.reduce_mean(squared_difference))

tf.Tensor(7.403202e-06, shape=(), dtype=float32)


# 13.

In [27]:
# Load Fashion MNIST and cast every array to float32. The raw values are cast
# as-is; no rescaling is applied here.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full, y_train_full, X_test, y_test = [
    tf.cast(array, tf.float32)
    for array in (X_train_full, y_train_full, X_test, y_test)
]

# First 10,000 samples form the validation set, the next 20,000 the training
# set. Samples from index 30,000 onwards are not used.
X_valid, y_valid = X_train_full[:10000], y_train_full[:10000]
X_train, y_train = X_train_full[10000:30000], y_train_full[10000:30000]

In [39]:
def print_status_bar(iteration, total, loss, metrics=None, valid_loss=None,
                     valid_metrics=None):
    """Print a single-line training progress report.

    The line starts with a carriage return so that successive calls overwrite
    each other. It is only terminated with a newline once ``iteration`` is no
    longer smaller than ``total``.

    Args:
        iteration: Progress counter shown before the slash.
        total: Value shown after the slash; also decides when the line ends.
        loss: Object exposing ``.name`` and ``.result()``; reported first.
        metrics: Optional list of further objects with ``.name``/``.result()``.
        valid_loss: Optional validation counterpart of ``loss``. Validation
            entries are printed only when this is not ``None``.
        valid_metrics: Optional list of validation metrics; ignored when
            ``valid_loss`` is ``None``.
    """
    def describe(tracked, template):
        # One "name: value" entry per tracked object, values with 4 decimals.
        return " - ".join(template.format(item.name, item.result())
                          for item in tracked)

    train_part = describe([loss] + (metrics or []), "{}: {:.4f}")

    valid_part = ""
    if valid_loss is not None:
        valid_part = " - " + describe([valid_loss] + (valid_metrics or []),
                                      "validation {}: {:.4f}")

    if iteration < total:
        line_end = ""
    else:
        line_end = "\n"
    print(f"\r{iteration}/{total} - " + train_part + valid_part, end=line_end)

In [40]:
import numpy as np


# Reset Keras' global state and seed NumPy / TensorFlow for a repeatable run.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

n_units = 100
n_layers = 1
# Build one fresh Dense instance per hidden position. Repeating a single layer
# object in the list would reuse the same layer (and its weights) at every
# position as soon as n_layers > 1.
hidden_layers = [keras.layers.Dense(n_units, activation="relu")
                 for _ in range(n_layers)]
model = keras.models.Sequential([
    MyNormalization(),  # applied to the raw input, before flattening
    keras.layers.Flatten(),
    *hidden_layers,
    keras.layers.Dense(10, activation='softmax')
])

n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.sparse_categorical_crossentropy

# Running averages of the per-batch training loss and of the validation loss.
mean_loss = keras.metrics.Mean()
mean_valid_loss = keras.metrics.Mean()

train_metrics = [keras.metrics.SparseCategoricalAccuracy()]
valid_metrics = [keras.metrics.SparseCategoricalAccuracy()]

for epoch in range(1, n_epochs+1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps+1):
        # Batches are consecutive slices of the training set (no shuffling).
        X_batch = X_train[(step-1)*batch_size:step*batch_size]
        y_batch = y_train[(step-1)*batch_size:step*batch_size]

        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Cross-entropy plus any extra losses registered on the model.
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        mean_loss(loss)
        for metric in train_metrics:
            metric(y_batch, y_pred)

        if step == n_steps:
            # Last step of the epoch: evaluate once on the whole validation
            # set. The final status line is printed after the inner loop.
            y_valid_pred = model(X_valid)
            valid_loss = tf.reduce_mean(loss_fn(y_valid, y_valid_pred))
            mean_valid_loss(valid_loss)
            for metric in valid_metrics:
                metric(y_valid, y_valid_pred)
        else:
            print_status_bar(step*batch_size, len(y_train),
                             mean_loss, train_metrics)

    print_status_bar(len(y_train), len(y_train),
                     mean_loss, train_metrics,
                     mean_valid_loss, valid_metrics)
    # Start the next epoch with empty accumulators.
    for metric in [mean_loss, mean_valid_loss] + train_metrics + valid_metrics:
        metric.reset_states()

Epoch 1/5
20000/20000 - mean: 0.5414 - sparse_categorical_accuracy: 0.8101 - validation mean: 0.4089 - validation sparse_categorical_accuracy: 0.8515
Epoch 2/5
6176/20000 - mean: 0.3862 - sparse_categorical_accuracy: 0.8574

KeyboardInterrupt: 

In [43]:
import numpy as np


# Reset Keras' global state and seed NumPy / TensorFlow for a repeatable run.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# The network is split into two sub-models so that each part can be trained
# with its own optimizer.
lower_layers = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="relu"),
])
upper_layers = keras.models.Sequential([
    keras.layers.Dense(10, activation="softmax"),
])
model = keras.models.Sequential([
    lower_layers, upper_layers
])

n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
lower_optimizer = keras.optimizers.SGD(learning_rate=1e-4)
upper_optimizer = keras.optimizers.Nadam(learning_rate=1e-3)

# Running averages of the per-batch training loss and of the validation loss.
mean_loss = keras.metrics.Mean()
mean_valid_loss = keras.metrics.Mean()

train_metrics = [keras.metrics.SparseCategoricalAccuracy()]
valid_metrics = [keras.metrics.SparseCategoricalAccuracy()]

for epoch in range(1, n_epochs+1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps+1):
        # Batches are consecutive slices of the training set (no shuffling).
        X_batch = X_train[(step-1)*batch_size:step*batch_size]
        y_batch = y_train[(step-1)*batch_size:step*batch_size]

        # The tape is persistent because tape.gradient() is called twice,
        # once per sub-model.
        with tf.GradientTape(persistent=True) as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Cross-entropy plus any extra losses registered on the model.
            loss = tf.add_n([main_loss] + model.losses)
        # The loop variables deliberately avoid the names `layers` and
        # `optimizer`: `layers` is the tensorflow.keras.layers module imported
        # earlier in the notebook, and rebinding it here would break re-running
        # the cells that use it.
        for sub_model, sub_optimizer in ((lower_layers, lower_optimizer),
                                         (upper_layers, upper_optimizer)):
            gradients = tape.gradient(loss, sub_model.trainable_variables)
            sub_optimizer.apply_gradients(
                zip(gradients, sub_model.trainable_variables))
        # Release the resources held by the persistent tape.
        del tape

        mean_loss(loss)
        for metric in train_metrics:
            metric(y_batch, y_pred)

        if step == n_steps:
            # Last step of the epoch: evaluate once on the whole validation
            # set. The final status line is printed after the inner loop.
            y_valid_pred = model(X_valid)
            valid_loss = tf.reduce_mean(loss_fn(y_valid, y_valid_pred))
            mean_valid_loss(valid_loss)
            for metric in valid_metrics:
                metric(y_valid, y_valid_pred)
        else:
            print_status_bar(step*batch_size, len(y_train),
                             mean_loss, train_metrics)

    print_status_bar(len(y_train), len(y_train),
                     mean_loss, train_metrics,
                     mean_valid_loss, valid_metrics)
    # Start the next epoch with empty accumulators.
    for metric in [mean_loss, mean_valid_loss] + train_metrics + valid_metrics:
        metric.reset_states()

Epoch 1/5
20000/20000 - mean: 12.1008 - sparse_categorical_accuracy: 0.6855 - validation mean: 5.7868 - validation sparse_categorical_accuracy: 0.7295
Epoch 2/5
20000/20000 - mean: 3.7682 - sparse_categorical_accuracy: 0.7454 - validation mean: 2.8633 - validation sparse_categorical_accuracy: 0.7542
Epoch 3/5
20000/20000 - mean: 2.0243 - sparse_categorical_accuracy: 0.7606 - validation mean: 1.7734 - validation sparse_categorical_accuracy: 0.7578
Epoch 4/5
20000/20000 - mean: 1.2530 - sparse_categorical_accuracy: 0.7722 - validation mean: 1.2712 - validation sparse_categorical_accuracy: 0.7640
Epoch 5/5
20000/20000 - mean: 0.9198 - sparse_categorical_accuracy: 0.7796 - validation mean: 1.0503 - validation sparse_categorical_accuracy: 0.7710
