In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

In [4]:
class layerNormalization(keras.layers.Layer):
    def __init__(self,eps = 1e-3, **kwargs):
        super().__init__(**kwargs)
        self.eps = eps
    
    def build(self, batch_input_shape):
        self.alpha = self.add_weight(name="alpha", shape = batch_input_shape[-1], 
                                     initializer = "zeros", dtype = tf.float32, trainable = True)
        self.beta = self.add_weight(name="beta", shape = batch_input_shape[-1], 
                                    initializer = "ones", dtype = tf.float32, trainable = True)
        super().build(batch_input_shape)
    
    def call(self, input):
        mean, variance = tf.nn.moments(input, axes = -1, keepdims = True)
        return self.alpha * (input - mean) / (tf.sqrt(self.eps + variance)) + self.beta
    
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape)
    
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "eps": self.eps}

In [5]:
X = X_train.astype(np.float32)

custom_layer_norm = layerNormalization()
keras_layer_norm = keras.layers.Normalization()

tf.reduce_mean(keras.losses.mean_absolute_error(custom_layer_norm(X), keras_layer_norm(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=201.94565>

In [6]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [45]:
def status_process_bar(iteration, size = 15):
    work = int(iteration/len(y_train)*size)
    arrow = ">" if iteration*batch_size < len(y_train) else ""
    not_done = size - work
    process = "[" + work*"=" + arrow + "."*not_done + "]"
    form = "{}/{} {}".format(iteration, len(y_train), process)
    return form

In [44]:
def process_bar(iteration, loss, metrics=None, val_loss = None, val_accuracy = None):
    metric = " - ".join(["{} - {:.4f}".format(m.name, m.result()) for m in [loss] + ([metrics] if metrics else []) +
                                             ([val_loss] if val_loss else []) + ([val_accuracy] if val_accuracy else []) ])
    form = "\r{} - {}".format(status_process_bar(iteration), metric)
    end = "" if iteration < len(y_train) else "\n"
    print(form, end = end)

In [9]:
model_pre = keras.models.Sequential([
    keras.layers.Flatten(input_shape = X_train.shape[1:]),
    keras.layers.Dense(300, activation = "relu", kernel_initializer = "he_normal",
                       kernel_regularizer = "l1"),
    keras.layers.Dense(300, activation = "relu", kernel_initializer = "he_normal",
                       kernel_regularizer = "l1"),
])

model_post = keras.models.Sequential([
    keras.layers.Flatten(input_shape = [300]),
    keras.layers.Dense(300, activation = "relu", kernel_initializer = "he_normal",
                       kernel_regularizer = "l1"),
    keras.layers.Dense(300, activation = "relu", kernel_initializer = "he_normal",
                       kernel_regularizer = "l1"),
    keras.layers.Dense(10, activation = "softmax", kernel_initializer = "he_normal")
])

In [10]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)
def random_batch(X, y):
    id = np.random.randint(len(X_train), size = batch_size, dtype = int)
    return X[id], y[id] 

In [11]:
ohot_train = np.zeros(shape = [len(y_train), 10], dtype = np.float32)
ohot_test = np.zeros(shape = [len(y_test), 10], dtype = np.float32)
ohot_valid = np.zeros(shape = [len(y_valid), 10], dtype = np.float32)

In [12]:
for i in range(len(y_train)):
    ohot_train[i, y_train[i]] = 1
for i in range(len(y_test)):
    ohot_test[i, y_test[i]] = 1
for i in range(len(y_valid)):
    ohot_valid[i, y_valid[i]] = 1

In [31]:
batch_size = 32
total = len(X_train)//batch_size
epochs = 5
loss_cat = tf.keras.losses.CategoricalCrossentropy()

mean_loss = keras.metrics.Mean(name = "loss")
mean_loss_val = keras.metrics.Mean(name = "val_loss")

metrics = tf.keras.metrics.Accuracy(name="accuracy")
metrics_val = tf.keras.metrics.Accuracy(name="val_accuracy")

optimizer_pre = keras.optimizers.Nadam(learning_rate=0.01, name = "Namdam")
optimizer_post = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False, name="SGD")

In [46]:
for epoch in range(1, epochs + 1):
    print("Epoch {}/{}".format(epoch, epochs))
    for iteration in range(1, total + 1):
        x_batch, y_batch_train = random_batch(X_train, ohot_train)
        with tf.GradientTape(persistent=True) as tape:
            y_batch_pre  = model_pre(x_batch, training = True)
            y_batch_post = model_post(y_batch_pre, training = True)
            loss = tf.reduce_mean(loss_cat(y_batch_train, y_batch_post))
        gradient_post = tape.gradient(loss, model_post.trainable_variables)
        gradient_pre = tape.gradient(loss, model_pre.trainable_variables)
        optimizer_post.apply_gradients(zip(gradient_post, model_post.trainable_variables))
        optimizer_pre.apply_gradients(zip(gradient_pre, model_pre.trainable_variables))
        del tape
        
        mean_loss(loss)
        metrics(y_batch_train, y_batch_post)
        process_bar(iteration = iteration*batch_size, loss = mean_loss, metrics = metrics)
    
    x = model_pre(X_valid)
    y_pred_valid = model_post(x)
    mean_loss_val(loss_cat(ohot_valid, y_pred_valid))
    metrics_val(ohot_valid, y_pred_valid)
    
    process_bar(iteration = len(y_train), loss = mean_loss, metrics = metrics, val_loss = mean_loss_val, val_accuracy = metrics_val)
    metrics.reset_states()
    mean_loss.reset_states()

Epoch 1/5

KeyboardInterrupt: 

In [29]:
mean_loss_val.name

'val_loss'