## Vanishing/Exploding Gradients

In [1]:
from tensorflow import keras

In [2]:
# Dense layer with 10 units, ReLU activation, and the "he_normal" kernel
# initializer; the layer object is the cell's displayed value.
keras.layers.Dense(
    units=10,
    activation="relu",
    kernel_initializer="he_normal",
)

<keras.layers.core.dense.Dense at 0x1f812ceb670>

In [4]:
# Variance-scaling initializer configured with scale=2, fan_avg mode, and a
# uniform distribution, then used as the kernel initializer of a ReLU layer.
he_avg_init = keras.initializers.VarianceScaling(
    scale=2, mode="fan_avg", distribution="uniform"
)
keras.layers.Dense(
    units=10,
    activation="relu",
    kernel_initializer=he_avg_init,
)

<keras.layers.core.dense.Dense at 0x1f812dadb50>

In [5]:
# Leaky ReLU: build the activation as a layer instance (alpha=0.2) and hand
# that instance to Dense as its activation.
leaky_relu = keras.layers.LeakyReLU(alpha=0.2)
keras.layers.Dense(
    units=10,
    activation=leaky_relu,
    kernel_initializer="he_normal",
)

<keras.layers.core.dense.Dense at 0x1f812dadd30>

In [6]:
# Dense layer with 10 units using SELU activation together with the
# "lecun_normal" kernel initializer.
keras.layers.Dense(
    units=10,
    activation="selu",
    kernel_initializer="lecun_normal",
)

<keras.layers.core.dense.Dense at 0x1f812dad970>

In [8]:
# Classifier for 28x28 inputs with a BatchNormalization layer after the
# Flatten layer and after each hidden Dense layer (i.e. after the ELU
# activation, which is applied inside the Dense layers here).
model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=[28, 28]),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(units=300, activation="elu", kernel_initializer="he_normal"),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(units=100, activation="elu", kernel_initializer="he_normal"),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(units=10, activation="softmax"),
    ]
)

In [9]:
# Print the per-layer table of output shapes and parameter counts for `model`.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_3 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 dense_7 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_4 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_8 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_5 (Batc  (None, 100)             

In [10]:
# Pair every variable of the layer at index 1 (the first BatchNormalization
# layer) with its `trainable` flag.
[
    (variable.name, variable.trainable)
    for variable in model.layers[1].variables
]

[('batch_normalization_3/gamma:0', True),
 ('batch_normalization_3/beta:0', True),
 ('batch_normalization_3/moving_mean:0', False),
 ('batch_normalization_3/moving_variance:0', False)]

In [11]:
# Inspect the `updates` attribute of the layer at index 1 (the first
# BatchNormalization layer).
# NOTE(review): the recorded output is an empty list; `updates` is presumably
# deprecated in the Keras version used here — confirm before relying on it.
model.layers[1].updates

  model.layers[1].updates


[]

In [2]:
from tensorflow import keras

In [3]:
# Same architecture, but with BatchNormalization placed *before* each ELU
# activation: the hidden Dense layers have no activation and no bias term
# (use_bias=False), and the activation is applied by a separate layer.
model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=[28, 28]),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(units=300, kernel_initializer="he_normal", use_bias=False),
        keras.layers.BatchNormalization(),
        keras.layers.Activation("elu"),
        keras.layers.Dense(units=100, kernel_initializer="he_normal", use_bias=False),
        keras.layers.BatchNormalization(),
        keras.layers.Activation("elu"),
        keras.layers.Dense(units=10, activation="softmax"),
    ]
)

In [4]:
# Gradient clipping: SGD with clipvalue=1.0 (passed straight to the optimizer).
optimizer = keras.optimizers.SGD(clipvalue=1.0)

# `model` ends in a 10-unit softmax layer, so compile it with a classification
# loss rather than "mse". This matches the loss used for the identically
# shaped `model_A` in this notebook.
# NOTE(review): assumes integer class labels (as with MNIST) — confirm.
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer)

## Reusing Pretrained Layers

In [2]:
# Load MNIST, then carve a validation set out of the training data: the first
# 50,000 examples stay in the training set, the remainder become validation.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Take the validation slice first, while X_train/y_train are still the full arrays.
X_val, y_val = X_train[50000:], y_train[50000:]
X_train, y_train = X_train[:50000], y_train[:50000]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
from tensorflow import keras

# Base model ("model A"): a 10-class classifier for 28x28 inputs with
# BatchNormalization after the Flatten layer and after each hidden Dense layer.
model_A = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax")
])

# `metrics` is given as a list, which is the form documented for
# Model.compile; a bare string is not accepted by every Keras version.
model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"])

# Train for up to 100 epochs; stop once val_loss has not improved for 10
# consecutive epochs and restore the weights from the best epoch.
model_A.fit(X_train, y_train, epochs=100,
            validation_data=(X_val, y_val),
            callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss",
                                                     patience=10,
                                                     restore_best_weights=True)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


<keras.callbacks.History at 0x2e51e6e85b0>

In [5]:
# Evaluate model A on the held-out test set; the displayed list holds the loss
# followed by the compiled metric.
model_A.evaluate(x=X_test, y=y_test)



[0.3781094253063202, 0.9657999873161316]

In [None]:
# Build a new model on top of model A by reusing every layer except its final
# (output) layer. The original cell referenced `model_A.la`, an unfinished
# attribute name that raises AttributeError.
# NOTE(review): the layer objects are reused, not copied, so training
# model_B_on_A presumably also changes model_A's weights — clone model_A first
# if it must stay intact.
model_B_on_A = keras.Sequential(model_A.layers[:-1])