# Chapter 11 – Training Deep Neural Networks

In [9]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

## Glorot and He Initialization--for vanishing/exploding gradients

In [3]:
# Change the mean and standard deviation of the distribution used to randomly
# initialize the connection weights by selecting the "he_normal" initializer.
keras.layers.Dense(units=10, activation="relu", kernel_initializer="he_normal")

<tensorflow.python.keras.layers.core.Dense at 0x9ce63f8f28>

In [4]:
# Public (non-underscore) names exposed by keras.initializers.
list(filter(lambda attr: not attr.startswith("_"), dir(keras.initializers)))

['Constant',
 'GlorotNormal',
 'GlorotUniform',
 'Identity',
 'Initializer',
 'Ones',
 'Orthogonal',
 'RandomNormal',
 'RandomUniform',
 'TruncatedNormal',
 'VarianceScaling',
 'Zeros',
 'constant',
 'deserialize',
 'get',
 'glorot_normal',
 'glorot_uniform',
 'he_normal',
 'he_uniform',
 'identity',
 'lecun_normal',
 'lecun_uniform',
 'ones',
 'orthogonal',
 'serialize',
 'zeros']

## Nonsaturating Activation Functions--for vanishing/exploding gradients

In [5]:
# Use non-saturating activation functions: list the keras.layers entries whose
# name contains "relu" (case-insensitive).
[layer_name for layer_name in dir(keras.layers) if "relu" in layer_name.lower()]

['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']

In [6]:
# Load Fashion MNIST as a (train images, train labels), (test images, test labels) pair.
(X_train_full, y_train_full), (X_test, y_test) = (
    keras.datasets.fashion_mnist.load_data()
)

In [7]:
# Divide the pixel values by 255.0 (presumably mapping 8-bit intensities into
# the [0, 1] range -- confirm against the dataset).
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

# The first 5,000 training examples become the validation set; the remainder
# is used for training.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [10]:
# Fix both the NumPy and TensorFlow global seeds so runs are repeatable.
np.random.seed(42)
tf.random.set_seed(42)

In [15]:
# LeakyReLU activation function + He initialization

In [11]:
# Two He-initialized hidden layers, each followed by a LeakyReLU activation
# layer, then a 10-unit softmax output layer. Layers are added in the same
# order as they would appear in a list passed to Sequential.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, kernel_initializer="he_normal"))
model.add(keras.layers.LeakyReLU())
model.add(keras.layers.Dense(100, kernel_initializer="he_normal"))
model.add(keras.layers.LeakyReLU())
model.add(keras.layers.Dense(10, activation="softmax"))

In [13]:
# Compile with plain SGD. `learning_rate` is the supported keyword for Keras
# optimizers; the old `lr` alias is deprecated and rejected by newer Keras
# releases, so it is not used here.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [14]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

W0620 10:55:22.433485  6596 deprecation.py:323] From D:\Program Files\Anaconda\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# SELU activation function + LeCun initialization

In [17]:
# Reset both global seeds before building the next model.
tf.random.set_seed(42)
np.random.seed(42)

In [18]:
# Deep SELU network: one 300-unit hidden layer, then 99 hidden layers of 100
# units each, all using LeCun-normal initialization, and a 10-unit softmax
# output. The list is evaluated left to right, so layers are created in the
# same order as with successive add() calls.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    *[
        keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal")
        for _ in range(99)
    ],
    keras.layers.Dense(10, activation="softmax"),
])

In [19]:
# Compile with plain SGD. `learning_rate` is the supported keyword for Keras
# optimizers; the old `lr` alias is deprecated and rejected by newer Keras
# releases, so it is not used here.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [20]:
# Standardize every pixel to zero mean and unit standard deviation (this is
# standardization, not a rescale to the (0, 1) range). The statistics come
# from the training set only and are reused for the validation and test sets.
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)
# A pixel that is constant across the training set has a std of 0; divide by 1
# there instead so the division cannot produce NaN/inf values.
pixel_stds[pixel_stds == 0] = 1.0
X_train_scaled = (X_train - pixel_means) / pixel_stds
X_valid_scaled = (X_valid - pixel_means) / pixel_stds
X_test_scaled = (X_test - pixel_means) / pixel_stds

In [21]:
# Train on the standardized inputs for 5 epochs, validating after each epoch.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=5,
    validation_data=(X_valid_scaled, y_valid),
)

Train on 55000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
