<a href="https://colab.research.google.com/github/Kuz-man/hands-on-machine-learning-with-scikit-learn-keras-and-tensorflow/blob/master/hands_on_ML_chapter_11_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

from tensorflow.keras.layers import Dense, Input, Flatten
from tensorflow.keras.optimizers import Nadam

In [None]:
# Exercise: build a DNN with 20 hidden layers of 100 neurons each (that’s too many, but it’s the point of this exercise). Use He initialization and the ELU activation function.
# Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset.
# You can load it with keras.datasets.cifar10.load_data(). The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes,
# so you’ll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model’s architecture or hyperparameters.

(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# The first 45,000 samples are used for training; everything after them is held out for validation.
X_train = X_train_full[:45000]
y_train = y_train_full[:45000]
X_valid = X_train_full[45000:]
y_valid = y_train_full[45000:]

print(y_valid.shape)

model = tf.keras.Sequential()
model.add(Input(shape=(32, 32, 3)))
model.add(Flatten())
# The loop alone supplies all 20 hidden layers; no extra Dense layer precedes it,
# so the network matches the architecture the exercise asks for.
for _ in range(20):
  model.add(Dense(100, activation="elu", kernel_initializer="he_normal"))

# 10-way softmax output, one unit per CIFAR10 class.
model.add(Dense(10, activation="softmax"))

model.summary()

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model.compile(loss='sparse_categorical_crossentropy', optimizer=Nadam(learning_rate=1e-5), metrics=['acc'])

model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=50, callbacks=[early_stopping])

(5000, 1)
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_88 (Dense)             (None, 100)               307300    
_________________________________________________________________
dense_89 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_90 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_91 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_92 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_93 (Dense)             (None, 100)    

<tensorflow.python.keras.callbacks.History at 0x7f480bee77b8>

In [None]:
# Score the baseline network on the held-out test split; the cell displays the returned value.
model.evaluate(x=X_test, y=y_test)



[1.6212340593338013, 0.4309000074863434]

In [None]:
# Now try adding Batch Normalization and compare the learning curves: Is it converging faster than before? Does it produce a better model? How does it affect training speed?
from tensorflow.keras.layers import BatchNormalization

model_bn = tf.keras.Sequential()
model_bn.add(Flatten(input_shape=(32, 32, 3)))
# 20 hidden blocks: each Dense layer (ELU, He-normal init) is followed by a
# BatchNormalization layer applied to its activated output.
for _ in range(20):
  model_bn.add(Dense(100, activation="elu", kernel_initializer="he_normal"))
  model_bn.add(BatchNormalization())

# 10-way softmax output layer.
model_bn.add(Dense(10, activation="softmax"))

model_bn.summary()

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model_bn.compile(loss='sparse_categorical_crossentropy', optimizer=Nadam(learning_rate=1e-5), metrics=['acc'])

model_bn.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=50, callbacks=[early_stopping])

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_6 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_131 (Dense)            (None, 100)               307300    
_________________________________________________________________
batch_normalization_20 (Batc (None, 100)               400       
_________________________________________________________________
dense_132 (Dense)            (None, 100)               10100     
_________________________________________________________________
batch_normalization_21 (Batc (None, 100)               400       
_________________________________________________________________
dense_133 (Dense)            (None, 100)               10100     
_________________________________________________________________
batch_normalization_22 (Batc (None, 100)              

<tensorflow.python.keras.callbacks.History at 0x7f4813815668>

In [None]:
# Score the Batch Normalization network on the held-out test split.
model_bn.evaluate(x=X_test, y=y_test)



[1.5196648836135864, 0.46790000796318054]

In [None]:
# Try replacing Batch Normalization with SELU, and make the necessary adjustments to ensure the network self-normalizes
# (i.e., standardize the input features, use LeCun normal initialization, make sure the DNN contains only a sequence of dense layers, etc.).

# Standardize the inputs with statistics computed on the training split only,
# then apply the same shift and scale to the validation and test splits.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model_selu = tf.keras.Sequential()
model_selu.add(Flatten(input_shape=(32, 32, 3)))

# Plain stack of 20 Dense layers using SELU with LeCun-normal initialization.
for _ in range(20):
  model_selu.add(Dense(100, activation="selu", kernel_initializer="lecun_normal"))

# 10-way softmax output layer.
model_selu.add(Dense(10, activation="softmax"))

model_selu.summary()

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model_selu.compile(loss='sparse_categorical_crossentropy', optimizer=Nadam(learning_rate=1e-5), metrics=['acc'])

model_selu.fit(X_train_scaled, y_train, validation_data=(X_valid_scaled, y_valid), epochs=50, callbacks=[early_stopping])

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_7 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_152 (Dense)            (None, 100)               307300    
_________________________________________________________________
dense_153 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_154 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_155 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_156 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_157 (Dense)            (None, 100)              

<tensorflow.python.keras.callbacks.History at 0x7f480a119f28>

In [None]:
# Score the SELU network on the standardized test split (same scaling as used for training).
model_selu.evaluate(x=X_test_scaled, y=y_test)



[1.5601130723953247, 0.45890000462532043]

In [None]:
# Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.

from tensorflow.keras.layers import AlphaDropout

# Standardize the inputs with training-split statistics. This is recomputed here
# so the cell does not rely on the scaled arrays from an earlier cell.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model_selu_dr = tf.keras.Sequential()
model_selu_dr.add(Flatten(input_shape=(32, 32, 3)))

# 20 hidden blocks: a SELU Dense layer (LeCun-normal init) followed by
# AlphaDropout with a 10% rate.
for _ in range(20):
  model_selu_dr.add(Dense(100, activation="selu", kernel_initializer="lecun_normal"))
  model_selu_dr.add(AlphaDropout(rate=0.1))

# 10-way softmax output layer.
model_selu_dr.add(Dense(10, activation="softmax"))

model_selu_dr.summary()

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model_selu_dr.compile(loss='sparse_categorical_crossentropy', optimizer=Nadam(learning_rate=1e-5), metrics=['acc'])

model_selu_dr.fit(X_train_scaled, y_train, validation_data=(X_valid_scaled, y_valid), epochs=50, callbacks=[early_stopping])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 3072)              0         
_________________________________________________________________
dense (Dense)                (None, 100)               307300    
_________________________________________________________________
alpha_dropout (AlphaDropout) (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
alpha_dropout_1 (AlphaDropou (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
alpha_dropout_2 (AlphaDropou (None, 100)              

NameError: ignored

In [None]:
# Score the alpha-dropout network on the standardized test split.
model_selu_dr.evaluate(x=X_test_scaled, y=y_test)

NameError: ignored