In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
# Fetch Fashion-MNIST through the Keras dataset helper.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# The first 5,000 samples form the validation set, the remainder the training
# set. Every image array is divided by 255.0, which also converts it to float.
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.0

# Weight Initialization

In [4]:
# Public (non-underscore) names exposed by keras.initializers.
[attr for attr in dir(keras.initializers) if not attr.startswith("_")]

['Constant',
 'ConstantV2',
 'GlorotNormal',
 'GlorotNormalV2',
 'GlorotUniform',
 'GlorotUniformV2',
 'HeNormal',
 'HeNormalV2',
 'HeUniform',
 'HeUniformV2',
 'Identity',
 'IdentityV2',
 'Initializer',
 'LOCAL',
 'LecunNormal',
 'LecunNormalV2',
 'LecunUniform',
 'LecunUniformV2',
 'Ones',
 'OnesV2',
 'Orthogonal',
 'OrthogonalV2',
 'RandomNormal',
 'RandomNormalV2',
 'RandomUniform',
 'RandomUniformV2',
 'TruncatedNormal',
 'TruncatedNormalV2',
 'VarianceScaling',
 'VarianceScalingV2',
 'Zeros',
 'ZerosV2',
 'constant',
 'deserialize',
 'generic_utils',
 'get',
 'glorot_normal',
 'glorot_normalV2',
 'glorot_uniform',
 'glorot_uniformV2',
 'he_normal',
 'he_normalV2',
 'he_uniform',
 'he_uniformV2',
 'identity',
 'init_ops',
 'initializer',
 'initializers',
 'initializers_v1',
 'inspect',
 'keras_export',
 'lecun_normal',
 'lecun_normalV2',
 'lecun_uniform',
 'lecun_uniformV2',
 'legacy_serialization',
 'normal',
 'one',
 'ones',
 'orthogonal',
 'populate_deserializable_objects',
 'r

In [5]:
# Example (not executed): select He normal initialization by its string name.
# keras.layers.Dense(10, activation="relu", kernel_initializer="he_normal")

In [6]:
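# Example (not executed): a custom initializer built with VarianceScaling,
# here uniform with scale=2 and variance based on fan_avg.
# init = keras.initializers.VarianceScaling(scale=2, mode="fan_avg", distribution="uniform")
# keras.layers.Dense(10, activation="relu", kernel_initializer=init)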

In [7]:
# Classifier with He-normal initialized hidden layers, each followed by a
# separate LeakyReLU activation layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(units=300, kernel_initializer="he_normal"))
model.add(keras.layers.LeakyReLU())
model.add(keras.layers.Dense(units=100, kernel_initializer="he_normal"))
model.add(keras.layers.LeakyReLU())
model.add(keras.layers.Dense(units=10, activation="softmax"))

In [8]:
# Sparse cross-entropy on integer labels, plain SGD with a 1e-3 learning rate.
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [9]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Batch Normalization

In [14]:
# Same architecture with BatchNormalization applied to the flattened input and
# after each hidden Dense layer, before its ReLU activation.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(units=300, kernel_initializer="he_normal"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation("relu"))
model.add(keras.layers.Dense(units=100, kernel_initializer="he_normal"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation("relu"))
model.add(keras.layers.Dense(units=10, activation="softmax"))

In [16]:
# Print each layer's output shape and parameter count for the current model.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_3 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 dense_6 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_4 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 activation_2 (Activation)   (None, 300)               0         
                                                                 
 dense_7 (Dense)             (None, 100)              

In [11]:
# Sparse cross-entropy on integer labels, plain SGD with a 1e-3 learning rate.
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [12]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Gradient Clipping

In [17]:
# Clip every gradient component to the range [-1.0, 1.0] before the SGD update.
optimizer = keras.optimizers.SGD(clipvalue=1.0)
# Alternative: rescale each weight's gradient so its L2 norm is at most 1.0.
# optimizer = keras.optimizers.SGD(clipnorm=1.0)

# `model` is the 10-class softmax classifier trained on integer labels, so it
# is compiled with the same sparse cross-entropy loss as the other cells;
# a regression loss such as "mse" does not match these targets.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

# Reusing Pretrained Layers

In [49]:
def split_dataset(X, y):
    """Partition a labelled dataset into the two transfer-learning tasks.

    Task A keeps every sample whose label is neither 5 nor 6 and shifts the
    labels above 6 down by two so the remaining labels are contiguous.
    Task B keeps only the samples labelled 5 or 6 and converts them into a
    binary target: 1.0 for label 6, 0.0 for label 5.

    Args:
        X: Array of samples, indexable by a boolean mask along its first axis.
        y: NumPy array of integer class labels aligned with ``X``.

    Returns:
        ``((X_A, y_A), (X_B, y_B))``. ``y_A`` keeps the dtype of ``y`` and
        ``y_B`` has dtype ``float32``. ``y`` itself is left unmodified.
    """
    in_task_b = np.logical_or(y == 5, y == 6)
    in_task_a = np.logical_not(in_task_b)

    labels_a = y[in_task_a]
    # Close the gap left by removing classes 5 and 6 (e.g. 7, 8, 9 -> 5, 6, 7).
    labels_a = np.where(labels_a > 6, labels_a - 2, labels_a)

    labels_b = (y[in_task_b] == 6).astype(np.float32)
    return (X[in_task_a], labels_a), (X[in_task_b], labels_b)

# Derive the task-A and task-B subsets for the training, validation and test splits.
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

# Keep only the first 200 task-B training samples.
X_train_B, y_train_B = X_train_B[:200], y_train_B[:200]

In [50]:
# Peek at the first 30 task-A labels (renumbered class ids).
y_train_A[:30]

array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,
       1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)

In [51]:
# Peek at the first 30 task-B labels (float32: 1.0 for class 6, 0.0 for class 5).
y_train_B[:30]

array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)

In [52]:
# Seed both NumPy's and TensorFlow's global random generators.
np.random.seed(42)
tf.random.set_seed(42)

In [53]:
# Task-A classifier: five SELU hidden layers and an 8-way softmax output.
model_A = keras.models.Sequential([keras.layers.Flatten(input_shape=[28, 28])])
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(units=n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(units=8, activation="softmax"))

In [54]:
# Sparse cross-entropy on integer labels, plain SGD with a 1e-3 learning rate.
model_A.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [55]:
# Train the task-A model for 20 epochs, validating on the task-A validation split.
history = model_A.fit(
    X_train_A,
    y_train_A,
    validation_data=(X_valid_A, y_valid_A),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [56]:
# Persist the trained task-A model to disk in the working directory.
model_A.save("my_model_A.h5")

In [63]:
# Task-B baseline: same hidden stack as model_A, with a single sigmoid output
# unit for the binary target.
model_B = keras.models.Sequential([keras.layers.Flatten(input_shape=[28, 28])])
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(units=n_hidden, activation="selu"))
model_B.add(keras.layers.Dense(units=1, activation="sigmoid"))

In [64]:
# Binary cross-entropy for the sigmoid output, plain SGD with a 1e-3 learning rate.
model_B.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [65]:
# Train the task-B model for 20 epochs, validating on the task-B validation split.
history = model_B.fit(
    X_train_B,
    y_train_B,
    validation_data=(X_valid_B, y_valid_B),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [66]:
# Print each layer's output shape and parameter count for the task-B model.
model_B.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_8 (Flatten)         (None, 784)               0         
                                                                 
 dense_39 (Dense)            (None, 300)               235500    
                                                                 
 dense_40 (Dense)            (None, 100)               30100     
                                                                 
 dense_41 (Dense)            (None, 50)                5050      
                                                                 
 dense_42 (Dense)            (None, 50)                2550      
                                                                 
 dense_43 (Dense)            (None, 50)                2550      
                                                                 
 dense_44 (Dense)            (None, 1)                

In [68]:
# List model_A's layer objects in order (one Flatten followed by six Dense layers).
model_A.layers

[<keras.layers.reshaping.flatten.Flatten at 0x18c7ccd5160>,
 <keras.layers.core.dense.Dense at 0x18c7c80cd30>,
 <keras.layers.core.dense.Dense at 0x18c7ccd51f0>,
 <keras.layers.core.dense.Dense at 0x18c7c0805e0>,
 <keras.layers.core.dense.Dense at 0x18c7bfafcd0>,
 <keras.layers.core.dense.Dense at 0x18c7bff8760>,
 <keras.layers.core.dense.Dense at 0x18c7c83fa30>]