In [2]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
%load_ext tensorboard

In [3]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [4]:
tf.random.set_seed(42)
np.random.seed(42)

LAYERS = [ tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax")]


model = tf.keras.models.Sequential(LAYERS)

In [5]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])

  super(SGD, self).__init__(name, **kwargs)


In [6]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-trai

In [7]:
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

Epoch 1/10
1719/1719 - 5s - loss: 1.5275 - accuracy: 0.5970 - val_loss: 0.9444 - val_accuracy: 0.7980 - 5s/epoch - 3ms/step
Epoch 2/10
1719/1719 - 4s - loss: 0.7465 - accuracy: 0.8287 - val_loss: 0.5868 - val_accuracy: 0.8596 - 4s/epoch - 2ms/step
Epoch 3/10
1719/1719 - 4s - loss: 0.5412 - accuracy: 0.8624 - val_loss: 0.4685 - val_accuracy: 0.8834 - 4s/epoch - 2ms/step
Epoch 4/10
1719/1719 - 4s - loss: 0.4591 - accuracy: 0.8771 - val_loss: 0.4104 - val_accuracy: 0.8940 - 4s/epoch - 2ms/step
Epoch 5/10
1719/1719 - 4s - loss: 0.4142 - accuracy: 0.8869 - val_loss: 0.3758 - val_accuracy: 0.9006 - 4s/epoch - 2ms/step
Epoch 6/10
1719/1719 - 4s - loss: 0.3852 - accuracy: 0.8938 - val_loss: 0.3525 - val_accuracy: 0.9052 - 4s/epoch - 2ms/step
Epoch 7/10
1719/1719 - 4s - loss: 0.3644 - accuracy: 0.8979 - val_loss: 0.3348 - val_accuracy: 0.9102 - 4s/epoch - 2ms/step
Epoch 8/10
1719/1719 - 4s - loss: 0.3485 - accuracy: 0.9022 - val_loss: 0.3209 - val_accuracy: 0.9138 - 4s/epoch - 2ms/step
Epoch 9/

In [8]:
model.save("full_mnist_model.h5")

In [9]:
load_full_mnist_model = tf.keras.models.load_model("full_mnist_model.h5")

In [10]:
load_full_mnist_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-trai

In [11]:
np.where(y_train % 2 == 0, 1 , 0)

array([0, 0, 1, ..., 0, 1, 1])

In [12]:
y_train[0], y_train[-1] 

(7, 8)

In [13]:
def update_even_odd_labels(labels):
    for idx, label in enumerate(labels):
        labels[idx] = np.where(label % 2 == 0, 1 , 0)
    return labels

In [14]:
y_train_bin, y_valid_bin, y_test_bin = update_even_odd_labels([y_train, y_valid, y_test])

In [15]:
# Check trainable or not:

for layer in load_full_mnist_model.layers:
    print(f"{layer.name}: {layer.trainable}")

flatten: True
dense: True
leaky_re_lu: True
dense_1: True
leaky_re_lu_1: True
dense_2: True


In [16]:
# make lower layer UN-trainable except the last layer:

for layer in load_full_mnist_model.layers[:-1]:
    layer.trainable = False
    print(f"Now {layer.name}: {layer.trainable}")

Now flatten: False
Now dense: False
Now leaky_re_lu: False
Now dense_1: False
Now leaky_re_lu_1: False


In [17]:
new_model = tf.keras.models.Sequential(load_full_mnist_model.layers[:-1])
new_model.add(
    tf.keras.layers.Dense(1, activation="sigmoid")
)

In [18]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 265,701
Trainable params: 101
Non-traina

In [19]:
new_model.get_config()

{'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 28, 28),
    'dtype': 'float32',
    'name': 'flatten_input',
    'ragged': False,
    'sparse': False}},
  {'class_name': 'Flatten',
   'config': {'batch_input_shape': (None, 28, 28),
    'data_format': 'channels_last',
    'dtype': 'float32',
    'name': 'flatten',
    'trainable': False}},
  {'class_name': 'Dense',
   'config': {'activation': 'linear',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'HeNormal', 'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense',
    'trainable': False,
    'units': 300,
    'use_bias': True}},
  {'class_name': 'LeakyReLU',
   'config': {'alpha': 0.30000001192092896,
    'dtype': 'float32',
    'name': 'leaky_re_lu',
    'trainable': False}}

In [20]:
# Check trainable or not:

for layer in new_model.layers:
    print(f"{layer.name}: {layer.trainable}")

flatten: False
dense: False
leaky_re_lu: False
dense_1: False
leaky_re_lu_1: False
dense_3: True


In [21]:
new_model.compile(loss="binary_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])

  super(SGD, self).__init__(name, **kwargs)


In [22]:
new_history = new_model.fit(X_train, y_train_bin, epochs=10,
                    validation_data=(X_valid, y_valid_bin), verbose=2)

Epoch 1/10
1719/1719 - 5s - loss: 0.4631 - accuracy: 0.7917 - val_loss: 0.3569 - val_accuracy: 0.8564 - 5s/epoch - 3ms/step
Epoch 2/10
1719/1719 - 4s - loss: 0.3552 - accuracy: 0.8486 - val_loss: 0.3265 - val_accuracy: 0.8626 - 4s/epoch - 2ms/step
Epoch 3/10
1719/1719 - 4s - loss: 0.3365 - accuracy: 0.8555 - val_loss: 0.3126 - val_accuracy: 0.8686 - 4s/epoch - 2ms/step
Epoch 4/10
1719/1719 - 4s - loss: 0.3262 - accuracy: 0.8603 - val_loss: 0.3041 - val_accuracy: 0.8712 - 4s/epoch - 2ms/step
Epoch 5/10
1719/1719 - 4s - loss: 0.3191 - accuracy: 0.8631 - val_loss: 0.2979 - val_accuracy: 0.8740 - 4s/epoch - 2ms/step
Epoch 6/10
1719/1719 - 4s - loss: 0.3136 - accuracy: 0.8659 - val_loss: 0.2929 - val_accuracy: 0.8756 - 4s/epoch - 2ms/step
Epoch 7/10
1719/1719 - 4s - loss: 0.3092 - accuracy: 0.8683 - val_loss: 0.2889 - val_accuracy: 0.8800 - 4s/epoch - 2ms/step
Epoch 8/10
1719/1719 - 4s - loss: 0.3055 - accuracy: 0.8699 - val_loss: 0.2856 - val_accuracy: 0.8828 - 4s/epoch - 2ms/step
Epoch 9/

In [23]:
new_model.evaluate(X_test, y_test_bin)



[0.29175853729248047, 0.8763999938964844]

In [24]:
X_new = X_test[:3]
y_test[:3], y_test_bin[:3]

(array([7, 2, 1], dtype=uint8), array([0, 1, 0]))

In [25]:
new_model.predict(X_new)

array([[0.02084696],
       [0.96824336],
       [0.11972325]], dtype=float32)

In [26]:
np.argmax(new_model.predict(X_new), axis=-1)

array([0, 0, 0])