# Transfer Learning :

# Prediction of Even/odd number from the MINIST Handwritten dataset 

In [2]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [3]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [4]:
#tf.random.set_seed(42)
#np.random.seed(42)

LAYERS = [ tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax")]


model = tf.keras.models.Sequential(LAYERS)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 300)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
__________________________________________________

In [6]:
model.compile(loss = "sparse_categorical_crossentropy", optimizer = "SGD", metrics = ['accuracy'])

In [7]:
model.fit(X_train, y_train, epochs = 10, validation_data = (X_test, y_test))
#history = model.fit(X_train, Y_train, validation_data=VALIDATION_SET, batch_size=200, epochs=EPOCHS)

Train on 55000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1c08333c160>

In [8]:
model.save("pretrained_mnist_model.h5")

# Transfer Learning 

In [10]:
pretrained_mnist_data = tf.keras.models.load_model("pretrained_mnist_model.h5")

In [11]:
pretrained_mnist_data.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 300)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
__________________________________________________

# Creating a new model ( Transfer learning)
with lower layers from the past model and changing the output layer to binary classification with sigmoid as an activation function 

In [12]:
lower_pretrained_layer = pretrained_mnist_data.layers[:-1]

In [13]:
new_model = tf.keras.models.Sequential(lower_pretrained_layer)

In [14]:
new_model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

In [15]:
new_model.compile(loss = "binary_crossentropy", 
                  optimizer = "SGD", metrics =["accuracy"])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [25]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 300)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101       
Total params: 265,701
Trainable params: 265,701
Non-trainable params: 0
________________________________________________

In [26]:
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [27]:
np.where(y_train%2 == 0, 1,0)

array([0, 0, 1, ..., 0, 1, 1])

In [28]:
def update_even_odd_labels(labels):
    for idx, label in enumerate(labels):
        labels[idx] = np.where(label % 2 == 0, 1, 0)
    return labels

In [29]:
y_train_bin, y_test_bin, y_valid_bin = update_even_odd_labels([y_train, y_test, y_valid])

In [30]:
np.unique(y_valid_bin)

array([0, 1])

In [31]:
history = new_model.fit(X_train, y_train_bin, epochs=10,
                    validation_data=(X_valid, y_valid_bin), verbose=2)

Train on 55000 samples, validate on 5000 samples
Epoch 1/10
55000/55000 - 16s - loss: 0.1944 - acc: 0.9267 - val_loss: 0.1199 - val_acc: 0.9610
Epoch 2/10
55000/55000 - 14s - loss: 0.1077 - acc: 0.9632 - val_loss: 0.0891 - val_acc: 0.9704
Epoch 3/10
55000/55000 - 15s - loss: 0.0871 - acc: 0.9705 - val_loss: 0.0777 - val_acc: 0.9724
Epoch 4/10
55000/55000 - 19s - loss: 0.0758 - acc: 0.9740 - val_loss: 0.0680 - val_acc: 0.9776
Epoch 5/10
55000/55000 - 14s - loss: 0.0680 - acc: 0.9777 - val_loss: 0.0624 - val_acc: 0.9794
Epoch 6/10
55000/55000 - 14s - loss: 0.0627 - acc: 0.9784 - val_loss: 0.0612 - val_acc: 0.9792
Epoch 7/10
55000/55000 - 15s - loss: 0.0575 - acc: 0.9806 - val_loss: 0.0569 - val_acc: 0.9816
Epoch 8/10
55000/55000 - 18s - loss: 0.0534 - acc: 0.9817 - val_loss: 0.0550 - val_acc: 0.9818
Epoch 9/10
55000/55000 - 14s - loss: 0.0499 - acc: 0.9831 - val_loss: 0.0527 - val_acc: 0.9820
Epoch 10/10
55000/55000 - 10s - loss: 0.0469 - acc: 0.9848 - val_loss: 0.0582 - val_acc: 0.9794


In [32]:
new_model.evaluate(X_test, y_test_bin)



[0.058186717158928515, 0.9785]

In [40]:
X_new = X_test[:4]

y_test[:4], y_test_bin[:4]     

(array([7, 2, 1, 0], dtype=uint8), array([0, 1, 0, 1]))

In [47]:
np.argmax(new_model.predict(X_new), axis=-1)

array([0, 0, 0, 0], dtype=int64)