<font size=5>**Project - Training from scratch vs Transfer Learning**</font> 

- <font size=3>**Importing the Modules**</font> 

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

- <font size=3>**Preparing the Dataset**</font> 

In [2]:
# Load the Fashion MNIST data 
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

In [3]:
# Let us trim the train data by considering the first 30,000 data samples
X_train_full = X_train_full[:30000]
y_train_full = y_train_full[:30000]

In [4]:
# Let us also trim the test data by considering the first 5000 data samples
X_test = X_test[:5000]
y_test = y_test[:5000]

In [5]:
# Scale the train and test datasets by dividing with 255. so that the values will be in the range of 0-1
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

In [6]:
# Let us divide the X_train_full such that the first 5000 samples form X_valid and the remaining to be in X_train.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]

In [7]:
# Similarly, let us divide the y_train_full such that the first 5000 samples form y_valid and the remaining to be in y_train.
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

- <font size=3>**Dividing the data sets**</font> 

In [8]:
# Define the split_dataset function
def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7
    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?
    return ((X[~y_5_or_6], y_A), (X[y_5_or_6], y_B))

In [9]:
# Now call the split_dataset on the X_train and y_train.
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)

In [10]:
# Similarly, call the split_dataset on the X_valid and y_valid.
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)

In [11]:
# Similarly, call the split_dataset on the X_test and y_test.
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

In [12]:
# Set the random seed for tf and np.
np.random.seed(42)
tf.random.set_seed(42)

- <font size=3>**Build and Fit the Model A**</font> 

In [13]:
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

In [14]:
model_A.compile(loss="sparse_categorical_crossentropy",
               optimizer=keras.optimizers.SGD(learning_rate=1e-3),
               metrics=["accuracy"])

In [15]:
history = model_A.fit(X_train_A, y_train_A, epochs=5,
            validation_data=(X_valid_A, y_valid_A))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [16]:
# Save the model_A we created.
model_A.save("my_model_A.h5")

- <font size=3>**Build and Fit the Model B**</font> 

In [17]:
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300,100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation="selu"))
model_B.add(keras.layers.Dense(1, activation="softmax"))

In [18]:
model_B.compile(loss="binary_crossentropy",
               optimizer=keras.optimizers.SGD(learning_rate=1e-3),
               metrics=["accuracy"])

In [19]:
history = model_B.fit(X_train_B, y_train_B, epochs=5, validation_data=(X_valid_B, y_valid_B))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


- <font size=3>**Creating new model based on existing model A**</font> 

In [20]:
# See the model_B summary using summary() on model_B.
model_B.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               235500    
_________________________________________________________________
dense_7 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_8 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_9 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_10 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                

In [21]:
# Create model_A_clone which is the copy of model_A.
model_A_clone = keras.models.clone_model(model_A)

In [22]:
# Get the weights of model_A using get_weights(), and set the model parameters for model_A_clone using set_weights().
model_A_clone.set_weights(model_A.get_weights())

In [23]:
# Now, create a new model model_B_on_A, based on existing layers of model_A
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])

In [24]:
# Add the final dense layer with 1 neuron to the model_B_on_A. Set the activation to "sigmoid", as this is a binary classification problem.
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

In [25]:
# Set all the layers, except the last layer, of model_B_on_A to be non-trainable.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

In [26]:
# Now check the number of trainable parameters of model_B_on_A.
model_B_on_A.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_12 (Dense)             (None, 1)                

In [27]:
model_B_on_A.compile(loss="binary_crossentropy",
                    optimizer= keras.optimizers.SGD(learning_rate=1e-3),
                    metrics=["accuracy"])

In [28]:
history = model_B_on_A.fit(X_train_B, y_train_B,
                          epochs=5, validation_data=(X_valid_B, y_valid_B))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


- <font size=3>**Evaluating the models**</font> 

In [29]:
# Use evaluate() method on model_B and pass X_test_B and y_test_B as arguments to it.
model_B.evaluate(X_test_B, y_test_B)



[0.03187720105051994, 0.49844881892204285]

In [30]:
# Use evaluate() method on model_B_on_A and pass X_test_B and y_test_B as arguments to it.
model_B_on_A.evaluate(X_test_B, y_test_B)



[0.09890769422054291, 0.988624632358551]

<font size=4>**Author:**</font> 

- Prince Raj