In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Input, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.optimizers import Adam
import matplotlib.pyplot as plt


Using TensorFlow backend.


# Preprocessing Data

In [2]:
batch_size = 128
num_classes = 10
epochs = 2



(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
y_train=y_train.reshape(-1,1)
y_test=y_test.reshape(-1,1)


Data_Train=np.hstack([X_train,y_train])
Data_Test=np.hstack([X_test,y_test])

X_train_1=Data_Train[Data_Train[:,784]<5]
X_test_1=Data_Test[Data_Test[:,784]<5]

X_train_2=Data_Train[Data_Train[:,784]>=5]
X_test_2=Data_Test[Data_Test[:,784]>=5]

y_train_1= X_train_1[:,784]
y_train_2= X_train_2[:,784]
y_test_1 = X_test_1[:,784]
y_test_2 = X_test_2[:,784]

X_train_1= X_train_1[:,0:784]
X_train_2= X_train_2[:,0:784]
X_test_1= X_test_1[:,0:784]
X_test_2= X_test_2[:,0:784]

y_train_1 = keras.utils.to_categorical(y_train_1, num_classes)
y_test_1 = keras.utils.to_categorical(y_test_1, num_classes)
y_train_2 = keras.utils.to_categorical(y_train_2, 10)
y_test_2 = keras.utils.to_categorical(y_test_2, 10)

X_train_1= X_train_1.reshape(len(X_train_1),28,28,1)
X_train_2= X_train_2.reshape(len(X_train_2),28,28,1)
X_test_1= X_test_1[:,0:784].reshape(len(X_test_1),28,28,1)
X_test_2= X_test_2[:,0:784].reshape(len(X_test_2),28,28,1)


# Compiling the first CNN model to train on numbers 0 to 4

In [3]:
model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', padding='same', name='conv_1', 
                 input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2), name='maxpool_1'))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', name='conv_2'))
model.add(MaxPooling2D((2, 2), name='maxpool_2'))
#model.add(Conv2D(128, (3, 3), activation='relu', padding='same', name='conv_3'))
#model.add(MaxPooling2D((2, 2), name='maxpool_3'))
#model.add(Conv2D(128, (3, 3), activation='relu', padding='same', name='conv_4'))
#model.add(MaxPooling2D((2, 2), name='maxpool_4'))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu', name='dense_1'))
model.add(Dense(128, activation='relu', name='dense_2'))
model.add(Dense(num_classes, activation='softmax', name='output'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


# Fitting model 1 on training set 1

In [4]:
history = model.fit(X_train_1, y_train_1,
                    batch_size=batch_size,
                    epochs=2,
                    verbose=1,
                    validation_data=(X_test_1, y_test_1))
                    
score = model.evaluate(X_test_1, y_test_1, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Instructions for updating:
Use tf.cast instead.
Train on 30596 samples, validate on 5139 samples
Epoch 1/2
Epoch 2/2
Test loss: 0.0026019426719830134
Test accuracy: 0.9989102919014958


# Compiling the second CNN model to train on numbers 5 to 9

In [5]:
model_2= Sequential()
model_2.add(Conv2D(16, (3, 3), activation='relu', padding='same', name='conv_12', 
                 input_shape=(28, 28, 1)))
model_2.add(MaxPooling2D((2, 2), name='maxpool_12'))
model_2.add(Conv2D(32, (3, 3), activation='relu', padding='same', name='conv_22'))
model_2.add(MaxPooling2D((2, 2), name='maxpool_22'))
model_2.add(Flatten())
model_2.add(Dropout(0.5))
model_2.add(Dense(256, activation='relu', name='dense_12'))
model_2.add(Dense(128, activation='relu', name='dense_22'))
model_2.add(Dense(10, activation='softmax', name='output'))

model_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Transfering the first CNN base weights to the 2nd CNN model

In [6]:
def copyModel2Model(model_source,model_target,certain_layer=""):        
    for tg,sr in zip(model_target.layers,model_source.layers):
        wk0= sr.get_weights()
        tg.set_weights(wk0)
        if tg.name==certain_layer:
            break
    print("model source was copied into model target")

In [7]:
copyModel2Model(model,model_2,'conv_22')

model source was copied into model target


# Freezing the first 2 layers of the 2nd CNN model

In [8]:
for layer in model_2.layers[:2]:
    layer.trainable=False
for layer in model_2.layers[2:]:
    layer.trainable=True

# Fitting model 2 on training set 2

In [9]:
history = model_2.fit(X_train_2, y_train_2,
                    batch_size=batch_size,
                    epochs=2,
                    verbose=1,
                    validation_data=(X_test_2, y_test_2))
                    
score = model_2.evaluate(X_test_2, y_test_2, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 29404 samples, validate on 4861 samples
Epoch 1/2
Epoch 2/2
Test loss: 0.005394464856571855
Test accuracy: 0.9980662393731802
