In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [2]:
def split_dataset(x, y):
    y_5_or_6 = (y == 5) | (y == 6)
    y_a = y[~y_5_or_6]
    y_a[y_a > 6] -= 2
    y_b = (y[y_5_or_6] == 6).astype(np.float32)
    return ((x[~y_5_or_6], y_a), (x[y_5_or_6], y_b))

In [3]:
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train_full = x_train_full / 255.0
x_test = x_test / 255.0
x_valid, x_train = x_train_full[:5000], x_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

(x_train_a, y_train_a), (x_train_b, y_train_b) = split_dataset(x_train, y_train)
(x_valid_a, y_valid_a), (x_valid_b, y_valid_b) = split_dataset(x_valid, y_valid)
(x_test_a, y_test_a), (x_test_b, y_test_b) = split_dataset(x_test, y_test)
x_train_b = x_train_b[:200]
y_train_b = y_train_b[:200]

In [4]:
x_train_a.shape

(43986, 28, 28)

In [5]:
x_train_b.shape

(200, 28, 28)

In [6]:
y_train_a[:30]

array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,
       1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)

In [7]:
y_train_b[:30]

array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)

In [8]:
tf.random.set_seed(42)
np.random.seed(42)

In [9]:
model_a = keras.models.Sequential()
model_a.add(keras.layers.Flatten(input_shape=[28, 28]))

for n_hidden in (300, 100, 50, 50, 50):
    model_a.add(keras.layers.Dense(n_hidden, activation='selu'))

model_a.add(keras.layers.Dense(8, activation='softmax'))

In [10]:
model_a.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_5 (Dense)              (None, 8)                 4

In [11]:
model_a.compile(loss='sparse_categorical_crossentropy', 
                optimizer=keras.optimizers.SGD(learning_rate=1e-3), 
                metrics=['acc'])

In [12]:
history = model_a.fit(x_train_a, y_train_a, epochs=20, 
                      validation_data=(x_valid_a, y_valid_a))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [13]:
model_a.save('model_a.h5')

In [14]:
model_b = keras.models.Sequential()
model_b.add(keras.layers.Flatten(input_shape=[28, 28]))

for n_hidden in (300, 100, 50, 50, 50):
    model_b.add(keras.layers.Dense(n_hidden, activation='selu'))

model_b.add(keras.layers.Dense(1, activation='sigmoid'))

In [15]:
model_b.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               235500    
_________________________________________________________________
dense_7 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_8 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_9 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_10 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                

In [16]:
model_b.compile(loss='binary_crossentropy', 
                optimizer=keras.optimizers.SGD(learning_rate=1e-3), 
                metrics=['acc'])

In [17]:
history = model_b.fit(x_train_b, y_train_b, epochs=20, 
                      validation_data=(x_valid_b, y_valid_b))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
model_a = keras.models.load_model('model_a.h5')
model_b_on_a = keras.models.Sequential(model_a.layers[:-1])
model_b_on_a.add(keras.layers.Dense(1, activation='sigmoid'))

In [19]:
model_a_clone = keras.models.clone_model(model_a)
model_a_clone.set_weights(model_a.get_weights())

In [20]:
for layer in model_b_on_a.layers[:-1]:
    layer.trainable = False

model_b_on_a.compile(loss='binary_crossentropy', 
                     optimizer=keras.optimizers.SGD(learning_rate=1e-3), 
                     metrics=['acc'])

In [21]:
history = model_b_on_a.fit(x_train_b, y_train_b, epochs=4, 
                           validation_data=(x_valid_b, y_valid_b))

for layer in model_b_on_a.layers[:-1]:
    layer.trainable = True

model_b_on_a.compile(loss='binary_crossentropy', 
                     optimizer=keras.optimizers.SGD(learning_rate=1e-3), 
                     metrics=['acc'])

history = model_b_on_a.fit(x_train_b, y_train_b, epochs=16, 
                           validation_data=(x_valid_b, y_valid_b))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [22]:
model_b.evaluate(x_test_b, y_test_b)



[0.14084076881408691, 0.9704999923706055]

In [23]:
model_b_on_a.evaluate(x_test_b, y_test_b)



[0.06810946017503738, 0.9929999709129333]

In [24]:
(100 - 97.05) / (100 - 99.3)

4.214285714285701