In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import time

print(tf.__version__)  # 2.1.0

2.1.0


In [2]:
mnist = keras.datasets.mnist
(train_nums, train_labels), (test_nums, test_labels) = mnist.load_data()
train_nums.shape, test_nums.shape

((60000, 28, 28), (10000, 28, 28))

In [3]:
train_nums_flattened = np.array([x.flatten() for x in train_nums])
test_nums_flattened = np.array([x.flatten() for x in test_nums])
train_nums_flattened.shape, test_nums_flattened.shape

((60000, 784), (10000, 784))

In [4]:
train_nums_flattened = train_nums_flattened / 255
test_nums_flattened = test_nums_flattened / 255

In [5]:
y_train_large = (train_labels >= 7)
y_train_odd = (train_labels % 2 == 1)
y_multilabel = np.c_[y_train_large, y_train_odd]
# y_multilabel[y_multilabel==True] = 1
# y_multilabel[y_multilabel==False] = 0
y_multilabel = y_multilabel.astype(int)

In [6]:
y_test_large = (test_labels >= 7)
y_test_odd = (test_labels % 2 == 1)
y_test_multilabel = np.c_[y_test_large, y_test_odd]
y_test_multilabel = y_test_multilabel.astype(int)

#### loss function 1: binary corss entropy loss
use `keras.losses.BinaryCrossentropy` as loss function

In [7]:
m_part1 = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=[784]),
                                                            keras.layers.Dense(30, activation='relu')])
m_part2 = keras.Sequential([
    keras.layers.Dense(50, activation='relu', input_shape=[30]),
    keras.layers.Dense(2)])
model = keras.Sequential([m_part1, m_part2])

In [8]:
model.compile(optimizer='rmsprop', loss=keras.losses.BinaryCrossentropy(from_logits=True), metrics=['accuracy'])

In [9]:
model.fit(train_nums_flattened, y_multilabel, batch_size=64, epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f5a89a07828>

In [10]:
model.evaluate(test_nums_flattened, y_test_multilabel)



[0.05502661752994172, 0.9823]

In [11]:
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [12]:
model.predict(test_nums_flattened)

array([[  7.532341 ,  13.877022 ],
       [-15.061962 ,  -8.099563 ],
       [ -6.5343356,   7.7436457],
       ...,
       [ -9.076396 ,  -8.77918  ],
       [ -6.3520756,   7.9817066],
       [-20.035269 , -15.046896 ]], dtype=float32)

##### these two labels look like independent

In [13]:
tf.sigmoid(model.predict(test_nums_flattened))

<tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
array([[9.9946481e-01, 9.9999905e-01],
       [2.8752316e-07, 3.0357973e-04],
       [1.4505866e-03, 9.9956673e-01],
       ...,
       [1.1431986e-04, 1.5388062e-04],
       [1.7400922e-03, 9.9965847e-01],
       [1.9897262e-09, 2.9188783e-07]], dtype=float32)>

In [14]:
tf.math.sigmoid(model.predict(test_nums_flattened))

<tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
array([[9.9946481e-01, 9.9999905e-01],
       [2.8752316e-07, 3.0357973e-04],
       [1.4505866e-03, 9.9956673e-01],
       ...,
       [1.1431986e-04, 1.5388062e-04],
       [1.7400922e-03, 9.9965847e-01],
       [1.9897262e-09, 2.9188783e-07]], dtype=float32)>

#### loss function2: sigmoid cross entropy
use `tf.nn.sigmoid_cross_entropy_with_logits` as loss function

In [15]:
m_part1 = keras.Sequential([keras.layers.Dense(50, activation='relu', input_shape=[784]),
                                                            keras.layers.Dense(30, activation='relu')])
m_part2 = keras.Sequential([
    keras.layers.Dense(50, activation='relu', input_shape=[30]),
    keras.layers.Dense(2)])
model2 = keras.Sequential([m_part1, m_part2])

In [16]:
def loss_op(y, y_pred):
    loss=tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)
    # print(loss)
    return tf.reduce_mean(tf.reduce_sum(loss))
model2.compile(optimizer='rmsprop', loss=loss_op, metrics=['accuracy'])

In [17]:
model2.fit(train_nums_flattened, y_multilabel.astype(np.float32), batch_size=64, epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f5a7a812780>

In [18]:
model2.evaluate(test_nums_flattened, y_test_multilabel.astype(np.float32))



[4.114106161737442, 0.464]

In [19]:
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [20]:
y_test_multilabel

array([[1, 1],
       [0, 0],
       [0, 1],
       ...,
       [0, 0],
       [0, 1],
       [0, 0]])

In [21]:
model2.predict(test_nums_flattened)

array([[  7.4471025,   9.31081  ],
       [-15.517901 ,  -6.6242085],
       [ -8.165118 ,   8.104742 ],
       ...,
       [-10.8731985,  -8.889265 ],
       [-11.579703 ,  13.985869 ],
       [-20.471838 , -17.561388 ]], dtype=float32)

##### these two labels look like independent

In [22]:
tf.math.sigmoid(model2.predict(test_nums_flattened))

<tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
array([[9.9941719e-01, 9.9990952e-01],
       [1.8224725e-07, 1.3260703e-03],
       [2.8432216e-04, 9.9969804e-01],
       ...,
       [1.8959272e-05, 1.3784194e-04],
       [9.3539420e-06, 9.9999917e-01],
       [1.2858601e-09, 2.3614898e-08]], dtype=float32)>