In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
@tf.custom_gradient
def GradientReversalOperator(x):
    def grad(dy):
        return -1 * dy
    return x, grad

class GradientReversalLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(GradientReversalLayer, self).__init__()
        
    def call(self, inputs):
        return GradientReversalOperator(inputs)

In [3]:
class MNIST():
    def __init__(self, input_shape):
        super(MNIST, self).__init__()
        self.feature_extractor = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=32, kernel_size=5,
                                   strides=1, input_shape=input_shape),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Conv2D(filters=48, kernel_size=5, strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Flatten()            
        ])
        
        self.label_predictor = tf.keras.models.Sequential([
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        
        self.domain_predictor = tf.keras.models.Sequential([
            GradientReversalLayer(),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2),
            tf.keras.layers.Activation('sigmoid')          
        ])
        self.path_1 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.label_predictor
        ])
        self.path_2 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.domain_predictor
        ])
        
        
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_2 = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_3 = tf.keras.losses.SparseCategoricalCrossentropy()
        
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
        self.optimizer_2 = tf.keras.optimizers.Adam(learning_rate=0.0001)
        
        self.train_loss = tf.keras.metrics.Mean()
        self.train_loss_2 = tf.keras.metrics.Mean()
        
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.train_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        
        
        self.test_loss = tf.keras.metrics.Mean()
        self.test_loss_2 = tf.keras.metrics.Mean()
        self.test_loss_3 = tf.keras.metrics.Mean()
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_3 = tf.keras.metrics.SparseCategoricalAccuracy()
    
    @tf.function
    def train_both(self, x_class, y_class, x_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            loss_1 = self.loss(y_class, y_class_pred)   
        grad_1 = tape.gradient(loss_1, self.path_1.trainable_variables)
        
        with tf.GradientTape() as tape:
            y_domain_pred = self.path_2(x_both)
            loss_2 = self.loss_2(domain_labels, y_domain_pred) 
        grad_2 = tape.gradient(loss_2, self.path_2.trainable_variables)
        
        self.optimizer.apply_gradients(zip(grad_1, self.path_1.trainable_variables))
        self.optimizer_2.apply_gradients(zip(grad_2, self.path_2.trainable_variables))
        self.train_loss(loss_1)
        self.train_accuracy(y_class, y_class_pred)
        
        self.train_loss_2(loss_2)
        self.train_accuracy_2(domain_labels, y_domain_pred)
        
        return
    
    @tf.function
    def test_both(self, x_class, y_class, x_domain, y_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            y_domain_pred = self.path_2(x_both)
            y_target_class_pred = self.path_1(x_domain)
            
            loss_1 = self.loss(y_class, y_class_pred)
            loss_2 = self.loss_2(domain_labels, y_domain_pred)
            loss_3 = self.loss_3(y_domain, y_target_class_pred)
            
        self.test_loss(loss_1)
        self.test_accuracy(y_class, y_class_pred)
        
        self.test_loss_2(loss_2)
        self.test_accuracy_2(domain_labels, y_domain_pred)
        
        self.test_loss_3(loss_3)
        self.test_accuracy_3(y_domain, y_target_class_pred)
        
        return


In [4]:
x_train_mnist = np.load('../data/mnist/x_train.npy')
y_train_mnist = np.load('../data/mnist/y_train.npy')

x_test_mnist = np.load('../data/mnist/x_test.npy')
y_test_mnist = np.load('../data/mnist/y_test.npy')

In [5]:
x_train_svhn = np.load('../data/svhn/x_train.npy')
y_train_svhn = np.load('../data/svhn/y_train.npy')

x_test_svhn = np.load('../data/svhn/x_test.npy')
y_test_svhn = np.load('../data/svhn/y_test.npy')

In [6]:
x_train_mnist, x_test_mnist = x_train_mnist / 255.0, x_test_mnist / 255.0
x_train_svhn, x_test_svhn = x_train_svhn / 255.0, x_test_svhn / 255.0

In [7]:
x_train_mnist = tf.cast(x_train_mnist, tf.float32)
x_test_mnist = tf.cast(x_test_mnist, tf.float32)
x_train_svhn = tf.cast(x_train_svhn, tf.float32)
x_test_svhn = tf.cast(x_test_svhn, tf.float32)

In [8]:
def pad_image(x, y):
    
    paddings = tf.constant([[2, 2,], [2, 2]])
    
    new_x = tf.pad(x, paddings, "CONSTANT")
    
    return (new_x, y)

def duplicate_channel(x, y):

    new_x = tf.stack([x, x, x], axis = -1)
    
    return (new_x, y)

In [9]:
mnist_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_mnist, y_train_mnist))
mnist_train_dataset = mnist_train_dataset.map(pad_image)
source_train_dataset = mnist_train_dataset.map(duplicate_channel)

source_train_dataset = source_train_dataset.shuffle(len(y_train_mnist))
source_train_dataset = source_train_dataset.batch(1000, drop_remainder=True)
source_train_dataset = source_train_dataset.prefetch(5)


svhn_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_svhn, y_train_svhn))

target_train_dataset = svhn_train_dataset.shuffle(len(y_train_svhn))
target_train_dataset = target_train_dataset.batch(1000, drop_remainder=True)
target_train_dataset = target_train_dataset.prefetch(5)

In [10]:
model = MNIST(input_shape=(32, 32, 3))

In [11]:
EPOCHS = 10

for epoch in range(EPOCHS):
    for (source_images, class_labels), (target_images, _) in zip(source_train_dataset, target_train_dataset):
        model.train_both(source_images, class_labels, target_images)

    for (test_images, test_labels), (target_images, target_labels) in zip(source_train_dataset, target_train_dataset):
        model.test_both(test_images, test_labels, target_images, target_labels)

    template = 'Epoch: {}\n' + \
    'L1: {:.4f}, Acc1: {:.2f}, L1 Test: {:.4f}, Acc1 Test: {:.2f}\n'+ \
    'L2: {:.4f}, Acc2: {:.2f}, L2 Test: {:.4f}, Acc2 Test: {:.2f}\n'+ \
    'L3 Test: {:.4f}, Acc3 Test: {:.2f}\n'
    
    
    print(template.format(epoch+1,
                         model.train_loss.result(),
                         model.train_accuracy.result()*100,
                         model.test_loss.result(),
                         model.test_accuracy.result()*100,
                         model.train_loss_2.result(),
                         model.train_accuracy_2.result()*100,
                         model.test_loss_2.result(),
                         model.test_accuracy_2.result()*100,
                         model.test_loss_3.result(),
                         model.test_accuracy_3.result()*100))

W0627 16:46:25.631713 22032 deprecation.py:323] From c:\users\jw\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\math_grad.py:1220: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch: 1
L1: 1.8629, Acc1: 55.40, L1 Test: 1.2118, Acc1 Test: 78.27
L2: 0.6037, Acc2: 68.59, L2 Test: 0.3511, Acc2 Test: 95.23
L3 Test: 2.4768, Acc3 Test: 13.83

Epoch: 2
L1: 1.3115, Acc1: 69.66, L1 Test: 0.8394, Acc1 Test: 83.17
L2: 0.3697, Acc2: 83.42, L2 Test: 0.1978, Acc2 Test: 97.32
L3 Test: 2.8251, Acc3 Test: 12.60

Epoch: 3
L1: 1.0034, Acc1: 76.47, L1 Test: 0.6795, Acc1 Test: 85.84
L2: 0.2698, Acc2: 88.56, L2 Test: 0.1769, Acc2 Test: 97.41
L3 Test: 2.9550, Acc3 Test: 12.65

Epoch: 4
L1: 0.8334, Acc1: 80.36, L1 Test: 0.5835, Acc1 Test: 87.62
L2: 0.2301, Acc2: 91.09, L2 Test: 0.1727, Acc2 Test: 97.70
L3 Test: 2.8150, Acc3 Test: 13.50

Epoch: 5
L1: 0.7186, Acc1: 82.96, L1 Test: 0.5139, Acc1 Test: 88.87
L2: 0.2197, Acc2: 92.41, L2 Test: 0.1831, Acc2 Test: 97.45
L3 Test: 2.7153, Acc3 Test: 14.34

Epoch: 6
L1: 0.6348, Acc1: 84.86, L1 Test: 0.4604, Acc1 Test: 89.84
L2: 0.2147, Acc2: 92.97, L2 Test: 0.2250, Acc2 Test: 93.08
L3 Test: 2.6362, Acc3 Test: 15.38

Epoch: 7
L1: 0.5695, Acc1: 8