In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
@tf.custom_gradient
def GradientReversalOperator(x):
    def grad(dy):
        return -1 * dy
    return x, grad

class GradientReversalLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(GradientReversalLayer, self).__init__()
        
    def call(self, inputs):
        return GradientReversalOperator(inputs)

In [3]:
class MNIST():
    def __init__(self, input_shape):
        super(MNIST, self).__init__()
        self.feature_extractor = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=32, kernel_size=5,
                                   strides=1, input_shape=input_shape),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Conv2D(filters=48, kernel_size=5, strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Flatten()            
        ])
        
        self.label_predictor = tf.keras.models.Sequential([
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        
        self.domain_predictor = tf.keras.models.Sequential([
            GradientReversalLayer(),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2),
            tf.keras.layers.Activation('sigmoid')          
        ])
        self.path_1 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.label_predictor
        ])
        self.path_2 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.domain_predictor
        ])
        
        
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_2 = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_3 = tf.keras.losses.SparseCategoricalCrossentropy()
        
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.optimizer_2 = tf.keras.optimizers.Adam(learning_rate=0.001)
        
        self.train_loss = tf.keras.metrics.Mean()
        self.train_loss_2 = tf.keras.metrics.Mean()
        
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.train_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        
        
        self.test_loss = tf.keras.metrics.Mean()
        self.test_loss_2 = tf.keras.metrics.Mean()
        self.test_loss_3 = tf.keras.metrics.Mean()
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_3 = tf.keras.metrics.SparseCategoricalAccuracy()
    
    @tf.function
    def train_both(self, x_class, y_class, x_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            loss_1 = self.loss(y_class, y_class_pred)   
        grad_1 = tape.gradient(loss_1, self.path_1.trainable_variables)
        
        with tf.GradientTape() as tape:
            y_domain_pred = self.path_2(x_both)
            loss_2 = self.loss_2(domain_labels, y_domain_pred) 
        grad_2 = tape.gradient(loss_2, self.path_2.trainable_variables)
        
        self.optimizer.apply_gradients(zip(grad_1, self.path_1.trainable_variables))
        self.optimizer_2.apply_gradients(zip(grad_2, self.path_2.trainable_variables))
        self.train_loss(loss_1)
        self.train_accuracy(y_class, y_class_pred)
        
        self.train_loss_2(loss_2)
        self.train_accuracy_2(domain_labels, y_domain_pred)
        
        return
    
    @tf.function
    def test_both(self, x_class, y_class, x_domain, y_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            y_domain_pred = self.path_2(x_both)
            y_target_class_pred = self.path_1(x_domain)
            
            loss_1 = self.loss(y_class, y_class_pred)
            loss_2 = self.loss_2(domain_labels, y_domain_pred)
            loss_3 = self.loss_3(y_domain, y_target_class_pred)
            
        self.test_loss(loss_1)
        self.test_accuracy(y_class, y_class_pred)
        
        self.test_loss_2(loss_2)
        self.test_accuracy_2(domain_labels, y_domain_pred)
        
        self.test_loss_3(loss_3)
        self.test_accuracy_3(y_domain, y_target_class_pred)
        
        return


In [4]:
class SVHN():
    def __init__(self, input_shape):
        super(SVHN, self).__init__()
        self.feature_extractor = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=64, kernel_size=5,
                                   strides=1, padding='same',
                                   input_shape=input_shape),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
            tf.keras.layers.Conv2D(filters=64, kernel_size=5, padding='same', strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
            tf.keras.layers.Conv2D(filters=128, kernel_size=5, padding='same', strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Flatten()            
        ])
        
        self.label_predictor = tf.keras.models.Sequential([
            tf.keras.layers.Dense(3072),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2048),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        
        self.domain_predictor = tf.keras.models.Sequential([
            GradientReversalLayer(),
            tf.keras.layers.Dense(1024),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(1024),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2),
            tf.keras.layers.Activation('sigmoid')          
        ])
        
        self.path_1 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.label_predictor
        ])
        
        self.path_2 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.domain_predictor
        ])
        
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_2 = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_3 = tf.keras.losses.SparseCategoricalCrossentropy()
        
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.optimizer_2 = tf.keras.optimizers.Adam(learning_rate=0.0001)
        
        self.train_loss = tf.keras.metrics.Mean()
        self.train_loss_2 = tf.keras.metrics.Mean()
        
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.train_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        
        
        self.test_loss = tf.keras.metrics.Mean()
        self.test_loss_2 = tf.keras.metrics.Mean()
        self.test_loss_3 = tf.keras.metrics.Mean()
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_3 = tf.keras.metrics.SparseCategoricalAccuracy()
    
    @tf.function
    def train_both(self, x_class, y_class, x_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            loss_1 = self.loss(y_class, y_class_pred)   
        grad_1 = tape.gradient(loss_1, self.path_1.trainable_variables)
        
        with tf.GradientTape() as tape:
            y_domain_pred = self.path_2(x_both)
            loss_2 = self.loss_2(domain_labels, y_domain_pred) 
        grad_2 = tape.gradient(loss_2, self.path_2.trainable_variables)
        
        self.optimizer.apply_gradients(zip(grad_1, self.path_1.trainable_variables))
        self.optimizer_2.apply_gradients(zip(grad_2, self.path_2.trainable_variables))
        self.train_loss(loss_1)
        self.train_accuracy(y_class, y_class_pred)
        
        self.train_loss_2(loss_2)
        self.train_accuracy_2(domain_labels, y_domain_pred)
        
        return
    
    @tf.function
    def test_both(self, x_class, y_class, x_domain, y_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            y_domain_pred = self.path_2(x_both)
            y_target_class_pred = self.path_1(x_domain)
            
            loss_1 = self.loss(y_class, y_class_pred)
            loss_2 = self.loss_2(domain_labels, y_domain_pred)
            loss_3 = self.loss_3(y_domain, y_target_class_pred)
            
        self.test_loss(loss_1)
        self.test_accuracy(y_class, y_class_pred)
        
        self.test_loss_2(loss_2)
        self.test_accuracy_2(domain_labels, y_domain_pred)
        
        self.test_loss_3(loss_3)
        self.test_accuracy_3(y_domain, y_target_class_pred)
        
        return


In [5]:
x_train_mnist = np.load('../data/mnist/x_train.npy')
y_train_mnist = np.load('../data/mnist/y_train.npy')

x_test_mnist = np.load('../data/mnist/x_test.npy')
y_test_mnist = np.load('../data/mnist/y_test.npy')

In [6]:
x_train_svhn = np.load('../data/svhn/x_train.npy')
y_train_svhn = np.load('../data/svhn/y_train.npy')

x_test_svhn = np.load('../data/svhn/x_test.npy')
y_test_svhn = np.load('../data/svhn/y_test.npy')

In [7]:
x_train_mnist, x_test_mnist = x_train_mnist / 255.0, x_test_mnist / 255.0
x_train_svhn, x_test_svhn = x_train_svhn / 255.0, x_test_svhn / 255.0

In [8]:
x_train_mnist = tf.cast(x_train_mnist, tf.float32)
x_test_mnist = tf.cast(x_test_mnist, tf.float32)
x_train_svhn = tf.cast(x_train_svhn, tf.float32)
x_test_svhn = tf.cast(x_test_svhn, tf.float32)

In [9]:
def pad_image(x, y):
    
    paddings = tf.constant([[2, 2,], [2, 2]])
    
    new_x = tf.pad(x, paddings, "CONSTANT")
    
    return (new_x, y)

def duplicate_channel(x, y):

    new_x = tf.stack([x, x, x], axis = -1)
    
    return (new_x, y)

In [10]:
x_train_mnist.shape

TensorShape([60000, 28, 28])

In [11]:
x_test_mnist.shape

TensorShape([10000, 28, 28])

In [12]:
x_train_svhn.shape

TensorShape([73257, 32, 32, 3])

In [13]:
x_test_svhn.shape

TensorShape([26032, 32, 32, 3])

In [14]:
mnist_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_mnist, y_train_mnist))
mnist_train_dataset = mnist_train_dataset.map(pad_image)
mnist_train_dataset = mnist_train_dataset.map(duplicate_channel)
target_train_dataset = mnist_train_dataset.shuffle(len(y_train_mnist))

mnist_test_dataset = tf.data.Dataset.from_tensor_slices((x_test_mnist, y_test_mnist))
mnist_test_dataset = mnist_test_dataset.map(pad_image)
mnist_test_dataset = mnist_test_dataset.map(duplicate_channel)
target_test_dataset = mnist_test_dataset.shuffle(len(y_test_mnist))

svhn_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_svhn, y_train_svhn))
source_train_dataset = svhn_train_dataset.shuffle(len(y_train_svhn))

svhn_test_dataset = tf.data.Dataset.from_tensor_slices((x_test_svhn, y_test_svhn))
source_test_dataset = svhn_train_dataset.shuffle(len(y_test_svhn))



source_train_dataset = source_train_dataset.batch(730)
source_train_dataset = source_train_dataset.prefetch(50)

source_test_dataset = source_test_dataset.batch(260)
source_test_dataset = source_test_dataset.prefetch(50)

target_train_dataset = target_train_dataset.batch(600)
target_train_dataset = target_train_dataset.prefetch(50)

# target_test_dataset = target_test_dataset.batch(500)
# target_test_dataset = target_test_dataset.prefetch(50)

In [15]:
model = SVHN(input_shape=(32, 32, 3))

In [16]:
EPOCHS = 100

for epoch in range(EPOCHS):
    for (source_images, class_labels), (target_images, _) in zip(source_train_dataset, target_train_dataset):
        model.train_both(source_images, class_labels, target_images)

    for (test_images, test_labels), (target_images, target_labels) in zip(source_test_dataset, target_train_dataset):
        model.test_both(test_images, test_labels, target_images, target_labels)

    template = 'Epoch: {}\n' + \
    'L1: {:.4f}, Acc1: {:.2f}, L1 Test: {:.4f}, Acc1 Test: {:.2f}\n'+ \
    'L2: {:.4f}, Acc2: {:.2f}, L2 Test: {:.4f}, Acc2 Test: {:.2f}\n'+ \
    'L3 Test: {:.4f}, Acc3 Test: {:.2f}\n'
    
    
    print(template.format(epoch+1,
                         model.train_loss.result(),
                         model.train_accuracy.result()*100,
                         model.test_loss.result(),
                         model.test_accuracy.result()*100,
                         model.train_loss_2.result(),
                         model.train_accuracy_2.result()*100,
                         model.test_loss_2.result(),
                         model.test_accuracy_2.result()*100,
                         model.test_loss_3.result(),
                         model.test_accuracy_3.result()*100))

W0627 19:55:17.236991 19648 deprecation.py:323] From c:\users\jw\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\math_grad.py:1220: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch: 1
L1: 2.2226, Acc1: 20.47, L1 Test: 1.9192, Acc1 Test: 31.99
L2: 0.6650, Acc2: 65.76, L2 Test: 0.5417, Acc2 Test: 99.49
L3 Test: 2.1667, Acc3 Test: 18.65

Epoch: 2
L1: 1.6695, Acc1: 41.79, L1 Test: 1.2615, Acc1 Test: 56.57
L2: 0.6566, Acc2: 70.22, L2 Test: 0.3700, Acc2 Test: 98.04
L3 Test: 2.6132, Acc3 Test: 32.98

Epoch: 3
L1: 1.2751, Acc1: 56.21, L1 Test: 0.9863, Acc1 Test: 66.54
L2: 0.4785, Acc2: 79.10, L2 Test: 0.2922, Acc2 Test: 96.88
L3 Test: 2.4809, Acc3 Test: 41.17

Epoch: 4
L1: 1.0527, Acc1: 64.22, L1 Test: 0.8234, Acc1 Test: 72.38
L2: 0.5205, Acc2: 79.46, L2 Test: 0.2518, Acc2 Test: 96.41
L3 Test: 2.5056, Acc3 Test: 44.31

Epoch: 5
L1: 0.9068, Acc1: 69.42, L1 Test: 0.7163, Acc1 Test: 76.16
L2: 0.4413, Acc2: 82.79, L2 Test: 0.2257, Acc2 Test: 96.32
L3 Test: 2.7590, Acc3 Test: 45.94

Epoch: 6
L1: 0.8020, Acc1: 73.13, L1 Test: 0.6402, Acc1 Test: 78.83
L2: 0.3749, Acc2: 85.47, L2 Test: 0.1912, Acc2 Test: 96.85
L3 Test: 2.6585, Acc3 Test: 47.88

Epoch: 7
L1: 0.7246, Acc1: 7

Epoch: 52
L1: 0.1367, Acc1: 95.54, L1 Test: 0.1331, Acc1 Test: 95.74
L2: 0.0633, Acc2: 97.76, L2 Test: 0.0388, Acc2 Test: 98.91
L3 Test: 8.4220, Acc3 Test: 56.42

Epoch: 53
L1: 0.1343, Acc1: 95.62, L1 Test: 0.1308, Acc1 Test: 95.81
L2: 0.0621, Acc2: 97.80, L2 Test: 0.0380, Acc2 Test: 98.93
L3 Test: 8.4547, Acc3 Test: 56.49

Epoch: 54
L1: 0.1320, Acc1: 95.69, L1 Test: 0.1288, Acc1 Test: 95.88
L2: 0.0609, Acc2: 97.84, L2 Test: 0.0373, Acc2 Test: 98.95
L3 Test: 8.5099, Acc3 Test: 56.53

Epoch: 55
L1: 0.1299, Acc1: 95.76, L1 Test: 0.1273, Acc1 Test: 95.93
L2: 0.0598, Acc2: 97.88, L2 Test: 0.0367, Acc2 Test: 98.97
L3 Test: 8.5275, Acc3 Test: 56.72

Epoch: 56
L1: 0.1278, Acc1: 95.83, L1 Test: 0.1252, Acc1 Test: 96.00
L2: 0.0588, Acc2: 97.92, L2 Test: 0.0360, Acc2 Test: 98.99
L3 Test: 8.5572, Acc3 Test: 56.80

Epoch: 57
L1: 0.1258, Acc1: 95.90, L1 Test: 0.1233, Acc1 Test: 96.06
L2: 0.0577, Acc2: 97.95, L2 Test: 0.0354, Acc2 Test: 99.01
L3 Test: 8.6032, Acc3 Test: 56.90

Epoch: 58
L1: 0.1238, 