In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
@tf.custom_gradient
def GradientReversalOperator(x):
    def grad(dy):
        return -1 * dy
    return x, grad

class GradientReversalLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(GradientReversalLayer, self).__init__()
        
    def call(self, inputs):
        return GradientReversalOperator(inputs)

In [3]:
class MNIST():
    def __init__(self, input_shape):
        super(MNIST, self).__init__()
        self.feature_extractor = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=32, kernel_size=5,
                                   strides=1, input_shape=input_shape),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Conv2D(filters=48, kernel_size=5, strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
            tf.keras.layers.Flatten()            
        ])
        
        self.label_predictor = tf.keras.models.Sequential([
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        
        self.domain_predictor = tf.keras.models.Sequential([
            GradientReversalLayer(),
            tf.keras.layers.Dense(100),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2),
            tf.keras.layers.Activation('sigmoid')          
        ])
        self.path_1 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.label_predictor
        ])
        self.path_2 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.domain_predictor
        ])
        
        
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_2 = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_3 = tf.keras.losses.SparseCategoricalCrossentropy()
        
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.optimizer_2 = tf.keras.optimizers.Adam(learning_rate=0.001)
        
        self.train_loss = tf.keras.metrics.Mean()
        self.train_loss_2 = tf.keras.metrics.Mean()
        
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.train_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        
        
        self.test_loss = tf.keras.metrics.Mean()
        self.test_loss_2 = tf.keras.metrics.Mean()
        self.test_loss_3 = tf.keras.metrics.Mean()
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_3 = tf.keras.metrics.SparseCategoricalAccuracy()
    
    @tf.function
    def train_both(self, x_class, y_class, x_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            loss_1 = self.loss(y_class, y_class_pred)   
        grad_1 = tape.gradient(loss_1, self.path_1.trainable_variables)
        
        with tf.GradientTape() as tape:
            y_domain_pred = self.path_2(x_both)
            loss_2 = self.loss_2(domain_labels, y_domain_pred) 
        grad_2 = tape.gradient(loss_2, self.path_2.trainable_variables)
        
        self.optimizer.apply_gradients(zip(grad_1, self.path_1.trainable_variables))
        self.optimizer_2.apply_gradients(zip(grad_2, self.path_2.trainable_variables))
        self.train_loss(loss_1)
        self.train_accuracy(y_class, y_class_pred)
        
        self.train_loss_2(loss_2)
        self.train_accuracy_2(domain_labels, y_domain_pred)
        
        return
    
    @tf.function
    def test_both(self, x_class, y_class, x_domain, y_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            y_domain_pred = self.path_2(x_both)
            y_target_class_pred = self.path_1(x_domain)
            
            loss_1 = self.loss(y_class, y_class_pred)
            loss_2 = self.loss_2(domain_labels, y_domain_pred)
            loss_3 = self.loss_3(y_domain, y_target_class_pred)
            
        self.test_loss(loss_1)
        self.test_accuracy(y_class, y_class_pred)
        
        self.test_loss_2(loss_2)
        self.test_accuracy_2(domain_labels, y_domain_pred)
        
        self.test_loss_3(loss_3)
        self.test_accuracy_3(y_domain, y_target_class_pred)
        
        return


In [4]:
class SVHN():
    def __init__(self, input_shape):
        super(SVHN, self).__init__()
        self.feature_extractor = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=64, kernel_size=5,
                                   strides=1, padding='same',
                                   input_shape=input_shape),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
            tf.keras.layers.Conv2D(filters=64, kernel_size=5, padding='same', strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=3, strides=2),
            tf.keras.layers.Conv2D(filters=128, kernel_size=5, padding='same', strides=1),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Flatten()            
        ])
        
        self.label_predictor = tf.keras.models.Sequential([
            tf.keras.layers.Dense(3072),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2048),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        
        self.domain_predictor = tf.keras.models.Sequential([
            GradientReversalLayer(),
            tf.keras.layers.Dense(1024),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(1024),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dense(2),
            tf.keras.layers.Activation('sigmoid')          
        ])
        
        self.path_1 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.label_predictor
        ])
        
        self.path_2 = tf.keras.models.Sequential([
            self.feature_extractor,
            self.domain_predictor
        ])
        
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_2 = tf.keras.losses.SparseCategoricalCrossentropy()
        self.loss_3 = tf.keras.losses.SparseCategoricalCrossentropy()
        
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.optimizer_2 = tf.keras.optimizers.Adam(learning_rate=0.0001)
        
        self.train_loss = tf.keras.metrics.Mean()
        self.train_loss_2 = tf.keras.metrics.Mean()
        
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.train_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        
        
        self.test_loss = tf.keras.metrics.Mean()
        self.test_loss_2 = tf.keras.metrics.Mean()
        self.test_loss_3 = tf.keras.metrics.Mean()
        self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_2 = tf.keras.metrics.SparseCategoricalAccuracy()
        self.test_accuracy_3 = tf.keras.metrics.SparseCategoricalAccuracy()
    
    @tf.function
    def train_both(self, x_class, y_class, x_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            loss_1 = self.loss(y_class, y_class_pred)   
        grad_1 = tape.gradient(loss_1, self.path_1.trainable_variables)
        
        with tf.GradientTape() as tape:
            y_domain_pred = self.path_2(x_both)
            loss_2 = self.loss_2(domain_labels, y_domain_pred) 
        grad_2 = tape.gradient(loss_2, self.path_2.trainable_variables)
        
        self.optimizer.apply_gradients(zip(grad_1, self.path_1.trainable_variables))
        self.optimizer_2.apply_gradients(zip(grad_2, self.path_2.trainable_variables))
        self.train_loss(loss_1)
        self.train_accuracy(y_class, y_class_pred)
        
        self.train_loss_2(loss_2)
        self.train_accuracy_2(domain_labels, y_domain_pred)
        
        return
    
    @tf.function
    def test_both(self, x_class, y_class, x_domain, y_domain):
        
        domain_labels = np.concatenate([np.zeros(len(x_class)), np.ones(len(x_domain))])
        
        x_both = tf.concat([x_class, x_domain], axis = 0)
        
        with tf.GradientTape() as tape:
            y_class_pred = self.path_1(x_class)
            y_domain_pred = self.path_2(x_both)
            y_target_class_pred = self.path_1(x_domain)
            
            loss_1 = self.loss(y_class, y_class_pred)
            loss_2 = self.loss_2(domain_labels, y_domain_pred)
            loss_3 = self.loss_3(y_domain, y_target_class_pred)
            
        self.test_loss(loss_1)
        self.test_accuracy(y_class, y_class_pred)
        
        self.test_loss_2(loss_2)
        self.test_accuracy_2(domain_labels, y_domain_pred)
        
        self.test_loss_3(loss_3)
        self.test_accuracy_3(y_domain, y_target_class_pred)
        
        return


In [5]:
x_train_mnist = np.load('../data/mnist/x_train.npy')
y_train_mnist = np.load('../data/mnist/y_train.npy')

x_test_mnist = np.load('../data/mnist/x_test.npy')
y_test_mnist = np.load('../data/mnist/y_test.npy')

In [6]:
x_train_svhn = np.load('../data/svhn/x_train.npy')
y_train_svhn = np.load('../data/svhn/y_train.npy')

x_test_svhn = np.load('../data/svhn/x_test.npy')
y_test_svhn = np.load('../data/svhn/y_test.npy')

In [7]:
x_train_mnist, x_test_mnist = x_train_mnist / 255.0, x_test_mnist / 255.0
x_train_svhn, x_test_svhn = x_train_svhn / 255.0, x_test_svhn / 255.0

In [8]:
x_train_mnist = tf.cast(x_train_mnist, tf.float32)
x_test_mnist = tf.cast(x_test_mnist, tf.float32)
x_train_svhn = tf.cast(x_train_svhn, tf.float32)
x_test_svhn = tf.cast(x_test_svhn, tf.float32)

In [9]:
def pad_image(x, y):
    
    paddings = tf.constant([[2, 2,], [2, 2]])
    
    new_x = tf.pad(x, paddings, "CONSTANT")
    
    return (new_x, y)

def duplicate_channel(x, y):

    new_x = tf.stack([x, x, x], axis = -1)
    
    return (new_x, y)

In [13]:
x_train_mnist.shape

TensorShape([60000, 28, 28])

In [14]:
x_test_mnist.shape

TensorShape([10000, 28, 28])

In [15]:
x_train_svhn.shape

TensorShape([73257, 32, 32, 3])

In [10]:
mnist_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_mnist, y_train_mnist))
mnist_train_dataset = mnist_train_dataset.map(pad_image)
mnist_train_dataset = mnist_train_dataset.map(duplicate_channel)
target_train_dataset = mnist_train_dataset.shuffle(len(y_train_mnist))

mnist_test_dataset = tf.data.Dataset.from_tensor_slices((x_test_mnist, y_test_mnist))
mnist_test_dataset = mnist_test_dataset.map(pad_image)
mnist_test_dataset = mnist_test_dataset.map(duplicate_channel)
target_test_dataset = mnist_test_dataset.shuffle(len(y_test_mnist))

svhn_train_dataset = tf.data.Dataset.from_tensor_slices((x_train_svhn, y_train_svhn))
source_train_dataset = svhn_train_dataset.shuffle(len(y_train_svhn))

svhn_test_dataset = tf.data.Dataset.from_tensor_slices((x_test_svhn, y_test_svhn))
source_test_dataset = svhn_train_dataset.shuffle(len(y_test_svhn))



source_train_dataset = source_train_dataset.batch(600)
source_train_dataset = source_train_dataset.prefetch(50)

target_train_dataset = target_train_dataset.batch(730)
target_train_dataset = target_train_dataset.prefetch(50)

source_test_dataset = source_test_dataset.batch(100)
source_test_dataset = source_test_dataset.prefetch(50)

# target_test_dataset = target_test_dataset.batch(500)
# target_test_dataset = target_test_dataset.prefetch(50)

In [11]:
model = SVHN(input_shape=(32, 32, 3))

In [12]:
EPOCHS = 100

for epoch in range(EPOCHS):
    for (source_images, class_labels), (target_images, _) in zip(source_train_dataset, target_train_dataset):
        model.train_both(source_images, class_labels, target_images)

    for (test_images, test_labels), (target_images, target_labels) in zip(source_test_dataset, target_train_dataset):
        model.test_both(test_images, test_labels, target_images, target_labels)

    template = 'Epoch: {}\n' + \
    'L1: {:.4f}, Acc1: {:.2f}, L1 Test: {:.4f}, Acc1 Test: {:.2f}\n'+ \
    'L2: {:.4f}, Acc2: {:.2f}, L2 Test: {:.4f}, Acc2 Test: {:.2f}\n'+ \
    'L3 Test: {:.4f}, Acc3 Test: {:.2f}\n'
    
    
    print(template.format(epoch+1,
                         model.train_loss.result(),
                         model.train_accuracy.result()*100,
                         model.test_loss.result(),
                         model.test_accuracy.result()*100,
                         model.train_loss_2.result(),
                         model.train_accuracy_2.result()*100,
                         model.test_loss_2.result(),
                         model.test_accuracy_2.result()*100,
                         model.test_loss_3.result(),
                         model.test_accuracy_3.result()*100))

W0627 19:21:48.579602 11780 deprecation.py:323] From c:\users\jw\appdata\local\programs\python\python36\lib\site-packages\tensorflow\python\ops\math_grad.py:1220: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch: 1
L1: 2.2517, Acc1: 19.61, L1 Test: 1.9080, Acc1 Test: 30.87
L2: 0.6280, Acc2: 70.62, L2 Test: 0.7181, Acc2 Test: 54.99
L3 Test: 2.0642, Acc3 Test: 22.20

Epoch: 2
L1: 1.6619, Acc1: 42.32, L1 Test: 1.2618, Acc1 Test: 55.62
L2: 0.4935, Acc2: 78.23, L2 Test: 0.5862, Acc2 Test: 69.74
L3 Test: 2.0241, Acc3 Test: 40.62

Epoch: 3
L1: 1.2745, Acc1: 56.33, L1 Test: 0.9733, Acc1 Test: 66.35
L2: 0.4567, Acc2: 80.02, L2 Test: 0.4078, Acc2 Test: 79.45
L3 Test: 1.9035, Acc3 Test: 47.06

Epoch: 4
L1: 1.0507, Acc1: 64.30, L1 Test: 0.8178, Acc1 Test: 72.15
L2: 0.3494, Acc2: 84.84, L2 Test: 0.3280, Acc2 Test: 83.80
L3 Test: 1.9321, Acc3 Test: 49.59

Epoch: 5
L1: 0.9043, Acc1: 69.51, L1 Test: 0.7124, Acc1 Test: 76.00
L2: 0.2825, Acc2: 87.79, L2 Test: 0.2631, Acc2 Test: 87.03
L3 Test: 1.9010, Acc3 Test: 52.28

Epoch: 6
L1: 0.7999, Acc1: 73.19, L1 Test: 0.6379, Acc1 Test: 78.68
L2: 0.2396, Acc2: 89.69, L2 Test: 0.2755, Acc2 Test: 87.62
L3 Test: 1.9005, Acc3 Test: 54.69

Epoch: 7
L1: 0.7223, Acc1: 7

Epoch: 52
L1: 0.1336, Acc1: 95.63, L1 Test: 0.2226, Acc1 Test: 94.88
L2: 0.0375, Acc2: 98.49, L2 Test: 0.0372, Acc2 Test: 98.46
L3 Test: 9.7329, Acc3 Test: 59.98

Epoch: 53
L1: 0.1312, Acc1: 95.71, L1 Test: 0.2220, Acc1 Test: 94.93
L2: 0.0368, Acc2: 98.52, L2 Test: 0.0366, Acc2 Test: 98.49
L3 Test: 9.8088, Acc3 Test: 60.04

Epoch: 54
L1: 0.1290, Acc1: 95.78, L1 Test: 0.2208, Acc1 Test: 94.99
L2: 0.0364, Acc2: 98.53, L2 Test: 0.0359, Acc2 Test: 98.52
L3 Test: 9.9375, Acc3 Test: 59.94

Epoch: 55
L1: 0.1270, Acc1: 95.85, L1 Test: 0.2198, Acc1 Test: 95.04
L2: 0.0358, Acc2: 98.56, L2 Test: 0.0354, Acc2 Test: 98.54
L3 Test: 10.0345, Acc3 Test: 59.93

Epoch: 56
L1: 0.1250, Acc1: 95.92, L1 Test: 0.2188, Acc1 Test: 95.10
L2: 0.0352, Acc2: 98.58, L2 Test: 0.0348, Acc2 Test: 98.56
L3 Test: 10.1465, Acc3 Test: 59.92



KeyboardInterrupt: 