## Purpose

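This notebook implements a DaNN (Domain Adaptive Neural Network): a digit classifier that uses MNIST as the source domain and USPS as the target domain, and adds an MMD penalty on the hidden-layer features so that both domains are mapped to similar representations.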

In [1]:
import tensorflow as tf
import math
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import layers
from keras import models
from keras.models import Model
from keras import backend as K
from keras.regularizers import Regularizer
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D, MaxPooling2D
from keras.layers.convolutional import UpSampling2D, Conv2D
import keras
import warnings
warnings.filterwarnings('ignore')
import cv2
import numpy as np
import h5py

Using TensorFlow backend.


## Data

In [2]:
# Fetch the project repository (presumably it provides usps_dataset.h5, which is read further below).
# NOTE(review): the clone is not idempotent -- re-running this cell when the folder already exists
# prints "fatal: destination path ... already exists", as the saved output shows.
!git clone https://github.com/NuriaSF/ML_DomainAdaptation

fatal: destination path 'ML_DomainAdaptation' already exists and is not an empty directory.


In [3]:
# Switch the working directory to the cloned repo's Statistic_Criterion folder so that relative
# paths used later resolve. NOTE(review): /content/... is an absolute, environment-specific path.
cd /content/ML_DomainAdaptation/Statistic_Criterion/

/content/ML_DomainAdaptation/Statistic_Criterion


##### Let us begin by loading and preprocessing the source data: MNIST

In [0]:
# Load the MNIST train/test images and integer labels through keras.datasets.mnist (source domain).
(source_train_images, source_train_labels), (source_test_images, source_test_labels) = mnist.load_data()

In [0]:
# Append a single channel axis, cast to float32 and divide the pixel values by 255.
source_train_images = source_train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
source_test_images = source_test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255

# One-hot encode the train and test labels.
source_train_labels, source_test_labels = map(
    to_categorical, (source_train_labels, source_test_labels)
)

##### Let us now load and preprocess the target data: USPS

In [0]:
# Read the USPS train/test splits from the HDF5 file; `[:]` copies each dataset into memory
# so the arrays remain usable after the file is closed.
with h5py.File('usps_dataset.h5', 'r') as hf:
    train = hf.get('train')
    test = hf.get('test')

    # images
    target_train_images_aux = train.get('data')[:]
    target_test_images_aux = test.get('data')[:]

    # labels
    target_train_labels = train.get('target')[:]
    target_test_labels = test.get('target')[:]

In [0]:
def _upscale_usps_images(flat_images):
    """Resize every flattened 16x16 image to 28x28 and return them as a list.

    Each entry of `flat_images` is reshaped to (16, 16), resized with bicubic
    interpolation to (28, 28) and flattened again. The whole input is
    processed, so no sample count has to be hard-coded.
    """
    return [
        cv2.resize(flat.reshape(16, 16), dsize=(28, 28), interpolation=cv2.INTER_CUBIC).flatten()
        for flat in flat_images
    ]


# Bring the USPS images to the 28x28 resolution used for the source (MNIST) images.
target_train_images = _upscale_usps_images(target_train_images_aux)
target_test_images = _upscale_usps_images(target_test_images_aux)

In [0]:
def _scale_each_image_to_unit_range(images):
    """Return a float32 copy of `images` with every image min-max scaled on its own.

    For each image along the first axis the minimum is subtracted and the
    result is divided by the remaining maximum, so the image spans [0, 1].
    An image with zero range (all pixels equal) is mapped to all zeros
    instead of dividing by zero.
    """
    images = images.astype('float32')  # astype copies, the caller's array is left untouched
    for i in range(images.shape[0]):
        min_aux = np.min(images[i])
        max_aux = np.max(images[i] - min_aux)
        if max_aux > 0:
            images[i] = (images[i] - min_aux) / max_aux
        else:
            images[i] = 0.0
    return images


#convert the lists to numpy arrays shaped (num_images, 28, 28, 1) and normalise them;
#the number of images is taken from the data rather than hard-coded
target_train_images = _scale_each_image_to_unit_range(
    np.asarray(target_train_images).reshape((-1, 28, 28, 1)))
target_test_images = _scale_each_image_to_unit_range(
    np.asarray(target_test_images).reshape((-1, 28, 28, 1)))

#labels (one-hot)
#NOTE: running this cell a second time applies to_categorical to labels that are already one-hot.
target_train_labels = to_categorical(target_train_labels)
target_test_labels = to_categorical(target_test_labels)

## Loss

Let us now code the $MMD^2_e(\boldsymbol{q}_s, \boldsymbol{\bar{q}}_t)$ loss where
$$
\boldsymbol{q}_s=W_1^Tx_s+b
$$ 
and 
$$
\boldsymbol{\bar{q}}_t = W_1^Tx_t+b.
$$ 

In [0]:
def compute_pairwise_distances(x, y):
    """Squared Euclidean distance between every row of `x` and every row of `y`.

    Args:
        x: rank-2 tensor of shape (n, d).
        y: rank-2 tensor of shape (m, d).

    Returns:
        Tensor of shape (m, n); entry [j, i] is the squared distance between
        x[i] and y[j].

    Raises:
        ValueError: if either input is not rank 2, or the feature
            dimensions differ.
    """
    x_rank = len(x.get_shape())
    y_rank = len(y.get_shape())
    if not (x_rank == y_rank == 2):
        raise ValueError('Both inputs should be matrices.')

    x_features = x.get_shape().as_list()[1]
    y_features = y.get_shape().as_list()[1]
    if x_features != y_features:
        raise ValueError('The number of features should be the same.')

    # (n, d, 1) - (d, m) broadcasts to (n, d, m): all pairwise differences.
    differences = tf.expand_dims(x, 2) - tf.transpose(y)
    # Sum the squares over the feature axis -> (n, m).
    squared_distances = tf.reduce_sum(tf.square(differences), 1)

    return tf.transpose(squared_distances)

In [0]:
def gaussian_kernel_matrix(x, y, sigmas = tf.constant([1e-2, 1e-1, 1, 5, 10])):
    """Sum of exponential kernels over several bandwidths for all pairs of rows.

    For every value s in `sigmas`, computes exp(-d / (2 * s)) where d is the
    matrix returned by compute_pairwise_distances(x, y), and adds the
    results over all s.

    Args:
        x, y: rank-2 tensors with the same number of columns.
        sigmas: 1-D tensor of bandwidth values.

    Returns:
        Tensor with the same shape as the pairwise distance matrix.
    """
    # Column vector holding 1 / (2 * sigma), one row per bandwidth.
    inv_two_sigma = 1. / (2. * (tf.expand_dims(sigmas, 1)))
    pairwise = compute_pairwise_distances(x, y)

    # Outer product: row k holds every (flattened) distance scaled by bandwidth k.
    flat_pairwise = tf.reshape(pairwise, (1, -1))
    exponents = tf.matmul(inv_two_sigma, flat_pairwise)

    # Add up the kernels of all bandwidths and restore the matrix layout.
    kernel_sum = tf.reduce_sum(tf.exp(-exponents), 0)
    return tf.reshape(kernel_sum, tf.shape(pairwise))

In [0]:
#loss
def mmd(y_true, y_pred):
    """MMD loss between the source and target halves of `y_pred`.

    `y_pred` holds the source features in its first 64 columns and the target
    features in the remaining columns; `y_true` is not used. The split index
    64 is hard-coded and therefore has to match the width of the hidden layer
    whose outputs were concatenated.

    Returns:
        Scalar tensor: mean k(s, s) + mean k(t, t) - 2 * mean k(s, t),
        replaced by 0 whenever it is not positive.
    """
    # Undo the concatenation to get back the two hidden-layer outputs.
    source_part, target_part = y_pred[:, :64], y_pred[:, 64:]

    mean_k_ss = tf.reduce_mean(gaussian_kernel_matrix(source_part, source_part))
    mean_k_tt = tf.reduce_mean(gaussian_kernel_matrix(target_part, target_part))
    mean_k_st = tf.reduce_mean(gaussian_kernel_matrix(source_part, target_part))

    cost = (mean_k_ss + mean_k_tt) - 2 * mean_k_st

    # The loss must not be negative.
    return tf.where(cost > 0, cost, 0)

## Model

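The feature extractor is a small CNN (three convolution + max-pooling stages followed by dense layers of 256 and 84 units) that ends in a 64-unit hidden layer; the MMD loss is computed on the outputs of that hidden layer.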
* Inputs: the source and target inputs are fed in parallel into the same network (i.e., both branches share the same weights).
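* Outputs: two models are built on top of the shared network. `model_source` outputs the class prediction for the source input. `model_target` has two outputs: the concatenation of the hidden-layer outputs for the source and target inputs (used by the MMD loss), and the class prediction for the target input.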

In [0]:
#Function to get batches
def get_all_batches(data,labels,batch_size):
  """Shuffle `data` and `labels` together and cut them into full batches.

  Samples that do not fill a complete batch are dropped.

  Args:
    data: array whose first axis indexes the samples.
    labels: array with the same number of entries along its first axis.
    batch_size: positive number of samples per batch.

  Returns:
    (batches_x, batches_y, num_batches): two lists of arrays, each with
    `num_batches` entries of `batch_size` samples, and `num_batches` as an
    int. When there are fewer samples than `batch_size` the lists are empty
    and `num_batches` is 0.

  Raises:
    ValueError: if `batch_size` is not positive or `data` and `labels`
      hold a different number of samples.
  """
  if batch_size <= 0:
    raise ValueError('batch_size must be a positive integer.')

  num_samples = data.shape[0]
  if labels.shape[0] != num_samples:
    raise ValueError('data and labels must contain the same number of samples.')

  idx = np.arange(0, num_samples)
  np.random.shuffle(idx)

  # Integer division: np.split needs a whole number of sections, and callers get an int back.
  num_batches = num_samples // batch_size
  if num_batches == 0:
    return [], [], 0

  # Keep only as many shuffled samples as fill complete batches.
  idx = idx[:num_batches * batch_size]

  batches_x = np.split(data[idx], num_batches)
  batches_y = np.split(labels[idx], num_batches)

  return batches_x,batches_y,num_batches


In [0]:
#Define larger model (CNN + fully connected)


def get_base_network(input_shape):
    """Build the feature extractor applied to both the source and the target input.

    Architecture: three Conv2D (relu, 'same' padding) + 2x2 max-pooling stages
    with 32, 48 and 64 filters, a Flatten, two relu Dense layers (256 and 84
    units) and a final 64-unit softplus Dense layer named 'hidden'.

    Args:
        input_shape: shape of one input image, passed to `Input`.

    Returns:
        A Keras `Model` mapping the input image to the 64-unit hidden output.
    """
    image_in = Input(shape = input_shape)

    # First convolution stage (5x5 kernels) followed by max pooling.
    net = Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1), padding='same')(image_in)
    net = MaxPooling2D(pool_size=(2, 2))(net)

    # Remaining convolution stages: (filters, kernel size), each followed by max pooling.
    for n_filters, k_size in ((48, (5, 5)), (64, (3, 3))):
        net = Conv2D(n_filters, kernel_size=k_size, activation='relu', padding='same')(net)
        net = MaxPooling2D(pool_size=(2, 2))(net)

    net = Flatten()(net)

    # Fully connected layers.
    for n_units in (256, 84):
        net = Dense(n_units, activation='relu')(net)

    # Hidden layer whose outputs are used by the classifiers and the MMD loss.
    hidden = layers.Dense(64, activation='softplus', name='hidden')(net)

    return Model(image_in, hidden)
  
# Shape of a single input image: 28x28 pixels, one channel.
input_shape = (28,28,1)

# One feature extractor instance; calling it on both inputs below makes the two branches share weights.
base_model= get_base_network(input_shape)

input_s = Input(shape=input_shape, name='source_input')
input_t = Input(shape=input_shape, name='target_input')

#Hidden layers: 64-unit features of the source and of the target input

hidden_s = base_model(input_s)
hidden_t = base_model(input_t)

#concatenate outputs: (batch, 128) tensor, source features first; the mmd loss splits it at column 64
aux_output = layers.concatenate([hidden_s, hidden_t], name='aux_output')

#Source Classifier
pred_s = layers.Dense(10, activation='softmax', name='source_output')(hidden_s)

#Target Classifier
# NOTE(review): this is a second, independent Dense layer (its weights are not shared with the
# source classifier above) and it reuses the layer name 'source_output'. The training cell keys
# its targets on that name, so renaming it here requires updating that cell as well.
pred_t = layers.Dense(10, activation='softmax', name='source_output')(hidden_t)


#Define target and source models
# model_source: source image -> source classifier prediction.
# model_target: (source image, target image) -> [concatenated hidden features, target classifier prediction].

model_source = Model(input_s, pred_s)
model_target = Model([input_s,input_t],[aux_output,pred_t])

In [60]:
# Print the layer-by-layer summary of the two-input model.
model_target.summary()

Model: "model_29"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
source_input (InputLayer)       (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
target_input (InputLayer)       (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
model_27 (Model)                (None, 64)           241732      source_input[0][0]               
                                                                 target_input[0][0]               
__________________________________________________________________________________________________
aux_output (Concatenate)        (None, 128)          0           model_27[1][0]            

In [0]:
# Source model: plain classification with categorical cross-entropy, Adam with learning rate 0.001.
model_source.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(lr=0.001), metrics = ['accuracy'])
# Target model: losses are matched to the outputs by position -- mmd for the concatenated hidden
# features, categorical cross-entropy for the classifier output -- with equal weights.
# NOTE(review): 'accuracy' is attached to both outputs; for the feature output it is presumably not
# meaningful, but the training cell unpacks the resulting five values from train_on_batch.
model_target.compile(loss=[mmd,'categorical_crossentropy'] ,loss_weights=[1.,1.], optimizer=keras.optimizers.Adam(lr=0.003), metrics = ['accuracy'])

In [87]:
# Training configuration and running bests.
epochs = 30
batch_size=64
best_acc_source = 0
best_acc_target = 0
# `lr` only acts as a one-shot guard for the learning-rate drop below; the learning rate the
# optimizer really starts with is the one model_target was compiled with.
lr = 0.0025
loss_t=1

for iterations in range(epochs):
  # Reshuffle both domains at the start of every epoch and cut them into full batches.
  source_batch_xs, source_batch_ys, num_batches_source = get_all_batches(source_train_images,source_train_labels,batch_size)
  target_batch_xs, target_batch_ys, num_batches_target = get_all_batches(target_train_images,target_train_labels,batch_size)

  # Source and target batches are consumed in pairs, so the smaller set bounds the epoch.
  num_batches = int(np.min([num_batches_source,num_batches_target]))

  for batch in range(num_batches):
    #Train source for each batch
    print('Epoch: ', iterations, ' Batch: ', batch)
    loss, acc = model_source.train_on_batch(source_batch_xs[batch], source_batch_ys[batch])
    print('Source loss: ', loss, ' Source acc: ', acc)
    #Save best source model (by per-batch training accuracy)
    if acc>best_acc_source:
      best_acc_source = acc
      # This is a reference to the live model, not a snapshot; the snapshot is the weights file.
      best_source_model = model_source
      best_source_model.save_weights('source_model.hdf5')

    # Once the previous target loss is small enough, lower the target optimizer's learning rate
    # (once). The rate is changed in place: re-compiling the model here would replace the
    # per-output losses and throw away the optimizer state.
    if loss_t<0.02 and lr>0.001:
      lr = 0.0001
      K.set_value(model_target.optimizer.lr, lr)
    #Train target model
    # The aux output's target is a dummy array: the mmd loss does not use y_true.
    # Return values are presumably ordered as total loss, aux loss, classifier loss,
    # aux accuracy, classifier accuracy -- TODO confirm against model_target.metrics_names.
    loss_t,loss_t2,loss_t3, acc_t2, acc_t = model_target.train_on_batch({'source_input': source_batch_xs[batch], 'target_input': target_batch_xs[batch]},
                     {'aux_output': np.zeros(batch_size), 'source_output': target_batch_ys[batch]})

    print('Target loss: ', loss_t, ' Target acc: ', acc_t)

    #Save best target model (compare the target accuracy, not the source accuracy)
    if acc_t>best_acc_target:
      best_acc_target = acc_t
      # Reference to the live model, as above; the weights file holds the snapshot.
      best_target_model = model_target
      best_target_model.save_weights('target_model.hdf5')

      

Epoch:  0  Batch:  0
Source loss:  4.3772488e-07  Source acc:  1.0
Target loss:  2.3234582  Target acc:  0.859375
Epoch:  0  Batch:  1
Source loss:  1.192093e-07  Source acc:  1.0
Target loss:  1.2317281  Target acc:  0.90625
Epoch:  0  Batch:  2
Source loss:  0.0003737359  Source acc:  1.0
Target loss:  0.88927937  Target acc:  0.9375
Epoch:  0  Batch:  3
Source loss:  1.192093e-07  Source acc:  1.0
Target loss:  1.1640825  Target acc:  0.9375
Epoch:  0  Batch:  4
Source loss:  0.17716078  Source acc:  0.984375
Target loss:  1.662719  Target acc:  0.875
Epoch:  0  Batch:  5
Source loss:  0.25184536  Source acc:  0.984375
Target loss:  0.569361  Target acc:  0.96875
Epoch:  0  Batch:  6
Source loss:  0.346887  Source acc:  0.953125
Target loss:  0.7811986  Target acc:  0.953125
Epoch:  0  Batch:  7
Source loss:  0.50379366  Source acc:  0.96875
Target loss:  0.42461005  Target acc:  0.96875
Epoch:  0  Batch:  8
Source loss:  0.056980263  Source acc:  0.984375
Target loss:  0.6699019  T

In [88]:
# Evaluate model_source on the target (USPS) test set; with the compile settings used above the
# result is [loss, accuracy].
model_source.evaluate(target_test_images,target_test_labels)



[0.21428900841834173, 0.9711011459890384]

In [89]:
# Evaluate model_source on the source (MNIST) test set; the result is [loss, accuracy].
model_source.evaluate(source_test_images,source_test_labels)



[0.05048544924123016, 0.9896]