## Purpose

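This notebook implements DaNN (Domain Adaptive Neural Network): a classifier with a single hidden layer is trained on labelled MNIST digits (the source domain), while an MMD loss on the hidden-layer outputs pulls the source representation towards that of USPS digits (the target domain).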

In [1]:
import tensorflow as tf
import math
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import layers
from keras import models
from keras.models import Model
from keras import backend as K
from keras.regularizers import Regularizer
import keras
import warnings
warnings.filterwarnings('ignore')
import cv2
import numpy as np
import h5py

Using TensorFlow backend.


## Data

In [2]:
!git clone https://github.com/NuriaSF/ML_DomainAdaptation

fatal: destination path 'ML_DomainAdaptation' already exists and is not an empty directory.


In [3]:
cd /content/ML_DomainAdaptation/Statistic_Criterion/

/content/ML_DomainAdaptation/Statistic_Criterion


##### Let us begin by loading and preprocessing the source data: MNIST

In [0]:
# MNIST is the source domain; keep the train and test splits separate.
(source_train_images, source_train_labels), (source_test_images, source_test_labels) = mnist.load_data()

In [0]:
# Flatten every 28x28 source image into a 784-vector, cast to float32 and scale by 1/255.
# NOTE(review): this cell overwrites its own inputs, so running it twice rescales the
# images and re-encodes the labels a second time.
source_train_images = (source_train_images.reshape((60000, 28*28)).astype('float32')) / 255
source_test_images = (source_test_images.reshape((10000, 28*28)).astype('float32')) / 255

# One-hot encode the class labels.
source_train_labels, source_test_labels = (
    to_categorical(source_train_labels),
    to_categorical(source_test_labels),
)

##### Let us now load and preprocess the target data: USPS

In [0]:
# Read the USPS (target domain) train/test splits from the HDF5 file.
# The relative path relies on the working directory set by the earlier `cd` cell.
with h5py.File('usps_dataset.h5', 'r') as hf:
        train = hf.get('train')
        # `[:]` reads the whole dataset into an in-memory array, so it stays usable
        # after the file is closed at the end of the `with` block.
        target_train_images_aux = train.get('data')[:]
        target_train_labels = train.get('target')[:]
        test = hf.get('test')
        target_test_images_aux = test.get('data')[:]
        target_test_labels = test.get('target')[:]

In [0]:
def _resize_usps_images(flat_images, size=(28, 28)):
    """Upscale flattened 16x16 USPS images to `size` and flatten them again.

    Args:
        flat_images: iterable of arrays that can each be reshaped to (16, 16).
        size: (width, height) passed to `cv2.resize`; defaults to the MNIST
            resolution so both domains share the same input dimension.

    Returns:
        A list with one flattened, resized image per input image.
    """
    # Iterate over whatever was loaded instead of a hard-coded sample count,
    # so a differently sized file neither raises IndexError nor gets truncated.
    return [
        cv2.resize(img.reshape(16, 16), dsize=size, interpolation=cv2.INTER_CUBIC).flatten()
        for img in flat_images
    ]


target_train_images = _resize_usps_images(target_train_images_aux)
target_test_images = _resize_usps_images(target_test_images_aux)

In [0]:
def _min_max_scale_rows(images):
    """Rescale every image (row) to the [0, 1] range independently.

    Args:
        images: sequence of equally sized flattened images.

    Returns:
        A float32 array of shape (n_images, n_pixels) where each row has been
        mapped through (row - min(row)) / (max(row) - min(row)).
    """
    images = np.asarray(images).astype('float32')
    row_min = images.min(axis=1, keepdims=True)
    row_range = images.max(axis=1, keepdims=True) - row_min
    # A constant image has zero range; divide by 1 so it becomes all zeros
    # instead of NaN.
    row_range[row_range == 0] = 1
    return (images - row_min) / row_range


# Convert the lists of resized images to arrays and normalise each image.
target_train_images = _min_max_scale_rows(target_train_images)
target_test_images = _min_max_scale_rows(target_test_images)

# One-hot encode the labels.
# NOTE(review): re-running this cell would one-hot encode the labels again.
target_train_labels = to_categorical(target_train_labels)
target_test_labels = to_categorical(target_test_labels)

## Loss

Let us now code the $MMD^2_e(\boldsymbol{q}_s, \boldsymbol{\bar{q}}_t)$ loss where
$$
\boldsymbol{q}_s=W_1^Tx_s+b
$$ 
and 
$$
\boldsymbol{\bar{q}}_t = W_1^Tx_t+b.
$$ 

In [0]:
def compute_pairwise_distances(x, y):
    """Squared Euclidean distance between every row of `x` and every row of `y`.

    Both arguments must be rank-2 tensors with the same number of columns.
    For `x` of shape (n, d) and `y` of shape (m, d) the result has shape
    (m, n), with entry [j, i] equal to ||x[i] - y[j]||^2.

    Raises:
        ValueError: if an input is not a matrix or the feature counts differ.
    """
    x_shape, y_shape = x.get_shape(), y.get_shape()
    x_rank, y_rank = len(x_shape), len(y_shape)

    if x_rank != 2 or y_rank != 2:
        raise ValueError('Both inputs should be matrices.')

    if x_shape.as_list()[1] != y_shape.as_list()[1]:
        raise ValueError('The number of features should be the same.')

    # (n, d, 1) - (d, m) broadcasts to (n, d, m): one difference per
    # (row of x, feature, row of y) triple.
    differences = tf.expand_dims(x, 2) - tf.transpose(y)
    # Summing the squares over the feature axis leaves an (n, m) matrix.
    squared_distances = tf.reduce_sum(tf.square(differences), 1)

    return tf.transpose(squared_distances)

In [0]:
def gaussian_kernel_matrix(x, y, sigmas = tf.constant([1e-2, 1e-1, 1, 5, 10])):
    """Sum of Gaussian kernels between the rows of `x` and the rows of `y`.

    For every value s in `sigmas` the kernel exp(-d / (2 * s)) is evaluated on
    the squared distances d returned by `compute_pairwise_distances(x, y)`;
    the kernels are then summed over all s.

    Args:
        x, y: rank-2 tensors with the same number of columns.
        sigmas: 1-D tensor of kernel widths.

    Returns:
        A tensor with the same shape as the pairwise-distance matrix.
    """
    # Column vector (k, 1) holding one scaling factor per kernel width.
    inv_two_sigma = 1. / (2. * (tf.expand_dims(sigmas, 1)))
    pairwise = compute_pairwise_distances(x, y)

    # (k, 1) @ (1, n*m): every scaling factor applied to every distance at once.
    flat_pairwise = tf.reshape(pairwise, (1, -1))
    exponents = tf.matmul(inv_two_sigma, flat_pairwise)

    # Add up the k kernels and restore the distance-matrix layout.
    kernel_sum = tf.reduce_sum(tf.exp(-exponents), 0)
    return tf.reshape(kernel_sum, tf.shape(pairwise))

In [0]:
#loss
def mmd(y_true, y_pred):
    """Keras loss: squared MMD between source and target hidden activations.

    Args:
        y_true: unused; Keras requires the argument, callers feed dummy zeros.
        y_pred: tensor whose first 256 columns are the hidden-layer outputs
            for the source batch and whose remaining columns are those for
            the target batch.

    Returns:
        Scalar tensor mean(k(s, s)) + mean(k(t, t)) - 2 * mean(k(s, t)),
        replaced by 0 when it is not positive.
    """
    # Undo the concatenation to recover the source and target hidden outputs.
    source_hidden, target_hidden = y_pred[:, :256], y_pred[:, 256:]

    within_source = tf.reduce_mean(gaussian_kernel_matrix(source_hidden, source_hidden))
    within_target = tf.reduce_mean(gaussian_kernel_matrix(target_hidden, target_hidden))
    across = tf.reduce_mean(gaussian_kernel_matrix(source_hidden, target_hidden))

    raw_cost = within_source + within_target - 2 * across

    # The cost has to be non-negative.
    return tf.where(raw_cost > 0, raw_cost, 0)

## Model

The model consists of a single hidden layer with 256 nodes.
* Inputs: the source and target inputs are fed into the model in parallel (i.e., both pass through the same hidden layer, so its weights are shared).
* Outputs: the network has two outputs. The first one is the class prediction for the source data; the second one is the concatenation of the hidden-layer outputs for the source and target data.

In [0]:
#Function to get batches
def get_all_batches(data,labels,batch_size):
  """Shuffle `data` and `labels` together and cut them into equal batches.

  Samples that do not fill a complete batch are dropped.

  Args:
    data: array whose first axis indexes samples.
    labels: array with the same number of samples as `data`.
    batch_size: positive number of samples per batch.

  Returns:
    (batches_x, batches_y, num_batches): two lists of arrays, each with
    `batch_size` samples and aligned pairwise, plus the integer batch count.
    When fewer than `batch_size` samples are available the result is
    ([], [], 0).

  Raises:
    ValueError: if `batch_size` is not positive or the sample counts differ.
  """
  if batch_size <= 0:
    raise ValueError('batch_size must be a positive integer.')
  if data.shape[0] != labels.shape[0]:
    raise ValueError('data and labels must contain the same number of samples.')

  idx = np.arange(0,data.shape[0])
  np.random.shuffle(idx)

  # Floor division keeps the count an int; `/` would hand callers a float.
  num_batches = data.shape[0] // batch_size
  if num_batches == 0:
    # np.split cannot produce zero sections, so return the empty result directly.
    return [], [], 0

  # Keep only as many shuffled samples as fill complete batches.
  idx = idx[:num_batches * batch_size]
  batches_x = np.split(data[idx], num_batches)
  batches_y = np.split(labels[idx], num_batches)

  return batches_x,batches_y,num_batches


In [0]:
# Two inputs of flattened 28x28 images: one for the source domain, one for the target.
input_s = layers.Input(shape=(28*28,), name='source_input')
input_t = layers.Input(shape=(28*28,), name='target_input')
# A single Dense layer instance is applied to both inputs below, so both
# branches use the same weights.
hidden = layers.Dense(256, activation='softplus', name='hidden')
#prediction = layers.Dense(10, activation='softmax', name='pred')

hidden_s = hidden(input_s)
hidden_t = hidden(input_t)
# Source and target hidden activations side by side (256 + 256 columns);
# the `mmd` loss splits them again at column 256.
aux_output = layers.concatenate([hidden_s, hidden_t], name='aux_output')
# Class probabilities are computed for the source branch only.
pred_s = layers.Dense(10, activation='softmax', name='source_output')(hidden_s)
#pred_s = prediction(hidden_s)
#pred_t = prediction(hidden_t)

# Two models over the same layers: training either one updates the shared `hidden` layer.
model_source = Model(input_s, pred_s)
model_target = Model([input_s,input_t],aux_output)

In [0]:
# Source classifier: cross-entropy on the labelled source digits.
model_source.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(lr=0.001), metrics = ['accuracy'])
# Alignment model: MMD loss on the concatenated hidden activations, with a much smaller learning rate.
# NOTE(review): `mmd` ignores y_true, so the 'accuracy' metric here is unlikely to be meaningful — confirm it is wanted.
model_target.compile(loss=mmd, optimizer=keras.optimizers.Adam(lr=0.00001), metrics = ['accuracy'])

In [24]:
# Alternating training: one classification step on a source batch, then one
# MMD pass of `model_target` over the whole target set.
epochs = 5
batch_size=64
best_acc = 0

for iterations in range(epochs):
  # Reshuffle and re-batch both domains at the start of every epoch.
  source_batch_xs, source_batch_ys, num_batches_source = get_all_batches(source_train_images,source_train_labels,batch_size)
  target_batch_xs, target_batch_ys, num_batches_target = get_all_batches(target_train_images,target_train_labels,batch_size)

  # Only as many steps as the smaller domain provides batches.
  num_batches = int(np.min([num_batches_source,num_batches_target]))
  # From the fourth epoch on the MMD model is recompiled with a larger learning rate.
  # NOTE(review): this builds a new Adam instance, so optimizer state presumably restarts — confirm intended.
  if iterations==3:
    model_target.compile(loss=mmd, optimizer=keras.optimizers.Adam(lr=0.00007), metrics = ['accuracy'])
  for batch in range(num_batches):
    #Train source for each batch
    print('Epoch: ', iterations, ' Batch: ', batch)
    loss, acc = model_source.train_on_batch(source_batch_xs[batch], source_batch_ys[batch])
    print('Source loss: ', loss, ' Source acc: ', acc)
    # `acc` is measured on the single training batch just used, not on held-out data.
    if acc>best_acc:
      best_acc = acc
      # This binds a second name to the same model object (no copy is made);
      # the snapshot is the weights file written on the next line.
      best_source_model = model_source
      best_source_model.save_weights('source_model.hdf5') 

    # One full pass over all target images, paired with the first `len_t`
    # source images, runs after EVERY source batch.
    len_t = target_train_images.shape[0]
    # The zero targets are placeholders: the `mmd` loss does not read y_true.
    model_target.fit({'source_input': source_train_images[0:len_t], 'target_input': target_train_images},
                     {'aux_output': np.zeros(len_t)}, epochs=1, batch_size=batch_size )
   
   # for batch_target in range(num_batches):
   #     model_target.train_on_batch({'source_input': source_batch_xs[batch_target], 'target_input': target_batch_xs[batch_target]},
   #                  {'aux_output': np.zeros(batch_size)} )
    

Epoch:  0  Batch:  0
Source loss:  2.6233122  Source acc:  0.078125
Epoch 1/1
Epoch:  0  Batch:  1
Source loss:  2.5682194  Source acc:  0.0625
Epoch 1/1
Epoch:  0  Batch:  2
Source loss:  2.3385215  Source acc:  0.1875
Epoch 1/1
Epoch:  0  Batch:  3
Source loss:  2.2610445  Source acc:  0.1875
Epoch 1/1
Epoch:  0  Batch:  4
Source loss:  2.0254116  Source acc:  0.421875
Epoch 1/1
Epoch:  0  Batch:  5
Source loss:  2.0195837  Source acc:  0.40625
Epoch 1/1
Epoch:  0  Batch:  6
Source loss:  1.9651241  Source acc:  0.421875
Epoch 1/1
Epoch:  0  Batch:  7
Source loss:  1.9129379  Source acc:  0.328125
Epoch 1/1
Epoch:  0  Batch:  8
Source loss:  1.7459332  Source acc:  0.453125
Epoch 1/1
Epoch:  0  Batch:  9
Source loss:  1.7233468  Source acc:  0.53125
Epoch 1/1
Epoch:  0  Batch:  10
Source loss:  1.6619165  Source acc:  0.5
Epoch 1/1
Epoch:  0  Batch:  11
Source loss:  1.554076  Source acc:  0.671875
Epoch 1/1
Epoch:  0  Batch:  12
Source loss:  1.5559978  Source acc:  0.640625
Epoch 1

In [20]:
# [loss, accuracy] of the source classifier on the USPS (target) test set.
model_source.evaluate(target_test_images,target_test_labels)



[2.5326802507822466, 0.3168908819875494]

In [21]:
# [loss, accuracy] of the source classifier on the MNIST (source) test set.
model_source.evaluate(source_test_images,source_test_labels)



[0.9323208137512207, 0.7885]

In [0]:
# Relative weight of the MMD term against the classification loss.
mmd_weight = 1.

# Joint model: both inputs in, class prediction plus concatenated hidden
# activations out. It must be built here because no earlier cell defines
# `model`; without this line the cell fails on a fresh kernel.
# It reuses the existing layers, so it starts from whatever weights they
# currently hold.
model = Model([input_s, input_t], [pred_s, aux_output])

# The loss list follows the output order: cross-entropy for 'source_output',
# MMD for 'aux_output'.
model.compile(loss=['categorical_crossentropy', mmd],
              loss_weights=[1., mmd_weight],
              optimizer='sgd',
              metrics=['accuracy'])

In [0]:
# Joint training: every step optimises cross-entropy and MMD together on one
# source batch paired with one target batch.
epochs = 10
batch_size=64
for iterations in range(epochs):
  # Reshuffle and re-batch both domains at the start of every epoch.
  source_batch_xs, source_batch_ys, num_batches_source = get_all_batches(
      source_train_images, source_train_labels, batch_size)
  target_batch_xs, target_batch_ys, num_batches_target = get_all_batches(
      target_train_images, target_train_labels, batch_size)

  # Only as many steps as the smaller domain provides batches.
  num_batches = int(min(num_batches_source, num_batches_target))

  for batch in range(num_batches):
    if batch == 0:
      print('Epoch: ', iterations)
    # Show Keras progress output for the first batch of an epoch only.
    model.fit(
        {'source_input': source_batch_xs[batch], 'target_input': target_batch_xs[batch]},
        {'source_output': source_batch_ys[batch], 'aux_output': np.zeros(batch_size)},
        epochs=1,
        batch_size=64,
        verbose=int(batch == 0))
      

In [0]:
# Classification-only view (source_input -> source_output) built from the
# same tensors as the other models, so it uses their current weights.
model_predict = Model(input_s, pred_s)

# Compiled so that evaluate() can report loss and accuracy.
model_predict.compile(loss='categorical_crossentropy',
              optimizer='sgd', 
              metrics=['accuracy'])

In [0]:
# [loss, accuracy] of the classifier on the MNIST (source) training set.
model_predict.evaluate(source_train_images,source_train_labels)

In [0]:
'''
model.fit({'source_input': source_train_images, 'target_input': target_train_images},
          {'source_output': source_train_labels, 'aux_output': np.zeros(1)},
          epochs=5, batch_size=64)
'''

In [0]:
# Run the joint model on the first source/target batch pair.
# Relies on `model`, `source_batch_xs` and `target_batch_xs` left over from earlier cells.
model.predict({'source_input': source_batch_xs[0], 'target_input': target_batch_xs[0]})