# JDOT Domain Adaptation for Caltech+Office dataset
You can download the dataset [here](https://my.pcloud.com/publink/show?code=kZprXk7Z1OmGWUuYioSJbWx3jWeCAhom5FPy)

In [1]:
import numpy as np
import ot
import scipy.io
from scipy.spatial.distance import cdist

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.svm import LinearSVC
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

In [2]:
# Colours of Matplotlib's default property cycle, stored as an array so that it
# can be indexed with an integer array of class labels when plotting.
colormap = np.array(plt.rcParams['axes.prop_cycle'].by_key()['color'])

## Load the dataset

In [3]:
# Label binarizer shared by the rest of the notebook; it is fitted in the
# data-preparation cell below.
lb = preprocessing.LabelBinarizer()

In [4]:
# Directory holding the *_decaf.mat files. Keep the trailing slash: file names
# are appended to this string by plain concatenation.
folder = "/datasets/office-caltech/"

In [5]:
# Load the four domains, in this order, each as the dict returned by loadmat.
_mat_files = ("amazon_decaf.mat", "caltech_decaf.mat", "dslr_decaf.mat", "webcam_decaf.mat")
amazon, caltech, dslr, webcam = (
    scipy.io.loadmat(folder + mat_file) for mat_file in _mat_files
)

In [6]:
# Adaptation task: Caltech is the source domain, Amazon the target domain.
source, target = caltech, amazon

In [7]:
# Source domain: features, flat label vector and binarized labels.
# The binarizer is fitted here, on the source labels only. Its column order is
# the one every binarized matrix must share, including the lb.transform /
# lb.inverse_transform calls made later in the notebook.
X_source = source['feas']
Y_source = source['labels'].ravel()
Y_source_bin = lb.fit_transform(Y_source)

# Target domain: encode with the already-fitted binarizer instead of refitting
# it, so that source and target encodings are guaranteed to use the same
# columns even if the two label sets differ.
X_target = target['feas']
Y_target = target['labels'].ravel()
Y_target_bin = lb.transform(Y_target)

# Work on a random batch of each domain to keep the pairwise cost matrices and
# the transport problem small.
# NOTE(review): np.random.choice draws WITH replacement by default, so a batch
# may contain duplicated samples, and no seed is set, so runs are not
# reproducible. Confirm whether replace=False / a fixed seed is wanted.
batch_size = 950
source_choice = np.random.choice(len(Y_source), batch_size)
target_choice = np.random.choice(len(Y_target), batch_size)

X_source = X_source[source_choice]
Y_source = Y_source[source_choice]
Y_source_bin = Y_source_bin[source_choice]

X_target = X_target[target_choice]
Y_target = Y_target[target_choice]
Y_target_bin = Y_target_bin[target_choice]

In [8]:
# Separate 2-component PCA projections of each domain (source is fitted first,
# then target); the projected points are what the scatter plot displays.
pca_source, pca_target = PCA(n_components=2), PCA(n_components=2)
X_pca_source = pca_source.fit_transform(X_source)
X_pca_target = pca_target.fit_transform(X_target)

## Initialization

In [9]:
# Classifier refined by the algorithm: a linear SVM with default settings.
model = LinearSVC()

In [10]:
# Initial fit on the source samples and their labels.
model.fit(X_source, Y_source)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [11]:
def f(x):
    """Predict with the global ``model`` for one sample.

    ``x`` is reshaped into a single row before being passed to
    ``model.predict``; the prediction is returned as a flat array.
    """
    single_row = x.reshape(1, -1)
    prediction = model.predict(single_row)
    return prediction.flatten()

In [29]:
def label_loss(y, fx):
    """Return the summed squared label loss between ``y`` and ``fx``.

    For every class the prediction is compared with a target of +1 where the
    indicator ``y`` is 1 and with a target of -1 where it is 0; the squared
    differences are summed over all classes.

    Parameters
    ----------
    y : array-like
        0/1 class-indicator vector of the reference label.
    fx : array-like
        Per-class prediction, same length as ``y``.

    Returns
    -------
    NumPy scalar
        ``sum_k y[k] * (1 - fx[k])**2 + (1 - y[k]) * (-1 - fx[k])**2``.
    """
    # Convert once so that plain sequences are accepted and the whole sum is a
    # single vectorised expression instead of a Python-level loop.
    y = np.asarray(y)
    fx = np.asarray(fx)
    return np.sum(y * (1 - fx) ** 2 + (1 - y) * (-1 - fx) ** 2)

In [30]:
# Weight of the feature distance relative to the label loss in the total cost.
alpha = 1

# Uniform weights over the source samples (a) and the target samples (b).
a = np.full((len(Y_source),), 1.0 / len(Y_source))
b = np.full((len(Y_target),), 1.0 / len(Y_target))

# Pairwise Euclidean distances: one row per source sample, one column per
# target sample.
D = cdist(X_source, X_target, metric="euclidean")

## Run the algorithm

In [None]:
# Sanity check: prediction of the source-trained model for the first target
# sample. (Y_pred does not exist yet at this point when the cells are run in
# order, so query the model through f instead.)
f(X_target[0])

array([6], dtype=uint8)

In [None]:
# `import ot` alone does not load the plotting submodule used at the end of
# this cell, so import it explicitly.
import ot.plot

plt.rcParams['figure.figsize'] = (12, 10)
plt.axis("equal")
# plt.axis(3*np.array([-1,1,-1,1]))

nb_iter = 5

# Alternate between (1) computing a transport plan gamma for the current
# classifier and (2) refitting the classifier on labels transported by gamma.
for i in range(nb_iter):
    # Gamma part: learn a transport plan
    print("Compute Y_pred...")
    Y_pred = lb.transform(model.predict(X_target))
    print("Compute L...")
    # Pairwise label cost, L[s, t] == label_loss(Y_source_bin[s], Y_pred[t]),
    # computed for all pairs at once with two matrix products instead of one
    # Python call per (source, target) pair.
    cost_if_positive = (1 - Y_pred) ** 2   # applies where the source indicator is 1
    cost_if_negative = (-1 - Y_pred) ** 2  # applies where the source indicator is 0
    L = (np.dot(Y_source_bin, cost_if_positive.T)
         + np.dot(1 - Y_source_bin, cost_if_negative.T))
    C = alpha * D + L
    print("Compute gamma...")
    gamma = ot.emd(a, b, C)

    # f part: learn the classifier on the transported labels.
    # Each column of gamma sums to b[t] = 1 / len(Y_target); multiplying by
    # len(Y_target) therefore gives, for every target sample, a weighted
    # average of the source label indicators.
    Y_hat_bin = len(Y_target) * np.dot(gamma.T, Y_source_bin)
    Y_hat = lb.inverse_transform(Y_hat_bin)
    print("Fit model...")
    model.fit(X_target, Y_hat)

# Plot the last transport plan on the 2-D PCA projections of both domains.
print("Plot...")
plt.scatter(X_pca_source[:,0], X_pca_source[:,1], label="source", s=40, facecolors='none', edgecolor=colormap[Y_source % 10])
plt.scatter(X_pca_target[:,0], X_pca_target[:,1], label="target", color=colormap[Y_target % 10])
ot.plot.plot2D_samples_mat(X_pca_source, X_pca_target, gamma, color=plt.cm.Dark2(8), linestyle=":")
plt.title("Evolution of the transport plan and the classification of target points", fontsize=15)
plt.legend()

#plt.savefig("screenshots/gaussian-13.png")

Compute Y_pred...
Compute L...
Compute gamma...
Fit model...
Compute Y_pred...
Compute L...


## Compute the score

In [None]:
# Predictions of the current model on the target samples.
Y_pred = model.predict(X_target)

In [None]:
# Accuracy of those predictions against the true target labels.
accuracy_score(Y_target, Y_pred)