In [4]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from scipy.linalg import eigh

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Read and Load Data

In [6]:
from scipy.io import loadmat
webcam = loadmat('webcam.mat')
dslr = loadmat('dslr.mat')
amazon = loadmat('amazon.mat')
caltech = loadmat('caltech10.mat')

In [7]:
def loadX(data):
  X = data['fts']

  return X

def loady(data):
  y = data['labels']

  return y

# Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
def standarized(X):
  """
  Return normalized features.
 
  """
  X = StandardScaler().fit_transform(X)

  return X

# PC

In [10]:
def PC(X,d):
  """
  Return d principle components with highest variance

  """
  cov_mat = np.cov(X.T)
  eig_vals, eig_vecs = eigh(cov_mat)
  components = np.column_stack((eig_vecs[:,-i] for i in range(1,d+1)))

  #n_components with highest variance
  # var_exp = [(i / sum(eig_vals))*100 for i in sorted(eig_vals, reverse=True)]
  # var_exp = np.cumsum(var_exp)

  # import matplotlib.pyplot as plt
  # plt.plot( var_exp)
  # plt.xlabel('Number of components')
  # plt.ylabel('Variance') 
  # plt.show()

  return components

# Task 1.1

Subspace Alignment

In [11]:
def subal(S,T,d):
  Xs = PC(S, d)
  Xt = PC(T, d)

  # Defining the alignment matrix
  M = np.dot(Xs.T, Xt)

  # Computing Xa
  Xa = np.dot(Xs, M)

  # Computing source and target projected data 
  Sa = np.dot(S, Xa)
  Ta = np.dot(T, Xt)

  # Fitting a 1-NN classifier 
  KNN = KNeighborsClassifier(n_neighbors=1)
  KNN.fit(Sa,yS)
  pred= KNN.predict(Ta)

  # Accuracy
  print(accuracy_score(yT, pred))

# Task 1.2


Webcam being the source and dslr being target

In [12]:
# Load source and target features
S = loadX(webcam)
T = loadX(dslr)

# Load Source and target labels
yS = loady(webcam)
yT = loady(dslr)

In [13]:
# Scaling source and target features
S = standarized(S)
T = standarized(T)

In [14]:
subal(S,T,96)

0.9235668789808917


  


Dslr being the source and webcam being target

In [15]:
# Load source and target features
S = loadX(dslr)
T = loadX(webcam)

# Load Source and target labels
yS = loady(dslr)
yT = loady(webcam)

In [16]:
subal(S,T,96)

0.6915254237288135


  


# Task 2.1

Sinkhorn- knopp 

In [17]:
pip install POT

Collecting POT
[?25l  Downloading https://files.pythonhosted.org/packages/f8/22/67658e4b227fc52ce1c9bca522dfb3f0cc29a3536d1c7499feb3b0042a41/POT-0.7.0-cp36-cp36m-manylinux2010_x86_64.whl (428kB)
[K     |▊                               | 10kB 15.1MB/s eta 0:00:01[K     |█▌                              | 20kB 9.7MB/s eta 0:00:01[K     |██▎                             | 30kB 8.1MB/s eta 0:00:01[K     |███                             | 40kB 7.1MB/s eta 0:00:01[K     |███▉                            | 51kB 4.3MB/s eta 0:00:01[K     |████▋                           | 61kB 4.7MB/s eta 0:00:01[K     |█████▍                          | 71kB 4.8MB/s eta 0:00:01[K     |██████▏                         | 81kB 5.3MB/s eta 0:00:01[K     |██████▉                         | 92kB 5.5MB/s eta 0:00:01[K     |███████▋                        | 102kB 5.6MB/s eta 0:00:01[K     |████████▍                       | 112kB 5.6MB/s eta 0:00:01[K     |█████████▏                      | 122kB 5.6

In [23]:
import ot
import scipy
from scipy.spatial import distance

In [19]:
def sinkhorn(S,T,reg_e):
  a = np.ones(S.shape[0])
  b = np.ones(T.shape[0])

  M = scipy.spatial.distance.cdist(S,T)

  from sklearn import preprocessing
  M_norm = preprocessing.normalize(M,"max")

  G = ot.sinkhorn(a,b,M_norm, reg_e)
  Sa = np.dot(G,T)
  Sa = Sa.astype(np.float64)

  # Fitting a 1-NN classifier
  from sklearn.neighbors import KNeighborsClassifier
  KNN = KNeighborsClassifier(n_neighbors=1)
  KNN.fit(Sa, yS)
  pred = KNN.predict(T)
  print(accuracy_score(yT, pred))



# Task 2.2

Webcam being the source and dslr being the target

In [20]:
# Load source and target features
S = loadX(webcam)
T = loadX(dslr)

# Load Source and target labels
yS = loady(webcam)
yT = loady(dslr)

In [21]:
# Scaling source and target features
S = standarized(S)
T = standarized(T)

In [25]:
sinkhorn(S,T,0.01)

0.821656050955414




Dslr being the source and webcam being the target

In [26]:
# Load source and target features
S = loadX(dslr)
T = loadX(webcam)

# Load Source and target labels
yS = loady(dslr)
yT = loady(webcam)

In [27]:
# Scaling source and target features
S = standarized(S)
T = standarized(T)

In [28]:
sinkhorn(S,T,0.01)

0.7389830508474576


  v = np.divide(b, KtransposeU)
