From 3306f404395cff8cf595dd821646cb9d8be3aa73 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Wed, 13 Sep 2017 21:13:20 +0200
Subject: [PATCH 01/42] GPU changes:

 - Replace cudamat by cupy for GPU implementations (cupy is still in active development, while cudamat is not)
 - Use the new DA class instead of the old deprecated one

TODO for another PR:
 - Performances are still a bit lower than with cudamat (even if better than CPU for large matrices). Some speedups should be possible by tweaking the code
---
 README.md              |   8 +-
 docs/source/readme.rst |   8 +-
 ot/gpu/__init__.py     |   4 +-
 ot/gpu/bregman.py      |  86 ++++----
 ot/gpu/da.py           | 431 ++++++++++++++++++++++++++++++-----------
 test/test_gpu.py       |  33 ++--
 6 files changed, 394 insertions(+), 176 deletions(-)

diff --git a/README.md b/README.md
index 8d54f4955..8afba676f 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ This open source Python library provide several solvers for optimization problem
 It provides the following solvers:
 
 * OT solver for the linear program/ Earth Movers Distance [1].
-* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (required cudamat).
+* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (required cupy).
 * Bregman projections for Wasserstein barycenter [3] and unmixing [4].
 * Optimal transport for domain adaptation with group lasso regularization [5]
 * Conditional gradient [6] and Generalized conditional gradient for regularized OT [7].
@@ -63,10 +63,10 @@ Some sub-modules require additional dependences which are discussed below
 ```
 pip install pymanopt autograd
 ```
-* **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be installed with:
+* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed with:
 ```
-git clone https://github.com/cudamat/cudamat.git
-cd cudamat
+git clone https://github.com/cupy/cupy
+cd cupy
 python setup.py install --user # for user install (no root)
 ```
 
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index c1e001745..257959d56 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -12,7 +12,7 @@ It provides the following solvers:
 -  OT solver for the linear program/ Earth Movers Distance [1].
 -  Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2]
    and stabilized version [9][10] with optional GPU implementation
-   (required cudamat).
+   (required cupy).
 -  Bregman projections for Wasserstein barycenter [3] and unmixing [4].
 -  Optimal transport for domain adaptation with group lasso
    regularization [5]
@@ -88,13 +88,13 @@ below
 
        pip install pymanopt autograd
 
--  **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be
+-  **ot.gpu** (GPU accelerated OT) depends on cupy that have to be
    installed with:
 
    ::
 
-       git clone https://github.com/cudamat/cudamat.git
-       cd cudamat
+       git clone https://github.com/cupy/cupy
+       cd cupy
        python setup.py install --user # for user install (no root)
 
 obviously you need CUDA installed and a compatible GPU.
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index c8f94335a..6edb518b9 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -2,11 +2,11 @@
 
 from . import bregman
 from . import da
-from .bregman import sinkhorn
+from .bregman import sinkhorn_knopp
 
 # Author: Remi Flamary <remi.flamary@unice.fr>
 #         Leo Gautheron <https://github.com/aje> 
 #
 # License: MIT License
 
-__all__ = ["bregman", "da", "sinkhorn"]
+__all__ = ["bregman", "da", "sinkhorn_knopp"]
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 47939c462..efae1227c 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -9,11 +9,11 @@
 # License: MIT License
 
 import numpy as np
-import cudamat
+import cupy as cp
 
 
-def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
-                log=False, returnAsGPU=False):
+def sinkhorn_knopp(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9,
+                   verbose=False, log=False, returnAsGPU=False):
     r"""
     Solve the entropic regularization optimal transport problem on GPU
 
@@ -30,10 +30,12 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
     where :
 
     - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
     - a and b are source and target weights (sum to 1)
 
-    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
+    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
+    scaling algorithm as proposed in [2]_
 
 
     Parameters
@@ -42,7 +44,7 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
         samples weights in the source domain
     b : np.ndarray (nt,)
         samples in the target domain
-    M_GPU : cudamat.CUDAMatrix (ns,nt)
+    M_GPU : cupy.ndarray (ns,nt)
         loss matrix
     reg : float
         Regularization term >0
@@ -55,11 +57,11 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
     log : bool, optional
         record log if True
     returnAsGPU : bool, optional
-        return the OT matrix as a cudamat.CUDAMatrix
+        return the OT matrix as a cupy.ndarray
 
     Returns
     -------
-    gamma : (ns x nt) ndarray
+    gamma : (ns x nt) np.ndarray or cupy.ndarray
         Optimal transportation matrix for the given parameters
     log : dict
         log dictionary return only if log==True in parameters
@@ -79,7 +81,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
     References
     ----------
 
-    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+    Transport, Advances in Neural Information Processing Systems (NIPS) 26,
+    2013.
 
 
     See Also
@@ -98,50 +102,53 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
     # we assume that no distances are null except those of the diagonal of
     # distances
     u = (np.ones(Nini) / Nini).reshape((Nini, 1))
-    u_GPU = cudamat.CUDAMatrix(u)
-    a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1)))
-    ones_GPU = cudamat.empty(u_GPU.shape).assign(1)
+    u_GPU = cp.array(u)
+    a_GPU = cp.array(a.reshape((Nini, 1)))
+    ones_GPU = cp.ones(u_GPU.shape)
     v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1))
-    v_GPU = cudamat.CUDAMatrix(v)
-    b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1)))
+    v_GPU = cp.array(v)
+    b_GPU = cp.array(b.reshape((Nfin, 1))).reshape(-1)
+    M_GPU = cp.divide(M_GPU, -reg)
 
-    M_GPU.divide(-reg)
+    K_GPU = cp.exp(M_GPU)
 
-    K_GPU = cudamat.exp(M_GPU)
+    cp.divide(ones_GPU, a_GPU, out=a_GPU)
+    Kp_GPU = a_GPU.reshape(-1, 1) * K_GPU
 
-    ones_GPU.divide(a_GPU, target=a_GPU)
-    Kp_GPU = cudamat.empty(K_GPU.shape)
-    K_GPU.mult_by_col(a_GPU, target=Kp_GPU)
-
-    tmp_GPU = cudamat.empty(K_GPU.shape)
+    tmp_GPU = cp.empty(K_GPU.shape)
+    tmp2_GPU = cp.empty(b_GPU.shape)
+    KtransposeU_GPU = cp.empty(v_GPU.shape)
 
     cpt = 0
     err = 1
+
     while (err > stopThr and cpt < numItermax):
-        uprev_GPU = u_GPU.copy()
-        vprev_GPU = v_GPU.copy()
+        uprev_GPU = u_GPU
+        vprev_GPU = v_GPU
+        K_GPU.T.dot(u_GPU, out=KtransposeU_GPU)
+        cp.divide(b_GPU.reshape(-1, 1), KtransposeU_GPU, out=v_GPU)
+        Kp_GPU.dot(v_GPU, out=u_GPU)
+        cp.divide(ones_GPU, u_GPU, out=u_GPU)
 
-        KtransposeU_GPU = K_GPU.transpose().dot(u_GPU)
-        b_GPU.divide(KtransposeU_GPU, target=v_GPU)
-        ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU)
+        if (cp.any(KtransposeU_GPU == 0) or
+                cp.any(cp.isnan(u_GPU)) or cp.any(cp.isnan(v_GPU)) or
+                cp.any(cp.isinf(u_GPU)) or cp.any(cp.isinf(v_GPU))):
 
-        if (np.any(KtransposeU_GPU.asarray() == 0) or
-                not u_GPU.allfinite() or not v_GPU.allfinite()):
             # we have reached the machine precision
             # come back to previous solution and quit loop
             print('Warning: numerical errors at iteration', cpt)
-            u_GPU = uprev_GPU.copy()
-            v_GPU = vprev_GPU.copy()
+            u_GPU = uprev_GPU
+            v_GPU = vprev_GPU
             break
         if cpt % 10 == 0:
             # we can speed up the process by checking for the error only all
             # the 10th iterations
-            K_GPU.mult_by_col(u_GPU, target=tmp_GPU)
-            tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU)
 
-            bcopy_GPU = b_GPU.copy().transpose()
-            bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1)
-            err = bcopy_GPU.euclid_norm()**2
+            cp.multiply(u_GPU.reshape(-1, 1), K_GPU, out=tmp_GPU)
+            cp.multiply(tmp_GPU, v_GPU.reshape(1, -1), out=tmp_GPU)
+            cp.sum(tmp_GPU, axis=0, out=tmp2_GPU)
+            tmp2_GPU -= b_GPU
+            err = cp.linalg.norm(tmp2_GPU)**2
             if log:
                 log['err'].append(err)
 
@@ -152,16 +159,15 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
                 print('{:5d}|{:8e}|'.format(cpt, err))
         cpt += 1
     if log:
-        log['u'] = u_GPU.asarray()
-        log['v'] = v_GPU.asarray()
+        log['u'] = cp.asnumpy(u_GPU)
+        log['v'] = cp.asnumpy(v_GPU)
 
-    K_GPU.mult_by_col(u_GPU, target=K_GPU)
-    K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+    K_GPU = u_GPU.reshape(-1, 1) * K_GPU * v_GPU.reshape(1, -1)
 
     if returnAsGPU:
         res = K_GPU
     else:
-        res = K_GPU.asarray()
+        res = cp.asnumpy(K_GPU)
 
     if log:
         return res, log
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 05c580f3d..ee92eb305 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -12,10 +12,11 @@
 
 
 import numpy as np
-from ..utils import unif
-from ..da import OTDA
-from .bregman import sinkhorn
-import cudamat
+import cupy as cp
+
+from ..utils import check_params
+from ..da import BaseTransport, distribution_estimation_uniform
+from .bregman import sinkhorn_knopp
 
 
 def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
@@ -26,18 +27,18 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
     Parameters
     ----------
     a : np.ndarray (n, f)
-        first matrice
+        first matrix
     b : np.ndarray (m, f)
-        second matrice
+        second matrix
     returnAsGPU : boolean, optional (default False)
-        if True, returns cudamat matrix still on GPU, else return np.ndarray
+        if True, returns cupy matrix still on GPU, else return np.ndarray
     squared : boolean, optional (default False)
-        if True, return squared euclidean distance matrice
+        if True, return squared euclidean distance matrix
 
 
     Returns
     -------
-    c : (n x m) np.ndarray or cudamat.CUDAMatrix
+    c : (n x m) np.ndarray or cupy.ndarray
         pairwise euclidean distance distance matrix
     """
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
@@ -46,45 +47,46 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
     # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
     # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
-
-    a_GPU = cudamat.CUDAMatrix(a)
-    b_GPU = cudamat.CUDAMatrix(b)
+    a_GPU = cp.asarray(a)
+    b_GPU = cp.asarray(b)
 
     # Multiply a by b transpose to obtain in each cell [i,j] of c the
     # value sum{k in range(f)} ( a[i,k]b[j,k] )
-    c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
+    c_GPU = a_GPU.dot(b_GPU.T)
     # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
-    c_GPU.mult(-2)
+    cp.multiply(c_GPU, -2, out=c_GPU)
 
     # Compute the vectors of the sum of squared elements.
-    a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
-    b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)
+    a_GPU = cp.power(a_GPU, 2).sum(axis=1)
+    b_GPU = cp.power(b_GPU, 2).sum(axis=1)
 
     # Add the vectors in each columns (respectivly rows) of c.
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
-    c_GPU.add_col_vec(a_GPU)
+    c_GPU += a_GPU.reshape(-1, 1)
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
-    c_GPU.add_row_vec(b_GPU.transpose())
+    c_GPU += b_GPU
 
     if not squared:
-        c_GPU = cudamat.sqrt(c_GPU)
+        cp.sqrt(c_GPU, out=c_GPU)
 
     if returnAsGPU:
         return c_GPU
     else:
-        return c_GPU.asarray()
+        return cp.asnumpy(c_GPU)
 
 
 def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
                      numInnerItermax=200, stopInnerThr=1e-9,
                      verbose=False, log=False):
     """
-    Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
+    Solve the entropic regularization optimal transport problem with nonconvex
+    group lasso regularization.
 
     The function solves the following optimization problem:
 
     .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma)
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) +
+                 \eta \Omega_g(\gamma)
 
         s.t. \gamma 1 = a
 
@@ -94,11 +96,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
     where :
 
     - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - :math:`\Omega_g` is the group lasso  regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`   where  :math:`\mathcal{I}_c` are the index of samples from class c in the source domain.
+    - :math:`\Omega_e` is the entropic regularization term
+      :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega_g` is the group lasso  regulaization term
+      :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
+      where  :math:`\mathcal{I}_c` are the index of samples from class c in the
+      source domain.
     - a and b are source and target weights (sum to 1)
 
-    The algorithm used for solving the problem is the generalised conditional gradient as proposed in  [5]_ [7]_
+    The algorithm used for solving the problem is the generalised conditional
+    gradient as proposed in  [5]_ [7]_
 
 
     Parameters
@@ -109,7 +116,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
         labels of samples in the source domain
     b : np.ndarray (nt,)
         samples weights in the target domain
-    M_GPU : cudamat.CUDAMatrix (ns,nt)
+    M_GPU : cupy.ndarray (ns,nt)
         loss matrix
     reg : float
         Regularization term for entropic regularization >0
@@ -129,7 +136,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
 
     Returns
     -------
-    gamma : (ns x nt) ndarray
+    gamma : (ns x nt) np.ndarray
         Optimal transportation matrix for the given parameters
     log : dict
         log dictionary return only if log==True in parameters
@@ -138,8 +145,12 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
     References
     ----------
 
-    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
-    .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
+    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport
+           for Domain Adaptation," in IEEE Transactions on Pattern Analysis and
+           Machine Intelligence , vol.PP, no.99, pp.1-1
+    .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized
+           conditional gradient: analysis of convergence and applications.
+           arXiv preprint arXiv:1510.06567.
 
     See Also
     --------
@@ -150,103 +161,303 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
     """
     p = 0.5
     epsilon = 1e-3
-    Nfin = len(b)
 
     indices_labels = []
     classes = np.unique(labels_a)
     for c in classes:
         idxc, = np.where(labels_a == c)
-        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+        indices_labels.append(cp.asarray(idxc.reshape(1, -1)))
+
+    W_GPU = cp.zeros(M_GPU.shape)
 
-    Mreg_GPU = cudamat.empty(M_GPU.shape)
-    W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+    def describe_res(r):
+        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
     for cpt in range(numItermax):
-        Mreg_GPU.assign(M_GPU)
-        Mreg_GPU.add_mult(W_GPU, eta)
-        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
-                              stopThr=stopInnerThr, returnAsGPU=True)
+        Mreg_GPU = cp.add(M_GPU, cp.multiply(W_GPU, eta))
+        transp_GPU = sinkhorn_knopp(a, b, Mreg_GPU, reg,
+                                    numItermax=numInnerItermax,
+                                    stopThr=stopInnerThr, returnAsGPU=True)
         # the transport has been computed. Check if classes are really
         # separated
-        W_GPU.assign(1)
-        W_GPU = W_GPU.transpose()
+        W_GPU.fill(1)
         for (i, c) in enumerate(classes):
-            (_, nbRow) = indices_labels[i].shape
-            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
-            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
-            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
-            cudamat.pow(majs_GPU, (p - 1))
-            majs_GPU.mult(p)
-
-            tmpC_GPU.assign(0)
-            tmpC_GPU.add_col_vec(majs_GPU)
-            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
-
-        W_GPU = W_GPU.transpose()
-
-    return transp_GPU.asarray()
-
-
-class OTDA_GPU(OTDA):
-    def normalizeM(self, norm):
-        if norm == "median":
-            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
-        elif norm == "max":
-            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
-        elif norm == "log":
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-        elif norm == "loglog":
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-
-
-class OTDA_sinkhorn(OTDA_GPU):
-    def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
-        cudamat.init()
-        xs = np.asarray(xs, dtype=np.float64)
-        xt = np.asarray(xt, dtype=np.float64)
-
-        self.xs = xs
-        self.xt = xt
-
-        if wt is None:
-            wt = unif(xt.shape[0])
-        if ws is None:
-            ws = unif(xs.shape[0])
-
-        self.ws = ws
-        self.wt = wt
-
-        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+            majs_GPU = cp.sum(transp_GPU[indices_labels[i]][0], axis=0)
+            majs_GPU = p * ((majs_GPU + epsilon)**(p - 1))
+            W_GPU[indices_labels[i]] = majs_GPU
+
+    return cp.asnumpy(transp_GPU)
+
+
+def cost_normalization(C, norm=None):
+    """ Apply normalization to the loss matrix
+
+
+    Parameters
+    ----------
+    C : cupy.array (n1, n2)
+        The cost matrix to normalize.
+    norm : str
+        type of normalization from 'median','max','log','loglog'. Any other
+        value do not normalize.
+
+
+    Returns
+    -------
+
+    C : cupy.array (n1, n2)
+        The input cost matrix normalized according to given norm.
+
+    """
+
+    if norm == "median":
+        C = cp.divide(np.median(cp.asnumpy(C)))
+    elif norm == "max":
+        C = cp.divide(cp.max(C))
+    elif norm == "log":
+        C = cp.log(1 + C)
+    elif norm == "loglog":
+        C = cp.log(1 + cp.log(1 + C))
+
+    return C
+
+
+class SinkhornTransport(BaseTransport):
+    """Domain Adapatation OT method based on Sinkhorn Algorithm
+
+    Parameters
+    ----------
+    reg_e : float, optional (default=1)
+        Entropic regularization parameter
+    max_iter : int, float, optional (default=1000)
+        The minimum number of iteration before stopping the optimization
+        algorithm if no it has not converged
+    tol : float, optional (default=10e-9)
+        The precision required to stop the optimization algorithm.
+    mapping : string, optional (default="barycentric")
+        The kind of mapping to apply to transport samples from a domain into
+        another one.
+        if "barycentric" only the samples used to estimate the coupling can
+        be transported from a domain to another one.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution : string, optional (default="uniform")
+        The kind of distribution estimation to employ
+    verbose : int, optional (default=0)
+        Controls the verbosity of the optimization algorithm
+    log : int, optional (default=0)
+        Controls the logs of the optimization algorithm
+    limit_max: float, optional (defaul=np.infty)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit an infinite cost
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    log_ : dictionary
+        The dictionary of log, empty dic if parameter log is not True
+
+    References
+    ----------
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+           "Optimal Transport for Domain Adaptation," in IEEE Transactions
+           on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+           Transport, Advances in Neural Information Processing Systems (NIPS)
+           26, 2013
+    """
+
+    def __init__(self, reg_e=1., max_iter=1000,
+                 tol=10e-9, verbose=False, log=False,
+                 metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=np.infty):
+
+        self.reg_e = reg_e
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.log = log
+        self.metric = metric
+        self.norm = norm
+        self.limit_max = limit_max
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+
+    def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_target_samples,)
+            The class labels.
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        # Fit
+        if not check_params(Xs=Xs, Xt=Xt):
+            return
+        # pairwise distance
+        self.cost_ = pairwiseEuclideanGPU(Xs, Xt, returnAsGPU=True,
                                           squared=True)
-        self.normalizeM(norm)
-        self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
-        self.computed = True
+        self.cost_ = cost_normalization(self.cost_, self.norm)
+
+        # distribution estimation
+        self.mu_s = self.distribution_estimation(Xs)
+        self.mu_t = self.distribution_estimation(Xt)
+
+        # store arrays of samples
+        self.xs_ = Xs
+        self.xt_ = Xt
 
+        # coupling estimation
+        returned_ = sinkhorn_knopp(
+            a=self.mu_s, b=self.mu_t, M_GPU=self.cost_, reg=self.reg_e,
+            numItermax=self.max_iter, stopThr=self.tol,
+            verbose=self.verbose, log=self.log)
 
-class OTDA_lpl1(OTDA_GPU):
-    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
-            **kwargs):
-        cudamat.init()
-        xs = np.asarray(xs, dtype=np.float64)
-        xt = np.asarray(xt, dtype=np.float64)
+        # deal with the value of log
+        if self.log:
+            self.coupling_, self.log_ = returned_
+        else:
+            self.coupling_ = returned_
+            self.log_ = dict()
 
-        self.xs = xs
-        self.xt = xt
+        return self
 
-        if wt is None:
-            wt = unif(xt.shape[0])
-        if ws is None:
-            ws = unif(xs.shape[0])
 
-        self.ws = ws
-        self.wt = wt
+class SinkhornLpl1Transport(BaseTransport):
+    """Domain Adapatation OT method based on sinkhorn algorithm +
+    LpL1 class regularization.
 
-        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+    Parameters
+    ----------
+    reg_e : float, optional (default=1)
+        Entropic regularization parameter
+    reg_cl : float, optional (default=0.1)
+        Class regularization parameter
+    mapping : string, optional (default="barycentric")
+        The kind of mapping to apply to transport samples from a domain into
+        another one.
+        if "barycentric" only the samples used to estimate the coupling can
+        be transported from a domain to another one.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution : string, optional (default="uniform")
+        The kind of distribution estimation to employ
+    max_iter : int, float, optional (default=10)
+        The minimum number of iteration before stopping the optimization
+        algorithm if no it has not converged
+    max_inner_iter : int, float, optional (default=200)
+        The number of iteration in the inner loop
+    verbose : int, optional (default=0)
+        Controls the verbosity of the optimization algorithm
+    limit_max: float, optional (defaul=np.infty)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit an infinite cost
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+
+    References
+    ----------
+
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+       "Optimal Transport for Domain Adaptation," in IEEE
+       Transactions on Pattern Analysis and Machine Intelligence ,
+       vol.PP, no.99, pp.1-1
+    .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+       Generalized conditional gradient: analysis of convergence
+       and applications. arXiv preprint arXiv:1510.06567.
+
+    """
+
+    def __init__(self, reg_e=1., reg_cl=0.1,
+                 max_iter=10, max_inner_iter=200,
+                 tol=10e-9, verbose=False,
+                 metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=np.infty):
+
+        self.reg_e = reg_e
+        self.reg_cl = reg_cl
+        self.max_iter = max_iter
+        self.max_inner_iter = max_inner_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.metric = metric
+        self.norm = norm
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+        self.limit_max = limit_max
+
+    def fit(self, Xs, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_target_samples,)
+            The class labels. If some target samples are unlabeled, fill the
+            yt's elements with -1.
+
+            Warning: Note that, due to this convention -1 cannot be used as a
+            class label
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        # check the necessary inputs parameters are here
+        if not check_params(Xs=Xs, Xt=Xt, ys=ys):
+            return self
+
+        # pairwise distance
+        self.cost_ = pairwiseEuclideanGPU(Xs, Xt, returnAsGPU=True,
                                           squared=True)
-        self.normalizeM(norm)
-        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
-        self.computed = True
+        self.cost_ = cost_normalization(self.cost_, self.norm)
+
+        # distribution estimation
+        self.mu_s = self.distribution_estimation(Xs)
+        self.mu_t = self.distribution_estimation(Xt)
+
+        # store arrays of samples
+        self.xs_ = Xs
+        self.xt_ = Xt
+
+        self.coupling_ = sinkhorn_lpl1_mm(
+                a=self.mu_s, labels_a=ys, b=self.mu_t, M_GPU=self.cost_,
+                reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+                numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+                verbose=self.verbose)
+
+        return self
diff --git a/test/test_gpu.py b/test/test_gpu.py
index 615c2a7c3..f597830a6 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -4,12 +4,13 @@
 #
 # License: MIT License
 
-import numpy as np
-import ot
 import time
+
+import numpy as np
 import pytest
+import ot
 
-try:  # test if cudamat installed
+try:  # test if cupy is installed
     import ot.gpu
     nogpu = False
 except ImportError:
@@ -30,13 +31,13 @@ def describe_res(r):
         a = rng.rand(n_samples // 4, 100)
         b = rng.rand(n_samples, 100)
         time1 = time.time()
-        transport = ot.da.OTDA_sinkhorn()
-        transport.fit(a, b)
-        G1 = transport.G
+        transport = ot.da.SinkhornTransport()
+        transport.fit(Xs=a, Xt=b)
+        G1 = transport.coupling_
         time2 = time.time()
-        transport = ot.gpu.da.OTDA_sinkhorn()
-        transport.fit(a, b)
-        G2 = transport.G
+        transport = ot.gpu.da.SinkhornTransport()
+        transport.fit(Xs=a, Xt=b)
+        G2 = transport.coupling_
         time3 = time.time()
         print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
         describe_res(G1)
@@ -55,19 +56,19 @@ def describe_res(r):
         print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
               .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n_samples in [50, 100, 500]:
+    for n_samples in [50, 5000, 10000]:
         print(n_samples)
         a = rng.rand(n_samples // 4, 100)
         labels_a = np.random.randint(10, size=(n_samples // 4))
         b = rng.rand(n_samples, 100)
         time1 = time.time()
-        transport = ot.da.OTDA_lpl1()
-        transport.fit(a, labels_a, b)
-        G1 = transport.G
+        transport = ot.da.SinkhornLpl1Transport()
+        transport.fit(Xs=a, ys=labels_a, Xt=b)
+        G1 = transport.coupling_
         time2 = time.time()
-        transport = ot.gpu.da.OTDA_lpl1()
-        transport.fit(a, labels_a, b)
-        G2 = transport.G
+        transport = ot.gpu.da.SinkhornLpl1Transport()
+        transport.fit(Xs=a, ys=labels_a, Xt=b)
+        G2 = transport.coupling_
         time3 = time.time()
         print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(
             time2 - time1))

From 93376d59d9d7099739b6fbd9642e40b7b0819750 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Wed, 13 Sep 2017 21:22:45 +0200
Subject: [PATCH 02/42] Fix indent

---
 ot/gpu/da.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index ee92eb305..c4b868210 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -455,9 +455,9 @@ class label
         self.xt_ = Xt
 
         self.coupling_ = sinkhorn_lpl1_mm(
-                a=self.mu_s, labels_a=ys, b=self.mu_t, M_GPU=self.cost_,
-                reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
-                numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
-                verbose=self.verbose)
+            a=self.mu_s, labels_a=ys, b=self.mu_t, M_GPU=self.cost_,
+            reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+            numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+            verbose=self.verbose)
 
         return self

From 3bf4d09cfe3ade7e7ad57a949714b61e0d2f67bc Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Wed, 13 Sep 2017 21:32:06 +0200
Subject: [PATCH 03/42] Remove debug

---
 test/test_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_gpu.py b/test/test_gpu.py
index f597830a6..e06f28482 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -56,7 +56,7 @@ def describe_res(r):
         print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
               .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n_samples in [50, 5000, 10000]:
+    for n_samples in [50, 100, 500]:
         print(n_samples)
         a = rng.rand(n_samples // 4, 100)
         labels_a = np.random.randint(10, size=(n_samples // 4))

From bbe9a92019e8356afd3c00381514c6011b55ea41 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Thu, 14 Sep 2017 09:18:12 +0200
Subject: [PATCH 04/42] Remove debug

---
 ot/gpu/da.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index c4b868210..2405bcace 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -170,10 +170,6 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
 
     W_GPU = cp.zeros(M_GPU.shape)
 
-    def describe_res(r):
-        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
-              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
-
     for cpt in range(numItermax):
         Mreg_GPU = cp.add(M_GPU, cp.multiply(W_GPU, eta))
         transp_GPU = sinkhorn_knopp(a, b, Mreg_GPU, reg,

From 05c1bd6ea7b4734746573ba8d960f5b692d1dc5c Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Thu, 14 Sep 2017 10:16:52 +0200
Subject: [PATCH 05/42] Small speedup LpL1

---
 ot/gpu/da.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 2405bcace..abbb027f5 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -169,9 +169,10 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
         indices_labels.append(cp.asarray(idxc.reshape(1, -1)))
 
     W_GPU = cp.zeros(M_GPU.shape)
-
+    Mreg_GPU = cp.empty(M_GPU.shape)
     for cpt in range(numItermax):
-        Mreg_GPU = cp.add(M_GPU, cp.multiply(W_GPU, eta))
+        cp.multiply(W_GPU, eta, out=Mreg_GPU)
+        Mreg_GPU += M_GPU
         transp_GPU = sinkhorn_knopp(a, b, Mreg_GPU, reg,
                                     numItermax=numInnerItermax,
                                     stopThr=stopInnerThr, returnAsGPU=True)

From abf6fe08e5e8f09a941ac377814ef8047ebf9c12 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 07:54:22 +0200
Subject: [PATCH 06/42] Add utils functions for CPU/GPU merge

Add function pairwiseEuclidean that can be used with numpy or cupy. cupy (GPU) is used if parameter gpu==True and cupy is available. Otherwise compute with numpy.
This function is faster than scipy.spatial.distance.cdist for sqeuclidean even when computing using the CPU (numpy).
---
 ot/utils.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 85 insertions(+), 6 deletions(-)

diff --git a/ot/utils.py b/ot/utils.py
index 31a002b0a..b6a130855 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -15,7 +15,10 @@
 from scipy.spatial.distance import cdist
 import sys
 import warnings
-
+try:
+    import cupy as cp
+except ImportError:
+    cp = False
 
 __time_tic_toc = time.time()
 
@@ -74,8 +77,65 @@ def clean_zeros(a, b, M):
     return a2, b2, M2
 
 
+def pairwiseEuclidean(a, b, gpu=False, squared=False):
+    """
+    Compute the pairwise euclidean distance between matrices a and b.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (n, f)
+        first matrix
+    b : np.ndarray (m, f)
+        second matrix
+    gpu : boolean, optional (default False)
+        if True and and the module cupy is available, the computation is done
+        on the GPU and the type of the matrix returned is cupy.ndarray.
+        Otherwise, compute on the CPU and returns np.ndarray.
+    squared : boolean, optional (default False)
+        if True, return squared euclidean distance matrix
+
+
+    Returns
+    -------
+    c : (n x m) np.ndarray or cupy.ndarray
+        pairwise euclidean distance distance matrix
+    """
+    # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
+    # First compute in c_GPU the squared euclidean distance. And return its
+    # square root. At each cell [i,j] of c, we want to have
+    # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
+    # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
+    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
+    if gpu:
+        a, b = to_gpu(a, b)
+    xp = get_array_module(a, b)
+
+    # Multiply a by b transpose to obtain in each cell [i,j] of c the
+    # value sum{k in range(f)} ( a[i,k]b[j,k] )
+    c = a.dot(b.T)
+    # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
+    xp.multiply(c, -2, out=c)
+
+    # Compute the vectors of the sum of squared elements.
+    a = xp.power(a, 2).sum(axis=1)
+    b = xp.power(b, 2).sum(axis=1)
+
+    # Add the vectors in each columns (respectivly rows) of c.
+    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
+    c += a.reshape(-1, 1)
+    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
+    c += b
+
+    if not squared:
+        xp.sqrt(c, out=c)
+
+    return c
+
+
 def dist(x1, x2=None, metric='sqeuclidean'):
-    """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist
+    """Compute distance between samples in x1 and x2 using function
+    scipy.spatial.distance.cdist
 
     Parameters
     ----------
@@ -85,10 +145,11 @@ def dist(x1, x2=None, metric='sqeuclidean'):
     x2 : np.array (n2,d), optional
         matrix with n2 samples of size d (if None then x2=x1)
     metric : str, fun, optional
-        name of the metric to be computed (full list in the doc of scipy),  If a string,
-        the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
-        'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
-        'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+        name of the metric to be computed (full list in the doc of scipy),  If
+        a string, the distance function can be 'braycurtis', 'canberra',
+        'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean',
+        'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
+        'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
         'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
 
 
@@ -222,6 +283,24 @@ def check_params(**kwargs):
     return check
 
 
+def get_array_module(*args):
+    """ return the proper module (numpy of cupy) depending on where the input
+    array are (CPU or GPU)
+    """
+    if cp:
+        return cp.get_array_module(*args)
+    else:
+        return np
+
+
+def to_gpu(*args):
+    """ Upload numpy array to GPU if available and return them"""
+    if cp:
+        return (cp.asarray(x) for x in args)
+    else:
+        return args
+
+
 class deprecated(object):
     """Decorator to mark a function or class as deprecated.
 

From d7d25b3603c095639a306eec9aa3d84849b091d9 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 08:08:16 +0200
Subject: [PATCH 07/42] Use function pairwiseEuclidean for dist computation

TODO:
 - add parameter "gpu" in init of all classes extending BaseTransport
 - pass parameter "gpu" to function pairwiseEuclidean
 - change in file bregman.py the function sinkhorn_knopp to use cupy or numpy
 - change in file da.py the function sinkhorn_lpl1_mm to use cupy or numpy
 - same but for other functions...
---
 ot/da.py    | 7 +++++--
 ot/utils.py | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index 1d3d0bae5..db7df9477 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -13,7 +13,7 @@
 
 from .bregman import sinkhorn
 from .lp import emd
-from .utils import unif, dist, kernel, cost_normalization
+from .utils import unif, dist, pairwiseEuclidean, kernel, cost_normalization
 from .utils import check_params, deprecated, BaseEstimator
 from .optim import cg
 from .optim import gcg
@@ -983,7 +983,10 @@ class label
         if check_params(Xs=Xs, Xt=Xt):
 
             # pairwise distance
-            self.cost_ = dist(Xs, Xt, metric=self.metric)
+            if self.metric == "sqeuclidean":
+                self.cost_ = pairwiseEuclidean(Xs, Xt, squared=True)
+            else:
+                self.cost_ = dist(Xs, Xt, metric=self.metric)
             self.cost_ = cost_normalization(self.cost_, self.norm)
 
             if (ys is not None) and (yt is not None):
diff --git a/ot/utils.py b/ot/utils.py
index b6a130855..44674ff25 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -89,7 +89,7 @@ def pairwiseEuclidean(a, b, gpu=False, squared=False):
     b : np.ndarray (m, f)
         second matrix
     gpu : boolean, optional (default False)
-        if True and and the module cupy is available, the computation is done
+        if True and the module cupy is available, the computation is done
         on the GPU and the type of the matrix returned is cupy.ndarray.
         Otherwise, compute on the CPU and returns np.ndarray.
     squared : boolean, optional (default False)
@@ -102,7 +102,7 @@ def pairwiseEuclidean(a, b, gpu=False, squared=False):
         pairwise euclidean distance distance matrix
     """
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
-    # First compute in c_GPU the squared euclidean distance. And return its
+    # First compute in c the squared euclidean distance. And return its
     # square root. At each cell [i,j] of c, we want to have
     # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
     # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:

From 787457063d8647af8eb9c3331593b47b279227e5 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 12:42:07 +0200
Subject: [PATCH 08/42] Initial implementation GPU for sinkhorn

- modified sinkhorn knopp code to be executed on numpy or cupy depending on the type of input matrices
- at the moment GPU version is slow compared to CPU. with the test I added I obtain these results:
```
Normal, time:   4.96 sec
   GPU, time:   4.65 sec
```

- TODO:
 - improve performances of sinkhorn knopp for GPU
 - add gpu support for LpL1
---
 ot/bregman.py     | 46 ++++++++++++++++++++++++----------------------
 ot/da.py          | 19 +++++++++++++------
 test/test_gpu2.py | 25 +++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 28 deletions(-)
 create mode 100644 test/test_gpu2.py

diff --git a/ot/bregman.py b/ot/bregman.py
index d63c51de1..d6a388a22 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -9,6 +9,7 @@
 # License: MIT License
 
 import numpy as np
+from .utils import get_array_module
 
 
 def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
@@ -308,15 +309,16 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     ot.optim.cg : General regularized OT
 
     """
+    xp = get_array_module(a, b, M)
 
-    a = np.asarray(a, dtype=np.float64)
-    b = np.asarray(b, dtype=np.float64)
-    M = np.asarray(M, dtype=np.float64)
+    a = xp.asarray(a, dtype=xp.float64)
+    b = xp.asarray(b, dtype=xp.float64)
+    M = xp.asarray(M, dtype=xp.float64)
 
     if len(a) == 0:
-        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+        a = xp.ones((M.shape[0],), dtype=xp.float64) / M.shape[0]
     if len(b) == 0:
-        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+        b = xp.ones((M.shape[1],), dtype=xp.float64) / M.shape[1]
 
     # init data
     Nini = len(a)
@@ -333,16 +335,16 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     # we assume that no distances are null except those of the diagonal of
     # distances
     if nbb:
-        u = np.ones((Nini, nbb)) / Nini
-        v = np.ones((Nfin, nbb)) / Nfin
+        u = xp.ones((Nini, nbb)) / Nini
+        v = xp.ones((Nfin, nbb)) / Nfin
     else:
-        u = np.ones(Nini) / Nini
-        v = np.ones(Nfin) / Nfin
+        u = xp.ones(Nini) / Nini
+        v = xp.ones(Nfin) / Nfin
 
     # print(reg)
 
-    K = np.exp(-M / reg)
-    # print(np.min(K))
+    K = xp.exp(-M / reg)
+    # print(xp.min(K))
 
     Kp = (1 / a).reshape(-1, 1) * K
     cpt = 0
@@ -350,13 +352,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     while (err > stopThr and cpt < numItermax):
         uprev = u
         vprev = v
-        KtransposeU = np.dot(K.T, u)
-        v = np.divide(b, KtransposeU)
-        u = 1. / np.dot(Kp, v)
+        KtransposeU = xp.dot(K.T, u)
+        v = xp.divide(b, KtransposeU)
+        u = 1. / xp.dot(Kp, v)
 
-        if (np.any(KtransposeU == 0) or
-                np.any(np.isnan(u)) or np.any(np.isnan(v)) or
-                np.any(np.isinf(u)) or np.any(np.isinf(v))):
+        if (xp.any(KtransposeU == 0) or
+                xp.any(xp.isnan(u)) or xp.any(xp.isnan(v)) or
+                xp.any(xp.isinf(u)) or xp.any(xp.isinf(v))):
             # we have reached the machine precision
             # come back to previous solution and quit loop
             print('Warning: numerical errors at iteration', cpt)
@@ -367,11 +369,11 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
             # we can speed up the process by checking for the error only all
             # the 10th iterations
             if nbb:
-                err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
-                    np.sum((v - vprev)**2) / np.sum((v)**2)
+                err = xp.sum((u - uprev)**2) / xp.sum((u)**2) + \
+                    xp.sum((v - vprev)**2) / xp.sum((v)**2)
             else:
                 transp = u.reshape(-1, 1) * (K * v)
-                err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
+                err = xp.linalg.norm((xp.sum(transp, axis=0) - b))**2
             if log:
                 log['err'].append(err)
 
@@ -386,9 +388,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
         log['v'] = v
 
     if nbb:  # return only loss
-        res = np.zeros((nbb))
+        res = xp.zeros((nbb))
         for i in range(nbb):
-            res[i] = np.sum(
+            res[i] = xp.sum(
                 u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
         if log:
             return res, log
diff --git a/ot/da.py b/ot/da.py
index db7df9477..f00d37e78 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -14,7 +14,7 @@
 from .bregman import sinkhorn
 from .lp import emd
 from .utils import unif, dist, pairwiseEuclidean, kernel, cost_normalization
-from .utils import check_params, deprecated, BaseEstimator
+from .utils import check_params, deprecated, BaseEstimator, to_gpu
 from .optim import cg
 from .optim import gcg
 
@@ -984,7 +984,8 @@ class label
 
             # pairwise distance
             if self.metric == "sqeuclidean":
-                self.cost_ = pairwiseEuclidean(Xs, Xt, squared=True)
+                self.cost_ = pairwiseEuclidean(Xs, Xt, gpu=self.gpu,
+                                               squared=True)
             else:
                 self.cost_ = dist(Xs, Xt, metric=self.metric)
             self.cost_ = cost_normalization(self.cost_, self.norm)
@@ -1010,6 +1011,8 @@ class label
             # distribution estimation
             self.mu_s = self.distribution_estimation(Xs)
             self.mu_t = self.distribution_estimation(Xt)
+            if self.gpu:
+                self.mu_s, self.mu_t = to_gpu(self.mu_s, self.mu_t)
 
             # store arrays of samples
             self.xs_ = Xs
@@ -1235,7 +1238,7 @@ def __init__(self, reg_e=1., max_iter=1000,
                  tol=10e-9, verbose=False, log=False,
                  metric="sqeuclidean", norm=None,
                  distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans', limit_max=np.infty):
+                 out_of_sample_map='ferradans', limit_max=np.infty, gpu=False):
 
         self.reg_e = reg_e
         self.max_iter = max_iter
@@ -1247,6 +1250,7 @@ def __init__(self, reg_e=1., max_iter=1000,
         self.limit_max = limit_max
         self.distribution_estimation = distribution_estimation
         self.out_of_sample_map = out_of_sample_map
+        self.gpu = gpu
 
     def fit(self, Xs=None, ys=None, Xt=None, yt=None):
         """Build a coupling matrix from source and target sets of samples
@@ -1335,7 +1339,7 @@ class EMDTransport(BaseTransport):
     def __init__(self, metric="sqeuclidean", norm=None,
                  distribution_estimation=distribution_estimation_uniform,
                  out_of_sample_map='ferradans', limit_max=10,
-                 max_iter=100000):
+                 max_iter=100000, gpu=False):
 
         self.metric = metric
         self.norm = norm
@@ -1343,6 +1347,7 @@ def __init__(self, metric="sqeuclidean", norm=None,
         self.distribution_estimation = distribution_estimation
         self.out_of_sample_map = out_of_sample_map
         self.max_iter = max_iter
+        self.gpu = gpu
 
     def fit(self, Xs, ys=None, Xt=None, yt=None):
         """Build a coupling matrix from source and target sets of samples
@@ -1435,7 +1440,7 @@ def __init__(self, reg_e=1., reg_cl=0.1,
                  tol=10e-9, verbose=False,
                  metric="sqeuclidean", norm=None,
                  distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans', limit_max=np.infty):
+                 out_of_sample_map='ferradans', limit_max=np.infty, gpu=False):
 
         self.reg_e = reg_e
         self.reg_cl = reg_cl
@@ -1448,6 +1453,7 @@ def __init__(self, reg_e=1., reg_cl=0.1,
         self.distribution_estimation = distribution_estimation
         self.out_of_sample_map = out_of_sample_map
         self.limit_max = limit_max
+        self.gpu = gpu
 
     def fit(self, Xs, ys=None, Xt=None, yt=None):
         """Build a coupling matrix from source and target sets of samples
@@ -1549,7 +1555,7 @@ def __init__(self, reg_e=1., reg_cl=0.1,
                  tol=10e-9, verbose=False, log=False,
                  metric="sqeuclidean", norm=None,
                  distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans', limit_max=10):
+                 out_of_sample_map='ferradans', limit_max=10, gpu=False):
 
         self.reg_e = reg_e
         self.reg_cl = reg_cl
@@ -1563,6 +1569,7 @@ def __init__(self, reg_e=1., reg_cl=0.1,
         self.distribution_estimation = distribution_estimation
         self.out_of_sample_map = out_of_sample_map
         self.limit_max = limit_max
+        self.gpu = gpu
 
     def fit(self, Xs, ys=None, Xt=None, yt=None):
         """Build a coupling matrix from source and target sets of samples
diff --git a/test/test_gpu2.py b/test/test_gpu2.py
new file mode 100644
index 000000000..2dcbd6750
--- /dev/null
+++ b/test/test_gpu2.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+import time
+import numpy as np
+import ot
+import cupy as cp
+
+a = np.random.rand(10000, 100)
+b = np.random.rand(10000, 100)
+
+ot1 = ot.da.SinkhornTransport(gpu=False)
+ot2 = ot.da.SinkhornTransport(gpu=True)
+
+time1 = time.time()
+ot1.fit(Xs=a, Xt=b)
+g1 = ot1.coupling_
+time2 = time.time()
+ot2.fit(Xs=a, Xt=b)
+g2 = ot2.coupling_
+time3 = time.time()
+
+
+print("Normal, time: {:6.2f} sec ".format(time2 - time1))
+print("   GPU, time: {:6.2f} sec ".format(time3 - time2))
+np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)

From 6c7012f7a02e06148f9f7c284cedb4fc0b591a4d Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 13:01:36 +0200
Subject: [PATCH 09/42] Little speedup sinkhorn:

Before
```
Normal, time:   4.96 sec
   GPU, time:   4.65 sec
```
After
```
Normal, time:   4.21 sec
   GPU, time:   3.45 sec
```
---
 ot/bregman.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/ot/bregman.py b/ot/bregman.py
index d6a388a22..87a1a810a 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -345,6 +345,10 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
 
     K = xp.exp(-M / reg)
     # print(xp.min(K))
+    KtransposeU = xp.empty(v.shape)
+    tmp = xp.empty(K.shape)
+    tmp2 = xp.empty(b.shape)
+    ones = xp.ones(u.shape)
 
     Kp = (1 / a).reshape(-1, 1) * K
     cpt = 0
@@ -352,9 +356,10 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     while (err > stopThr and cpt < numItermax):
         uprev = u
         vprev = v
-        KtransposeU = xp.dot(K.T, u)
-        v = xp.divide(b, KtransposeU)
-        u = 1. / xp.dot(Kp, v)
+        K.T.dot(u, out=KtransposeU)
+        xp.divide(b, KtransposeU, out=v)
+        Kp.dot(v, out=u)
+        xp.divide(ones, u, out=u)
 
         if (xp.any(KtransposeU == 0) or
                 xp.any(xp.isnan(u)) or xp.any(xp.isnan(v)) or
@@ -372,8 +377,11 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
                 err = xp.sum((u - uprev)**2) / xp.sum((u)**2) + \
                     xp.sum((v - vprev)**2) / xp.sum((v)**2)
             else:
-                transp = u.reshape(-1, 1) * (K * v)
-                err = xp.linalg.norm((xp.sum(transp, axis=0) - b))**2
+                xp.multiply(u.reshape(-1, 1), K, out=tmp)
+                xp.multiply(tmp, v.reshape(1, -1), out=tmp)
+                xp.sum(tmp, axis=0, out=tmp2)
+                tmp2 -= b
+                err = xp.linalg.norm(tmp2)**2
             if log:
                 log['err'].append(err)
 

From eae9d2aa9592e1d3c0d5f1580fc1a2b42d77b51a Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 13:23:25 +0200
Subject: [PATCH 10/42] Little speedup sinkhorn:

Before
```
Normal, time:   4.21 sec
   GPU, time:   3.45 sec
```
After
```
Normal, time:   3.70 sec
   GPU, time:   2.65 sec
```
---
 ot/bregman.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ot/bregman.py b/ot/bregman.py
index 87a1a810a..8a6a8ead3 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -343,7 +343,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
 
     # print(reg)
 
-    K = xp.exp(-M / reg)
+    K = xp.empty(M.shape)
+    xp.exp(M, out=K)
+    K /= -reg
     # print(xp.min(K))
     KtransposeU = xp.empty(v.shape)
     tmp = xp.empty(K.shape)

From a30f3aab76ac8dc94950012ebca66949d2933548 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Tue, 19 Sep 2017 16:16:29 +0200
Subject: [PATCH 11/42] Move test file

---
 test/test_gpu2.py => examples/test_gpu.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/test_gpu2.py => examples/test_gpu.py (100%)

diff --git a/test/test_gpu2.py b/examples/test_gpu.py
similarity index 100%
rename from test/test_gpu2.py
rename to examples/test_gpu.py

From 23869f8407665a7c9f70746ee8fc3434141d845b Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Wed, 20 Sep 2017 08:24:46 +0200
Subject: [PATCH 12/42] Fix mistake

---
 ot/bregman.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ot/bregman.py b/ot/bregman.py
index 8a6a8ead3..8f8f3d218 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -344,8 +344,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     # print(reg)
 
     K = xp.empty(M.shape)
-    xp.exp(M, out=K)
-    K /= -reg
+    xp.divide(M, -reg, out=K)
+    xp.exp(K, out=K)
+
     # print(xp.min(K))
     KtransposeU = xp.empty(v.shape)
     tmp = xp.empty(K.shape)

From dcfae0381a63e169746a46052512d6b12e2be3f2 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Wed, 20 Sep 2017 09:11:10 +0200
Subject: [PATCH 13/42] Replace xp by np

Improve the benchmark comparison script between CPU and GPU.
For me it now output the following:
        scipy's cdist, time:  10.28 sec
pairwiseEuclidean CPU, time:   0.63 sec
pairwiseEuclidean GPU, time:   0.33 sec
Sinkhorn CPU, time:   3.58 sec
Sinkhorn GPU, time:   2.63 sec
---
 examples/test_gpu.py | 54 ++++++++++++++++++++++++++++-----------
 ot/bregman.py        | 60 ++++++++++++++++++++++----------------------
 ot/utils.py          | 10 ++++----
 3 files changed, 74 insertions(+), 50 deletions(-)

diff --git a/examples/test_gpu.py b/examples/test_gpu.py
index 2dcbd6750..88f0eed3a 100644
--- a/examples/test_gpu.py
+++ b/examples/test_gpu.py
@@ -1,25 +1,49 @@
 # -*- coding: utf-8 -*-
 
 import time
+
 import numpy as np
-import ot
 import cupy as cp
+from scipy.spatial.distance import cdist
+import ot
 
-a = np.random.rand(10000, 100)
-b = np.random.rand(10000, 100)
 
-ot1 = ot.da.SinkhornTransport(gpu=False)
-ot2 = ot.da.SinkhornTransport(gpu=True)
+def benchDistance(a, b):
+    # First compare computation time for computing pairwise euclidean matrix
+    time1 = time.time()
+    M1 = cdist(a, b, metric="sqeuclidean")
+    time2 = time.time()
+    M2 = ot.utils.pairwiseEuclidean(a, b, gpu=False, squared=True)
+    time3 = time.time()
+    M3 = ot.utils.pairwiseEuclidean(a, b, gpu=True, squared=True)
+    time4 = time.time()
+
+    np.testing.assert_allclose(M1, M2, rtol=1e-5, atol=1e-5)
+    np.testing.assert_allclose(M2, cp.asnumpy(M3), rtol=1e-5, atol=1e-5)
+    print("        scipy's cdist, time: {:6.2f} sec ".format(time2 - time1))
+    print("pairwiseEuclidean CPU, time: {:6.2f} sec ".format(time3 - time2))
+    print("pairwiseEuclidean GPU, time: {:6.2f} sec ".format(time4 - time3))
 
-time1 = time.time()
-ot1.fit(Xs=a, Xt=b)
-g1 = ot1.coupling_
-time2 = time.time()
-ot2.fit(Xs=a, Xt=b)
-g2 = ot2.coupling_
-time3 = time.time()
 
+def benchSinkhorn(a, b):
+    # Then compare computation time for computing optimal sinkhorn coupling
+    ot1 = ot.da.SinkhornTransport(gpu=False)
+    ot2 = ot.da.SinkhornTransport(gpu=True)
 
-print("Normal, time: {:6.2f} sec ".format(time2 - time1))
-print("   GPU, time: {:6.2f} sec ".format(time3 - time2))
-np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)
+    time1 = time.time()
+    ot1.fit(Xs=a, Xt=b)
+    g1 = ot1.coupling_
+    time2 = time.time()
+    ot2.fit(Xs=a, Xt=b)
+    g2 = ot2.coupling_
+    time3 = time.time()
+
+    print("Sinkhorn CPU, time: {:6.2f} sec ".format(time2 - time1))
+    print("Sinkhorn GPU, time: {:6.2f} sec ".format(time3 - time2))
+    np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)
+
+
+a = np.random.rand(10000, 100)
+b = np.random.rand(10000, 100)
+benchDistance(a, b)
+benchSinkhorn(a, b)
diff --git a/ot/bregman.py b/ot/bregman.py
index 8f8f3d218..93deac2df 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -309,16 +309,16 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     ot.optim.cg : General regularized OT
 
     """
-    xp = get_array_module(a, b, M)
+    np = get_array_module(a, b, M)
 
-    a = xp.asarray(a, dtype=xp.float64)
-    b = xp.asarray(b, dtype=xp.float64)
-    M = xp.asarray(M, dtype=xp.float64)
+    a = np.asarray(a, dtype=np.float64)
+    b = np.asarray(b, dtype=np.float64)
+    M = np.asarray(M, dtype=np.float64)
 
     if len(a) == 0:
-        a = xp.ones((M.shape[0],), dtype=xp.float64) / M.shape[0]
+        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
     if len(b) == 0:
-        b = xp.ones((M.shape[1],), dtype=xp.float64) / M.shape[1]
+        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
     # init data
     Nini = len(a)
@@ -335,23 +335,23 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     # we assume that no distances are null except those of the diagonal of
     # distances
     if nbb:
-        u = xp.ones((Nini, nbb)) / Nini
-        v = xp.ones((Nfin, nbb)) / Nfin
+        u = np.ones((Nini, nbb)) / Nini
+        v = np.ones((Nfin, nbb)) / Nfin
     else:
-        u = xp.ones(Nini) / Nini
-        v = xp.ones(Nfin) / Nfin
+        u = np.ones(Nini) / Nini
+        v = np.ones(Nfin) / Nfin
 
     # print(reg)
 
-    K = xp.empty(M.shape)
-    xp.divide(M, -reg, out=K)
-    xp.exp(K, out=K)
+    K = np.empty(M.shape)
+    np.divide(M, -reg, out=K)
+    np.exp(K, out=K)
 
     # print(xp.min(K))
-    KtransposeU = xp.empty(v.shape)
-    tmp = xp.empty(K.shape)
-    tmp2 = xp.empty(b.shape)
-    ones = xp.ones(u.shape)
+    KtransposeU = np.empty(v.shape)
+    tmp = np.empty(K.shape)
+    tmp2 = np.empty(b.shape)
+    ones = np.ones(u.shape)
 
     Kp = (1 / a).reshape(-1, 1) * K
     cpt = 0
@@ -360,13 +360,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
         uprev = u
         vprev = v
         K.T.dot(u, out=KtransposeU)
-        xp.divide(b, KtransposeU, out=v)
+        np.divide(b, KtransposeU, out=v)
         Kp.dot(v, out=u)
-        xp.divide(ones, u, out=u)
+        np.divide(ones, u, out=u)
 
-        if (xp.any(KtransposeU == 0) or
-                xp.any(xp.isnan(u)) or xp.any(xp.isnan(v)) or
-                xp.any(xp.isinf(u)) or xp.any(xp.isinf(v))):
+        if (np.any(KtransposeU == 0) or
+                np.any(np.isnan(u)) or np.any(np.isnan(v)) or
+                np.any(np.isinf(u)) or np.any(np.isinf(v))):
             # we have reached the machine precision
             # come back to previous solution and quit loop
             print('Warning: numerical errors at iteration', cpt)
@@ -377,14 +377,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
             # we can speed up the process by checking for the error only all
             # the 10th iterations
             if nbb:
-                err = xp.sum((u - uprev)**2) / xp.sum((u)**2) + \
-                    xp.sum((v - vprev)**2) / xp.sum((v)**2)
+                err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+                    np.sum((v - vprev)**2) / np.sum((v)**2)
             else:
-                xp.multiply(u.reshape(-1, 1), K, out=tmp)
-                xp.multiply(tmp, v.reshape(1, -1), out=tmp)
-                xp.sum(tmp, axis=0, out=tmp2)
+                np.multiply(u.reshape(-1, 1), K, out=tmp)
+                np.multiply(tmp, v.reshape(1, -1), out=tmp)
+                np.sum(tmp, axis=0, out=tmp2)
                 tmp2 -= b
-                err = xp.linalg.norm(tmp2)**2
+                err = np.linalg.norm(tmp2)**2
             if log:
                 log['err'].append(err)
 
@@ -399,9 +399,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
         log['v'] = v
 
     if nbb:  # return only loss
-        res = xp.zeros((nbb))
+        res = np.zeros((nbb))
         for i in range(nbb):
-            res[i] = xp.sum(
+            res[i] = np.sum(
                 u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
         if log:
             return res, log
diff --git a/ot/utils.py b/ot/utils.py
index 44674ff25..b76c0d4ab 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -109,17 +109,17 @@ def pairwiseEuclidean(a, b, gpu=False, squared=False):
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
     if gpu:
         a, b = to_gpu(a, b)
-    xp = get_array_module(a, b)
+    np = get_array_module(a, b)
 
     # Multiply a by b transpose to obtain in each cell [i,j] of c the
     # value sum{k in range(f)} ( a[i,k]b[j,k] )
     c = a.dot(b.T)
     # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
-    xp.multiply(c, -2, out=c)
+    np.multiply(c, -2, out=c)
 
     # Compute the vectors of the sum of squared elements.
-    a = xp.power(a, 2).sum(axis=1)
-    b = xp.power(b, 2).sum(axis=1)
+    a = np.power(a, 2).sum(axis=1)
+    b = np.power(b, 2).sum(axis=1)
 
     # Add the vectors in each columns (respectivly rows) of c.
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
@@ -128,7 +128,7 @@ def pairwiseEuclidean(a, b, gpu=False, squared=False):
     c += b
 
     if not squared:
-        xp.sqrt(c, out=c)
+        np.sqrt(c, out=c)
 
     return c
 

From 9d9d99691de66b70bc7fa22c987611b6775bbb97 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Thu, 28 Sep 2017 15:04:09 +0200
Subject: [PATCH 14/42] Compute sinkhorn using float type of inputs

---
 examples/test_gpu.py | 10 ++++++----
 ot/bregman.py        | 29 ++++++++++++++---------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/examples/test_gpu.py b/examples/test_gpu.py
index 88f0eed3a..e7b4ec0c6 100644
--- a/examples/test_gpu.py
+++ b/examples/test_gpu.py
@@ -43,7 +43,9 @@ def benchSinkhorn(a, b):
     np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)
 
 
-a = np.random.rand(10000, 100)
-b = np.random.rand(10000, 100)
-benchDistance(a, b)
-benchSinkhorn(a, b)
+for tp in [np.float32, np.float64]:
+    print("Using " + str(tp))
+    a = np.random.rand(10000, 100).astype(tp)
+    b = np.random.rand(10000, 100).astype(tp)
+    benchDistance(a, b)
+    benchSinkhorn(a, b)
diff --git a/ot/bregman.py b/ot/bregman.py
index 93deac2df..473dde7d5 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -310,15 +310,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
 
     """
     np = get_array_module(a, b, M)
-
-    a = np.asarray(a, dtype=np.float64)
-    b = np.asarray(b, dtype=np.float64)
-    M = np.asarray(M, dtype=np.float64)
+    a = np.asarray(a, dtype=M.dtype)
+    b = np.asarray(b, dtype=M.dtype)
+    M = np.asarray(M, dtype=M.dtype)
 
     if len(a) == 0:
-        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+        a = np.ones((M.shape[0],), dtype=M.dtype) / M.shape[0]
     if len(b) == 0:
-        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+        b = np.ones((M.shape[1],), dtype=M.dtype) / M.shape[1]
 
     # init data
     Nini = len(a)
@@ -335,23 +334,23 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     # we assume that no distances are null except those of the diagonal of
     # distances
     if nbb:
-        u = np.ones((Nini, nbb)) / Nini
-        v = np.ones((Nfin, nbb)) / Nfin
+        u = np.ones((Nini, nbb), dtype=M.dtype) / Nini
+        v = np.ones((Nfin, nbb), dtype=M.dtype) / Nfin
     else:
-        u = np.ones(Nini) / Nini
-        v = np.ones(Nfin) / Nfin
+        u = np.ones(Nini, dtype=M.dtype) / Nini
+        v = np.ones(Nfin, dtype=M.dtype) / Nfin
 
     # print(reg)
 
-    K = np.empty(M.shape)
+    K = np.empty(M.shape, dtype=M.dtype)
     np.divide(M, -reg, out=K)
     np.exp(K, out=K)
 
     # print(xp.min(K))
-    KtransposeU = np.empty(v.shape)
-    tmp = np.empty(K.shape)
-    tmp2 = np.empty(b.shape)
-    ones = np.ones(u.shape)
+    KtransposeU = np.empty(v.shape, dtype=M.dtype)
+    tmp = np.empty(K.shape, dtype=M.dtype)
+    tmp2 = np.empty(b.shape, dtype=M.dtype)
+    ones = np.ones(u.shape, dtype=M.dtype)
 
     Kp = (1 / a).reshape(-1, 1) * K
     cpt = 0

From d6157db332edfce99045a0f7f0eef696e11ca3aa Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Mon, 9 Oct 2017 13:31:08 +0200
Subject: [PATCH 15/42] =?UTF-8?q?Add=20decorator=20from=20R=C3=A9mi?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TODO:
 - add GPU implementation for LpL1 using the decorator
 - remove the no longer used ot.gpu module
---
 ot/bregman.py |   4 +-
 ot/da.py      |   8 +++-
 ot/utils.py   | 102 +++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/ot/bregman.py b/ot/bregman.py
index 473dde7d5..7a27fd336 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -9,9 +9,10 @@
 # License: MIT License
 
 import numpy as np
-from .utils import get_array_module
+from .utils import get_array_module, gpu_fun
 
 
+@gpu_fun(in_arrays=[0, 1, 2], out_arrays=[0])
 def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
     u"""
     Solve the entropic regularization optimal transport problem and return the OT matrix
@@ -234,6 +235,7 @@ def sink():
     return sink()
 
 
+@gpu_fun(in_arrays=[0, 1, 2], out_arrays=[0])
 def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
     """
     Solve the entropic regularization optimal transport problem and return the OT matrix
diff --git a/ot/da.py b/ot/da.py
index f00d37e78..a065f2cb3 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -11,7 +11,7 @@
 
 import numpy as np
 
-from .bregman import sinkhorn
+from .bregman import sinkhorn, sinkhorn_knopp
 from .lp import emd
 from .utils import unif, dist, pairwiseEuclidean, kernel, cost_normalization
 from .utils import check_params, deprecated, BaseEstimator, to_gpu
@@ -1280,10 +1280,16 @@ class label
         super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
 
         # coupling estimation
+        """
         returned_ = sinkhorn(
             a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
             numItermax=self.max_iter, stopThr=self.tol,
             verbose=self.verbose, log=self.log)
+        """
+        returned_ = sinkhorn_knopp(
+            a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
+            numItermax=self.max_iter, stopThr=self.tol,
+            verbose=self.verbose, log=self.log, gpu=self.gpu)
 
         # deal with the value of log
         if self.log:
diff --git a/ot/utils.py b/ot/utils.py
index b76c0d4ab..5c73bc7ee 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -77,7 +77,93 @@ def clean_zeros(a, b, M):
     return a2, b2, M2
 
 
-def pairwiseEuclidean(a, b, gpu=False, squared=False):
+class gpu_fun(object):
+    """Decorator to handle gpu upload and download
+    >>> @gpu()
+    ... def some_function(): pass
+    Parameters
+    ----------
+    extra : string
+          to be added to the deprecation messages
+    """
+
+    # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
+    # but with many changes.
+
+    def __init__(self, in_arrays=[], out_arrays=[]):
+        self.in_arrays = in_arrays
+        self.out_arrays = out_arrays
+
+    def __call__(self, obj):
+        """Call method
+        Parameters
+        ----------
+        obj : object
+        """
+        if isinstance(obj, type):
+            return self._decorate_class(obj)
+        else:
+            return self._decorate_fun(obj)
+
+    def _decorate_class(self, cls):
+        # TODO: we should probably reset __new__ for full generality
+        # init = cls.__init__
+        # def wrapped(*args, **kwargs):
+        #     warnings.warn(msg, category=DeprecationWarning)
+        #     return init(*args, **kwargs)
+        # cls.__init__ = wrapped
+        # wrapped.__name__ = '__init__'
+        # wrapped.__doc__ = self._update_doc(init.__doc__)
+        # wrapped.deprecated_original = init
+
+        return cls
+
+    def _decorate_fun(self, fun):
+        """Decorate function fun"""
+
+        def wrapped(*args, **kwargs):
+
+            if cp and 'gpu' in kwargs and kwargs['gpu']:
+                # convert args to gpu twhose index are in in_array
+                args = [to_gpu(arg) if i in self.in_arrays else arg
+                        for i, arg in enumerate(args)]
+
+            # clean kwargs
+            if 'gpu' in kwargs:
+                kwargs.pop('gpu')
+            if 'to_np' in kwargs and kwargs['to_np']:
+                to_np = True
+                kwargs.pop('to_np')
+            else:
+                to_np = False
+
+            ret = fun(*args, **kwargs)
+
+            if cp and to_np:
+                if type(ret) == tuple:
+                    ret = (cp.asnumpy(r) if i in self.out_arrays else r
+                           for i, r in enumerate(ret))
+                else:
+                    if 0 in self.out_arrays:
+                        ret = cp.asnumpy(ret)
+
+            return ret
+
+        wrapped.__name__ = fun.__name__
+        wrapped.__dict__ = fun.__dict__
+        wrapped.__doc__ = self._update_doc(fun.__doc__)
+
+        return wrapped
+
+    def _update_doc(self, olddoc):
+
+        # TODO!!!
+        newdoc = olddoc
+        return newdoc
+
+
+@gpu_fun([0, 1], [0])
+def pairwiseEuclidean(a, b, squared=False):
     """
     Compute the pairwise euclidean distance between matrices a and b.
 
@@ -107,8 +193,6 @@ def pairwiseEuclidean(a, b, gpu=False, squared=False):
     # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
     # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
     # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
-    if gpu:
-        a, b = to_gpu(a, b)
     np = get_array_module(a, b)
 
     # Multiply a by b transpose to obtain in each cell [i,j] of c the
@@ -294,11 +378,17 @@ def get_array_module(*args):
 
 
 def to_gpu(*args):
-    """ Upload numpy array to GPU if available and return them"""
+    """ Upload numpy arrays to GPU if available and return them"""
     if cp:
-        return (cp.asarray(x) for x in args)
+        if len(args) > 1:
+            return (cp.asarray(x) for x in args)
+        else:
+            return cp.asarray(args[0])
     else:
-        return args
+        if len(args) > 1:
+            return args
+        else:
+            return args[0]
 
 
 class deprecated(object):

From 131b7117f7840a3169eefd8d59d96ec30979c526 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Sun, 3 Dec 2017 11:44:38 +0100
Subject: [PATCH 16/42] gpu changes:

- Modify the numpy implementation of ot lpl1 to be compatible with cupy
- Remove the no longer used submodule ot.gpu as the cpu and gpu code is now the same!

- I changed the little cpu vs gpu benchmark. Here are the results in my machine
Using <class 'numpy.float32'>
        scipy's cdist, time:   2.61 sec
pairwiseEuclidean CPU, time:   0.10 sec
pairwiseEuclidean GPU, time:   0.00 sec
Sinkhorn CPU, time:   0.57 sec
Sinkhorn GPU, time:   0.09 sec
Sinkhorn LpL1 CPU, time:  25.12 sec
Sinkhorn LpL1 GPU, time:   5.50 sec
Using <class 'numpy.float64'>
        scipy's cdist, time:   2.69 sec
pairwiseEuclidean CPU, time:   0.21 sec
pairwiseEuclidean GPU, time:   0.00 sec
Sinkhorn CPU, time:   1.02 sec
Sinkhorn GPU, time:   0.55 sec
Sinkhorn LpL1 CPU, time:  24.06 sec
Sinkhorn LpL1 GPU, time:   5.50 sec
---
 examples/test_gpu.py |  25 ++-
 ot/bregman.py        |   4 +-
 ot/da.py             |  25 ++-
 ot/gpu/__init__.py   |  12 --
 ot/gpu/bregman.py    | 175 ----------------
 ot/gpu/da.py         | 460 -------------------------------------------
 ot/utils.py          |  24 +--
 7 files changed, 48 insertions(+), 677 deletions(-)
 delete mode 100644 ot/gpu/__init__.py
 delete mode 100644 ot/gpu/bregman.py
 delete mode 100644 ot/gpu/da.py

diff --git a/examples/test_gpu.py b/examples/test_gpu.py
index e7b4ec0c6..cca7f56e6 100644
--- a/examples/test_gpu.py
+++ b/examples/test_gpu.py
@@ -25,7 +25,7 @@ def benchDistance(a, b):
     print("pairwiseEuclidean GPU, time: {:6.2f} sec ".format(time4 - time3))
 
 
-def benchSinkhorn(a, b):
+def benchSinkhorn(a, labels_a, b):
     # Then compare computation time for computing optimal sinkhorn coupling
     ot1 = ot.da.SinkhornTransport(gpu=False)
     ot2 = ot.da.SinkhornTransport(gpu=True)
@@ -42,10 +42,27 @@ def benchSinkhorn(a, b):
     print("Sinkhorn GPU, time: {:6.2f} sec ".format(time3 - time2))
     np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)
 
+    otlpl1 = ot.da.SinkhornLpl1Transport(gpu=False)
+    otlpl2 = ot.da.SinkhornLpl1Transport(gpu=True)
+    time1 = time.time()
+    otlpl1.fit(Xs=a, ys=labels_a, Xt=b)
+    g1 = otlpl1.coupling_
+    time2 = time.time()
+    otlpl2.fit(Xs=a, ys=labels_a, Xt=b)
+    g2 = otlpl2.coupling_
+    time3 = time.time()
+
+    print("Sinkhorn LpL1 CPU, time: {:6.2f} sec ".format(time2 - time1))
+    print("Sinkhorn LpL1 GPU, time: {:6.2f} sec ".format(time3 - time2))
+    np.testing.assert_allclose(g1, cp.asnumpy(g2), rtol=1e-5, atol=1e-5)
+
 
 for tp in [np.float32, np.float64]:
     print("Using " + str(tp))
-    a = np.random.rand(10000, 100).astype(tp)
-    b = np.random.rand(10000, 100).astype(tp)
+    n = 5000
+    d = 100
+    a = np.random.rand(n, d).astype(tp)
+    labels_a = (np.random.rand(n, 1) * 2).astype(int).ravel()
+    b = np.random.rand(n, d).astype(tp)
     benchDistance(a, b)
-    benchSinkhorn(a, b)
+    benchSinkhorn(a, labels_a, b)
diff --git a/ot/bregman.py b/ot/bregman.py
index 7a27fd336..b29e2022d 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -14,7 +14,7 @@
 
 @gpu_fun(in_arrays=[0, 1, 2], out_arrays=[0])
 def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
-    u"""
+    """
     Solve the entropic regularization optimal transport problem and return the OT matrix
 
     The function solves the following optimization problem:
@@ -348,7 +348,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
     np.divide(M, -reg, out=K)
     np.exp(K, out=K)
 
-    # print(xp.min(K))
+    # print(np.min(K))
     KtransposeU = np.empty(v.shape, dtype=M.dtype)
     tmp = np.empty(K.shape, dtype=M.dtype)
     tmp2 = np.empty(b.shape, dtype=M.dtype)
diff --git a/ot/da.py b/ot/da.py
index a065f2cb3..9eb3f282e 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -14,14 +14,16 @@
 from .bregman import sinkhorn, sinkhorn_knopp
 from .lp import emd
 from .utils import unif, dist, pairwiseEuclidean, kernel, cost_normalization
-from .utils import check_params, deprecated, BaseEstimator, to_gpu
+from .utils import check_params, deprecated, BaseEstimator, to_gpu, gpu_fun
+from .utils import get_array_module
 from .optim import cg
 from .optim import gcg
 
 
+@gpu_fun(in_arrays=[0, 1, 2, 3], out_arrays=[0])
 def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
                      numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
-                     log=False):
+                     log=False, **kwargs):
     """
     Solve the entropic regularization optimal transport problem with nonconvex
     group lasso regularization
@@ -104,14 +106,21 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
     ot.optim.cg : General regularized OT
 
     """
+    np = get_array_module(a, b, M)
     p = 0.5
     epsilon = 1e-3
 
     indices_labels = []
-    classes = np.unique(labels_a)
+
+    # Get unique labels. np.unique not used because it doesn't exist in cupy.
+    flt = labels_a.flatten()
+    flt.sort()
+    classes = flt[np.concatenate((np.array([True]), flt[1:] != flt[:-1]))]
+
+    # For each class, store the indexes of the class
     for c in classes:
         idxc, = np.where(labels_a == c)
-        indices_labels.append(idxc)
+        indices_labels.append(np.asarray(idxc.reshape(1, -1)))
 
     W = np.zeros(M.shape)
 
@@ -1011,8 +1020,6 @@ class label
             # distribution estimation
             self.mu_s = self.distribution_estimation(Xs)
             self.mu_t = self.distribution_estimation(Xt)
-            if self.gpu:
-                self.mu_s, self.mu_t = to_gpu(self.mu_s, self.mu_t)
 
             # store arrays of samples
             self.xs_ = Xs
@@ -1287,7 +1294,7 @@ class label
             verbose=self.verbose, log=self.log)
         """
         returned_ = sinkhorn_knopp(
-            a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
+            self.mu_s, self.mu_t, self.cost_, reg=self.reg_e,
             numItermax=self.max_iter, stopThr=self.tol,
             verbose=self.verbose, log=self.log, gpu=self.gpu)
 
@@ -1492,10 +1499,10 @@ class label
             super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt)
 
             self.coupling_ = sinkhorn_lpl1_mm(
-                a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
+                self.mu_s, ys, self.mu_t, self.cost_,
                 reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
                 numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
-                verbose=self.verbose)
+                verbose=self.verbose, gpu=self.gpu)
 
         return self
 
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
deleted file mode 100644
index 6edb518b9..000000000
--- a/ot/gpu/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from . import bregman
-from . import da
-from .bregman import sinkhorn_knopp
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Leo Gautheron <https://github.com/aje> 
-#
-# License: MIT License
-
-__all__ = ["bregman", "da", "sinkhorn_knopp"]
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
deleted file mode 100644
index efae1227c..000000000
--- a/ot/gpu/bregman.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Bregman projections for regularized OT with GPU
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-import numpy as np
-import cupy as cp
-
-
-def sinkhorn_knopp(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9,
-                   verbose=False, log=False, returnAsGPU=False):
-    r"""
-    Solve the entropic regularization optimal transport problem on GPU
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1= b
-
-             \gamma\geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term
-      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
-    scaling algorithm as proposed in [2]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    b : np.ndarray (nt,)
-        samples in the target domain
-    M_GPU : cupy.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term >0
-    numItermax : int, optional
-        Max number of iterations
-    stopThr : float, optional
-        Stop threshol on error (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-    returnAsGPU : bool, optional
-        return the OT matrix as a cupy.ndarray
-
-    Returns
-    -------
-    gamma : (ns x nt) np.ndarray or cupy.ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary return only if log==True in parameters
-
-    Examples
-    --------
-
-    >>> import ot
-    >>> a=[.5,.5]
-    >>> b=[.5,.5]
-    >>> M=[[0.,1.],[1.,0.]]
-    >>> ot.sinkhorn(a,b,M,1)
-    array([[ 0.36552929,  0.13447071],
-           [ 0.13447071,  0.36552929]])
-
-
-    References
-    ----------
-
-    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
-    Transport, Advances in Neural Information Processing Systems (NIPS) 26,
-    2013.
-
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-    # init data
-    Nini = len(a)
-    Nfin = len(b)
-
-    if log:
-        log = {'err': []}
-
-    # we assume that no distances are null except those of the diagonal of
-    # distances
-    u = (np.ones(Nini) / Nini).reshape((Nini, 1))
-    u_GPU = cp.array(u)
-    a_GPU = cp.array(a.reshape((Nini, 1)))
-    ones_GPU = cp.ones(u_GPU.shape)
-    v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1))
-    v_GPU = cp.array(v)
-    b_GPU = cp.array(b.reshape((Nfin, 1))).reshape(-1)
-    M_GPU = cp.divide(M_GPU, -reg)
-
-    K_GPU = cp.exp(M_GPU)
-
-    cp.divide(ones_GPU, a_GPU, out=a_GPU)
-    Kp_GPU = a_GPU.reshape(-1, 1) * K_GPU
-
-    tmp_GPU = cp.empty(K_GPU.shape)
-    tmp2_GPU = cp.empty(b_GPU.shape)
-    KtransposeU_GPU = cp.empty(v_GPU.shape)
-
-    cpt = 0
-    err = 1
-
-    while (err > stopThr and cpt < numItermax):
-        uprev_GPU = u_GPU
-        vprev_GPU = v_GPU
-        K_GPU.T.dot(u_GPU, out=KtransposeU_GPU)
-        cp.divide(b_GPU.reshape(-1, 1), KtransposeU_GPU, out=v_GPU)
-        Kp_GPU.dot(v_GPU, out=u_GPU)
-        cp.divide(ones_GPU, u_GPU, out=u_GPU)
-
-        if (cp.any(KtransposeU_GPU == 0) or
-                cp.any(cp.isnan(u_GPU)) or cp.any(cp.isnan(v_GPU)) or
-                cp.any(cp.isinf(u_GPU)) or cp.any(cp.isinf(v_GPU))):
-
-            # we have reached the machine precision
-            # come back to previous solution and quit loop
-            print('Warning: numerical errors at iteration', cpt)
-            u_GPU = uprev_GPU
-            v_GPU = vprev_GPU
-            break
-        if cpt % 10 == 0:
-            # we can speed up the process by checking for the error only all
-            # the 10th iterations
-
-            cp.multiply(u_GPU.reshape(-1, 1), K_GPU, out=tmp_GPU)
-            cp.multiply(tmp_GPU, v_GPU.reshape(1, -1), out=tmp_GPU)
-            cp.sum(tmp_GPU, axis=0, out=tmp2_GPU)
-            tmp2_GPU -= b_GPU
-            err = cp.linalg.norm(tmp2_GPU)**2
-            if log:
-                log['err'].append(err)
-
-            if verbose:
-                if cpt % 200 == 0:
-                    print(
-                        '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
-                print('{:5d}|{:8e}|'.format(cpt, err))
-        cpt += 1
-    if log:
-        log['u'] = cp.asnumpy(u_GPU)
-        log['v'] = cp.asnumpy(v_GPU)
-
-    K_GPU = u_GPU.reshape(-1, 1) * K_GPU * v_GPU.reshape(1, -1)
-
-    if returnAsGPU:
-        res = K_GPU
-    else:
-        res = cp.asnumpy(K_GPU)
-
-    if log:
-        return res, log
-    else:
-        return res
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
deleted file mode 100644
index abbb027f5..000000000
--- a/ot/gpu/da.py
+++ /dev/null
@@ -1,460 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Domain adaptation with optimal transport with GPU implementation
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Nicolas Courty <ncourty@irisa.fr>
-#         Michael Perrot <michael.perrot@univ-st-etienne.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-
-import numpy as np
-import cupy as cp
-
-from ..utils import check_params
-from ..da import BaseTransport, distribution_estimation_uniform
-from .bregman import sinkhorn_knopp
-
-
-def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
-    """
-    Compute the pairwise euclidean distance between matrices a and b.
-
-
-    Parameters
-    ----------
-    a : np.ndarray (n, f)
-        first matrix
-    b : np.ndarray (m, f)
-        second matrix
-    returnAsGPU : boolean, optional (default False)
-        if True, returns cupy matrix still on GPU, else return np.ndarray
-    squared : boolean, optional (default False)
-        if True, return squared euclidean distance matrix
-
-
-    Returns
-    -------
-    c : (n x m) np.ndarray or cupy.ndarray
-        pairwise euclidean distance distance matrix
-    """
-    # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
-    # First compute in c_GPU the squared euclidean distance. And return its
-    # square root. At each cell [i,j] of c, we want to have
-    # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
-    # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
-    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
-    a_GPU = cp.asarray(a)
-    b_GPU = cp.asarray(b)
-
-    # Multiply a by b transpose to obtain in each cell [i,j] of c the
-    # value sum{k in range(f)} ( a[i,k]b[j,k] )
-    c_GPU = a_GPU.dot(b_GPU.T)
-    # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
-    cp.multiply(c_GPU, -2, out=c_GPU)
-
-    # Compute the vectors of the sum of squared elements.
-    a_GPU = cp.power(a_GPU, 2).sum(axis=1)
-    b_GPU = cp.power(b_GPU, 2).sum(axis=1)
-
-    # Add the vectors in each columns (respectivly rows) of c.
-    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
-    c_GPU += a_GPU.reshape(-1, 1)
-    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
-    c_GPU += b_GPU
-
-    if not squared:
-        cp.sqrt(c_GPU, out=c_GPU)
-
-    if returnAsGPU:
-        return c_GPU
-    else:
-        return cp.asnumpy(c_GPU)
-
-
-def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
-                     numInnerItermax=200, stopInnerThr=1e-9,
-                     verbose=False, log=False):
-    """
-    Solve the entropic regularization optimal transport problem with nonconvex
-    group lasso regularization.
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) +
-                 \eta \Omega_g(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1= b
-
-             \gamma\geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega_e` is the entropic regularization term
-      :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - :math:`\Omega_g` is the group lasso  regulaization term
-      :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
-      where  :math:`\mathcal{I}_c` are the index of samples from class c in the
-      source domain.
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the generalised conditional
-    gradient as proposed in  [5]_ [7]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    labels_a : np.ndarray (ns,)
-        labels of samples in the source domain
-    b : np.ndarray (nt,)
-        samples weights in the target domain
-    M_GPU : cupy.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term for entropic regularization >0
-    eta : float, optional
-        Regularization term  for group lasso regularization >0
-    numItermax : int, optional
-        Max number of iterations
-    numInnerItermax : int, optional
-        Max number of iterations (inner sinkhorn solver)
-    stopInnerThr : float, optional
-        Stop threshold on error (inner sinkhorn solver) (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-
-
-    Returns
-    -------
-    gamma : (ns x nt) np.ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary return only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport
-           for Domain Adaptation," in IEEE Transactions on Pattern Analysis and
-           Machine Intelligence , vol.PP, no.99, pp.1-1
-    .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized
-           conditional gradient: analysis of convergence and applications.
-           arXiv preprint arXiv:1510.06567.
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.bregman.sinkhorn : Entropic regularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-    p = 0.5
-    epsilon = 1e-3
-
-    indices_labels = []
-    classes = np.unique(labels_a)
-    for c in classes:
-        idxc, = np.where(labels_a == c)
-        indices_labels.append(cp.asarray(idxc.reshape(1, -1)))
-
-    W_GPU = cp.zeros(M_GPU.shape)
-    Mreg_GPU = cp.empty(M_GPU.shape)
-    for cpt in range(numItermax):
-        cp.multiply(W_GPU, eta, out=Mreg_GPU)
-        Mreg_GPU += M_GPU
-        transp_GPU = sinkhorn_knopp(a, b, Mreg_GPU, reg,
-                                    numItermax=numInnerItermax,
-                                    stopThr=stopInnerThr, returnAsGPU=True)
-        # the transport has been computed. Check if classes are really
-        # separated
-        W_GPU.fill(1)
-        for (i, c) in enumerate(classes):
-            majs_GPU = cp.sum(transp_GPU[indices_labels[i]][0], axis=0)
-            majs_GPU = p * ((majs_GPU + epsilon)**(p - 1))
-            W_GPU[indices_labels[i]] = majs_GPU
-
-    return cp.asnumpy(transp_GPU)
-
-
-def cost_normalization(C, norm=None):
-    """ Apply normalization to the loss matrix
-
-
-    Parameters
-    ----------
-    C : cupy.array (n1, n2)
-        The cost matrix to normalize.
-    norm : str
-        type of normalization from 'median','max','log','loglog'. Any other
-        value do not normalize.
-
-
-    Returns
-    -------
-
-    C : cupy.array (n1, n2)
-        The input cost matrix normalized according to given norm.
-
-    """
-
-    if norm == "median":
-        C = cp.divide(np.median(cp.asnumpy(C)))
-    elif norm == "max":
-        C = cp.divide(cp.max(C))
-    elif norm == "log":
-        C = cp.log(1 + C)
-    elif norm == "loglog":
-        C = cp.log(1 + cp.log(1 + C))
-
-    return C
-
-
-class SinkhornTransport(BaseTransport):
-    """Domain Adapatation OT method based on Sinkhorn Algorithm
-
-    Parameters
-    ----------
-    reg_e : float, optional (default=1)
-        Entropic regularization parameter
-    max_iter : int, float, optional (default=1000)
-        The minimum number of iteration before stopping the optimization
-        algorithm if no it has not converged
-    tol : float, optional (default=10e-9)
-        The precision required to stop the optimization algorithm.
-    mapping : string, optional (default="barycentric")
-        The kind of mapping to apply to transport samples from a domain into
-        another one.
-        if "barycentric" only the samples used to estimate the coupling can
-        be transported from a domain to another one.
-    metric : string, optional (default="sqeuclidean")
-        The ground metric for the Wasserstein problem
-    norm : string, optional (default=None)
-        If given, normalize the ground metric to avoid numerical errors that
-        can occur with large metric values.
-    distribution : string, optional (default="uniform")
-        The kind of distribution estimation to employ
-    verbose : int, optional (default=0)
-        Controls the verbosity of the optimization algorithm
-    log : int, optional (default=0)
-        Controls the logs of the optimization algorithm
-    limit_max: float, optional (defaul=np.infty)
-        Controls the semi supervised mode. Transport between labeled source
-        and target samples of different classes will exhibit an infinite cost
-
-    Attributes
-    ----------
-    coupling_ : array-like, shape (n_source_samples, n_target_samples)
-        The optimal coupling
-    log_ : dictionary
-        The dictionary of log, empty dic if parameter log is not True
-
-    References
-    ----------
-    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-           "Optimal Transport for Domain Adaptation," in IEEE Transactions
-           on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
-    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
-           Transport, Advances in Neural Information Processing Systems (NIPS)
-           26, 2013
-    """
-
-    def __init__(self, reg_e=1., max_iter=1000,
-                 tol=10e-9, verbose=False, log=False,
-                 metric="sqeuclidean", norm=None,
-                 distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans', limit_max=np.infty):
-
-        self.reg_e = reg_e
-        self.max_iter = max_iter
-        self.tol = tol
-        self.verbose = verbose
-        self.log = log
-        self.metric = metric
-        self.norm = norm
-        self.limit_max = limit_max
-        self.distribution_estimation = distribution_estimation
-        self.out_of_sample_map = out_of_sample_map
-
-    def fit(self, Xs=None, ys=None, Xt=None, yt=None):
-        """Build a coupling matrix from source and target sets of samples
-        (Xs, ys) and (Xt, yt)
-
-        Parameters
-        ----------
-        Xs : array-like, shape (n_source_samples, n_features)
-            The training input samples.
-        ys : array-like, shape (n_source_samples,)
-            The class labels
-        Xt : array-like, shape (n_target_samples, n_features)
-            The training input samples.
-        yt : array-like, shape (n_target_samples,)
-            The class labels.
-
-        Returns
-        -------
-        self : object
-            Returns self.
-        """
-
-        # Fit
-        if not check_params(Xs=Xs, Xt=Xt):
-            return
-        # pairwise distance
-        self.cost_ = pairwiseEuclideanGPU(Xs, Xt, returnAsGPU=True,
-                                          squared=True)
-        self.cost_ = cost_normalization(self.cost_, self.norm)
-
-        # distribution estimation
-        self.mu_s = self.distribution_estimation(Xs)
-        self.mu_t = self.distribution_estimation(Xt)
-
-        # store arrays of samples
-        self.xs_ = Xs
-        self.xt_ = Xt
-
-        # coupling estimation
-        returned_ = sinkhorn_knopp(
-            a=self.mu_s, b=self.mu_t, M_GPU=self.cost_, reg=self.reg_e,
-            numItermax=self.max_iter, stopThr=self.tol,
-            verbose=self.verbose, log=self.log)
-
-        # deal with the value of log
-        if self.log:
-            self.coupling_, self.log_ = returned_
-        else:
-            self.coupling_ = returned_
-            self.log_ = dict()
-
-        return self
-
-
-class SinkhornLpl1Transport(BaseTransport):
-    """Domain Adapatation OT method based on sinkhorn algorithm +
-    LpL1 class regularization.
-
-    Parameters
-    ----------
-    reg_e : float, optional (default=1)
-        Entropic regularization parameter
-    reg_cl : float, optional (default=0.1)
-        Class regularization parameter
-    mapping : string, optional (default="barycentric")
-        The kind of mapping to apply to transport samples from a domain into
-        another one.
-        if "barycentric" only the samples used to estimate the coupling can
-        be transported from a domain to another one.
-    metric : string, optional (default="sqeuclidean")
-        The ground metric for the Wasserstein problem
-    norm : string, optional (default=None)
-        If given, normalize the ground metric to avoid numerical errors that
-        can occur with large metric values.
-    distribution : string, optional (default="uniform")
-        The kind of distribution estimation to employ
-    max_iter : int, float, optional (default=10)
-        The minimum number of iteration before stopping the optimization
-        algorithm if no it has not converged
-    max_inner_iter : int, float, optional (default=200)
-        The number of iteration in the inner loop
-    verbose : int, optional (default=0)
-        Controls the verbosity of the optimization algorithm
-    limit_max: float, optional (defaul=np.infty)
-        Controls the semi supervised mode. Transport between labeled source
-        and target samples of different classes will exhibit an infinite cost
-
-    Attributes
-    ----------
-    coupling_ : array-like, shape (n_source_samples, n_target_samples)
-        The optimal coupling
-
-    References
-    ----------
-
-    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-       "Optimal Transport for Domain Adaptation," in IEEE
-       Transactions on Pattern Analysis and Machine Intelligence ,
-       vol.PP, no.99, pp.1-1
-    .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
-       Generalized conditional gradient: analysis of convergence
-       and applications. arXiv preprint arXiv:1510.06567.
-
-    """
-
-    def __init__(self, reg_e=1., reg_cl=0.1,
-                 max_iter=10, max_inner_iter=200,
-                 tol=10e-9, verbose=False,
-                 metric="sqeuclidean", norm=None,
-                 distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans', limit_max=np.infty):
-
-        self.reg_e = reg_e
-        self.reg_cl = reg_cl
-        self.max_iter = max_iter
-        self.max_inner_iter = max_inner_iter
-        self.tol = tol
-        self.verbose = verbose
-        self.metric = metric
-        self.norm = norm
-        self.distribution_estimation = distribution_estimation
-        self.out_of_sample_map = out_of_sample_map
-        self.limit_max = limit_max
-
-    def fit(self, Xs, ys=None, Xt=None, yt=None):
-        """Build a coupling matrix from source and target sets of samples
-        (Xs, ys) and (Xt, yt)
-
-        Parameters
-        ----------
-        Xs : array-like, shape (n_source_samples, n_features)
-            The training input samples.
-        ys : array-like, shape (n_source_samples,)
-            The class labels
-        Xt : array-like, shape (n_target_samples, n_features)
-            The training input samples.
-        yt : array-like, shape (n_target_samples,)
-            The class labels. If some target samples are unlabeled, fill the
-            yt's elements with -1.
-
-            Warning: Note that, due to this convention -1 cannot be used as a
-            class label
-
-        Returns
-        -------
-        self : object
-            Returns self.
-        """
-
-        # check the necessary inputs parameters are here
-        if not check_params(Xs=Xs, Xt=Xt, ys=ys):
-            return self
-
-        # pairwise distance
-        self.cost_ = pairwiseEuclideanGPU(Xs, Xt, returnAsGPU=True,
-                                          squared=True)
-        self.cost_ = cost_normalization(self.cost_, self.norm)
-
-        # distribution estimation
-        self.mu_s = self.distribution_estimation(Xs)
-        self.mu_t = self.distribution_estimation(Xt)
-
-        # store arrays of samples
-        self.xs_ = Xs
-        self.xt_ = Xt
-
-        self.coupling_ = sinkhorn_lpl1_mm(
-            a=self.mu_s, labels_a=ys, b=self.mu_t, M_GPU=self.cost_,
-            reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
-            numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
-            verbose=self.verbose)
-
-        return self
diff --git a/ot/utils.py b/ot/utils.py
index 5c73bc7ee..6041adde8 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -83,8 +83,14 @@ class gpu_fun(object):
     ... def some_function(): pass
     Parameters
     ----------
-    extra : string
-          to be added to the deprecation messages
+    in_arrays : array of indexes of the parameters of some_function that must
+              be uploaded to the GPU
+    out_arrays: same but for the return and to download from gpu
+
+    Be carefull, the in_arrays indexes are from the parameters of the function
+    given in the *args. The parameters in **kwargs are not considered. So when
+    you call your function, the parameters given by name=value are not
+    considered in in_arrays and are thus not uploaded to the GPU.
     """
 
     # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
@@ -106,28 +112,16 @@ def __call__(self, obj):
             return self._decorate_fun(obj)
 
     def _decorate_class(self, cls):
-        # TODO: we should probably reset __new__ for full generality
-        # init = cls.__init__
-        # def wrapped(*args, **kwargs):
-        #     warnings.warn(msg, category=DeprecationWarning)
-        #     return init(*args, **kwargs)
-        # cls.__init__ = wrapped
-        # wrapped.__name__ = '__init__'
-        # wrapped.__doc__ = self._update_doc(init.__doc__)
-        # wrapped.deprecated_original = init
-
         return cls
 
     def _decorate_fun(self, fun):
         """Decorate function fun"""
 
         def wrapped(*args, **kwargs):
-
             if cp and 'gpu' in kwargs and kwargs['gpu']:
-                # convert args to gpu twhose index are in in_array
+                # convert args to gpu whose index are in in_arrays
                 args = [to_gpu(arg) if i in self.in_arrays else arg
                         for i, arg in enumerate(args)]
-
             # clean kwargs
             if 'gpu' in kwargs:
                 kwargs.pop('gpu')

From 39275aa74a9d997202dc60c7ec1b682c49568058 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Sun, 3 Dec 2017 14:29:41 +0100
Subject: [PATCH 17/42] fix some warnings

---
 ot/da.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index 9eb3f282e..1905aa07f 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -14,7 +14,7 @@
 from .bregman import sinkhorn, sinkhorn_knopp
 from .lp import emd
 from .utils import unif, dist, pairwiseEuclidean, kernel, cost_normalization
-from .utils import check_params, deprecated, BaseEstimator, to_gpu, gpu_fun
+from .utils import check_params, deprecated, BaseEstimator, gpu_fun
 from .utils import get_array_module
 from .optim import cg
 from .optim import gcg
@@ -339,17 +339,17 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
     if bias:
         xs1 = np.hstack((xs, np.ones((ns, 1))))
         xstxs = xs1.T.dot(xs1)
-        I = np.eye(d + 1)
-        I[-1] = 0
-        I0 = I[:, :-1]
+        Identity = np.eye(d + 1)
+        Identity[-1] = 0
+        I0 = Identity[:, :-1]
 
         def sel(x):
             return x[:-1, :]
     else:
         xs1 = xs
         xstxs = xs1.T.dot(xs1)
-        I = np.eye(d)
-        I0 = I
+        Identity = np.eye(d)
+        I0 = Identity
 
         def sel(x):
             return x
@@ -529,8 +529,8 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
     K = kernel(xs, xs, method=kerneltype, sigma=sigma)
     if bias:
         K1 = np.hstack((K, np.ones((ns, 1))))
-        I = np.eye(ns + 1)
-        I[-1] = 0
+        Identity = np.eye(ns + 1)
+        Identity[-1] = 0
         Kp = np.eye(ns + 1)
         Kp[:ns, :ns] = K
 
@@ -544,14 +544,14 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
 
     else:
         K1 = K
-        I = np.eye(ns)
+        Identity = np.eye(ns)
 
         # ls regul
         # K0 = K1.T.dot(K1)+eta*I
         # Kreg=I
 
         # proper kernel ridge
-        K0 = K + eta * I
+        K0 = K + eta * Identity
         Kreg = K
 
     if log:

From 7ad1571d6e220a9a797fa8b2d07f76830774da88 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Sun, 3 Dec 2017 14:50:28 +0100
Subject: [PATCH 18/42] fix mistake

---
 ot/da.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index 1905aa07f..16d0f55b9 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -120,7 +120,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
     # For each class, store the indexes of the class
     for c in classes:
         idxc, = np.where(labels_a == c)
-        indices_labels.append(np.asarray(idxc.reshape(1, -1)))
+        indices_labels.append(idxc)
 
     W = np.zeros(M.shape)
 
@@ -365,12 +365,14 @@ def sel(x):
 
     def loss(L, G):
         """Compute full loss"""
-        return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
+        return (np.sum((xs1.dot(L) - ns * G.dot(xt))**2) +
+                mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2))
 
     def solve_L(G):
         """ solve L problem with fixed G (least square)"""
         xst = ns * G.dot(xt)
-        return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0)
+        return np.linalg.solve(xstxs + eta * Identity,
+                               xs1.T.dot(xst) + eta * I0)
 
     def solve_G(L, G0):
         """Update G with CG algorithm"""
@@ -565,7 +567,8 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
 
     def loss(L, G):
         """Compute full loss"""
-        return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
+        return (np.sum((K1.dot(L) - ns * G.dot(xt))**2) +
+                mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L)))
 
     def solve_L_nobias(G):
         """ solve L problem with fixed G (least square)"""

From 0b40b5f122f9d425b7c7fcbf02bd5468611759a0 Mon Sep 17 00:00:00 2001
From: aje <leo_g_autheron@hotmail.fr>
Date: Sun, 3 Dec 2017 15:02:25 +0100
Subject: [PATCH 19/42] Update gpu test file

---
 test/test_gpu.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/test/test_gpu.py b/test/test_gpu.py
index e06f28482..27ac41c06 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -1,4 +1,4 @@
-"""Tests for module gpu for gpu acceleration """
+"""Tests for gpu acceleration """
 
 # Author: Remi Flamary <remi.flamary@unice.fr>
 #
@@ -11,7 +11,7 @@
 import ot
 
 try:  # test if cupy is installed
-    import ot.gpu
+    import cupy as cp
     nogpu = False
 except ImportError:
     nogpu = True
@@ -22,29 +22,23 @@ def test_gpu_sinkhorn():
 
     rng = np.random.RandomState(0)
 
-    def describe_res(r):
-        print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
-            np.min(r), np.max(r), np.mean(r), np.std(r)))
-
     for n_samples in [50, 100, 500, 1000]:
         print(n_samples)
         a = rng.rand(n_samples // 4, 100)
         b = rng.rand(n_samples, 100)
         time1 = time.time()
-        transport = ot.da.SinkhornTransport()
+        transport = ot.da.SinkhornTransport(gpu=False)
         transport.fit(Xs=a, Xt=b)
         G1 = transport.coupling_
         time2 = time.time()
-        transport = ot.gpu.da.SinkhornTransport()
+        transport = ot.da.SinkhornTransport(gpu=True)
         transport.fit(Xs=a, Xt=b)
         G2 = transport.coupling_
         time3 = time.time()
         print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
-        describe_res(G1)
         print("   GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2))
-        describe_res(G2)
 
-        np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5)
+        np.testing.assert_allclose(G1, cp.asnumpy(G2), rtol=1e-5, atol=1e-5)
 
 
 @pytest.mark.skipif(nogpu, reason="No GPU available")
@@ -52,29 +46,23 @@ def test_gpu_sinkhorn_lpl1():
 
     rng = np.random.RandomState(0)
 
-    def describe_res(r):
-        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
-              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
-
     for n_samples in [50, 100, 500]:
         print(n_samples)
         a = rng.rand(n_samples // 4, 100)
         labels_a = np.random.randint(10, size=(n_samples // 4))
         b = rng.rand(n_samples, 100)
         time1 = time.time()
-        transport = ot.da.SinkhornLpl1Transport()
+        transport = ot.da.SinkhornLpl1Transport(gpu=False)
         transport.fit(Xs=a, ys=labels_a, Xt=b)
         G1 = transport.coupling_
         time2 = time.time()
-        transport = ot.gpu.da.SinkhornLpl1Transport()
+        transport = ot.da.SinkhornLpl1Transport(gpu=True)
         transport.fit(Xs=a, ys=labels_a, Xt=b)
         G2 = transport.coupling_
         time3 = time.time()
         print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(
             time2 - time1))
-        describe_res(G1)
         print("   GPU sinkhorn lpl1, time: {:6.2f} sec ".format(
             time3 - time2))
-        describe_res(G2)
 
-        np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5)
+        np.testing.assert_allclose(G1, cp.asnumpy(G2), rtol=1e-5, atol=1e-5)

From cb739f625921e7fc19113d6d758e27ac69eac24b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 15:05:57 +0100
Subject: [PATCH 20/42] add linear mapping function

---
 examples/plot_otda_linear_mapping.py | 54 ++++++++++++++++++++++++++++
 ot/da.py                             | 36 +++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 examples/plot_otda_linear_mapping.py

diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
new file mode 100644
index 000000000..eff264830
--- /dev/null
+++ b/examples/plot_otda_linear_mapping.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Mar 20 14:31:15 2018
+
+@author: rflamary
+"""
+
+import numpy as np
+import pylab as pl
+import ot
+
+
+
+#%%
+
+
+n=1000
+d=2
+sigma=.1
+
+angles=np.random.rand(n,1)*2*np.pi
+xs=np.concatenate((np.sin(angles),np.cos(angles)),axis=1)+sigma*np.random.randn(n,2)
+
+xs[:n//2,1]+=2
+
+anglet=np.random.rand(n,1)*2*np.pi
+xt=np.concatenate((np.sin(anglet),np.cos(anglet)),axis=1)+sigma*np.random.randn(n,2)
+xt[:n//2,1]+=2
+
+
+A=np.array([[1.5,.7],[.7,1.5]])
+b=np.array([[4,2]])
+xt=xt.dot(A)+b
+
+#%%
+
+pl.figure(1,(5,5))
+pl.plot(xs[:,0],xs[:,1],'+')
+pl.plot(xt[:,0],xt[:,1],'o')
+
+#%%
+
+Ae,be=ot.da.OT_mapping_linear(xs,xt)
+
+xst=xs.dot(Ae)+be
+
+##%%
+
+pl.figure(1,(5,5))
+pl.clf()
+pl.plot(xs[:,0],xs[:,1],'+')
+pl.plot(xt[:,0],xt[:,1],'o')
+pl.plot(xst[:,0],xst[:,1],'+')
\ No newline at end of file
diff --git a/ot/da.py b/ot/da.py
index c68865418..63bee5a4b 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -10,6 +10,7 @@
 # License: MIT License
 
 import numpy as np
+import scipy.linalg as linalg
 
 from .bregman import sinkhorn
 from .lp import emd
@@ -633,6 +634,41 @@ def df(G):
         return G, L
 
 
+def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,log=False):
+    """ return OT linear operator between samples"""
+
+    d=xs.shape[1]    
+    
+    mxs=xs.mean(0,keepdims=True)
+    mxt=xt.mean(0,keepdims=True)
+
+    
+    if ws is None:
+        ws=np.ones((xs.shape[0],1))/xs.shape[0]
+    
+    if wt is None:
+        wt=np.ones((xt.shape[0],1))/xt.shape[0]    
+
+    Cs=(xs*ws).T.dot(xs)/ws.sum()+reg*np.eye(d)
+    Ct=(xt*wt).T.dot(xt)/wt.sum()+reg*np.eye(d)
+    
+    
+    Cs12=linalg.sqrtm(Cs)
+    Cs_12=linalg.inv(Cs12)
+        
+    M0=linalg.sqrtm(Cs12.dot(Ct.dot(Cs12)))
+        
+    A=Cs_12.dot(M0.dot(Cs_12)).T   
+    
+    b=mxt-mxs.dot(A)
+    
+    if log:
+        pass
+    else:
+        return A,b
+
+
+
 @deprecated("The class OTDA is deprecated in 0.3.1 and will be "
             "removed in 0.5"
             "\n\tfor standard transport use class EMDTransport instead.")

From 8fc9fce6c920c646ea7324ac0af54ad53e9aa1bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 16:21:47 +0100
Subject: [PATCH 21/42] add class LinearTransport

---
 examples/plot_otda_linear_mapping.py |  35 +++-
 ot/da.py                             | 239 ++++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 9 deletions(-)

diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
index eff264830..44aa9c5ef 100644
--- a/examples/plot_otda_linear_mapping.py
+++ b/examples/plot_otda_linear_mapping.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pylab as pl
 import ot
-
+import scipy.linalg as linalg
 
 
 #%%
@@ -19,11 +19,13 @@
 d=2
 sigma=.1
 
+# source samples
 angles=np.random.rand(n,1)*2*np.pi
 xs=np.concatenate((np.sin(angles),np.cos(angles)),axis=1)+sigma*np.random.randn(n,2)
-
 xs[:n//2,1]+=2
 
+
+# target samples
 anglet=np.random.rand(n,1)*2*np.pi
 xt=np.concatenate((np.sin(anglet),np.cos(anglet)),axis=1)+sigma*np.random.randn(n,2)
 xt[:n//2,1]+=2
@@ -43,7 +45,33 @@
 
 Ae,be=ot.da.OT_mapping_linear(xs,xt)
 
+Ae1=linalg.inv(Ae)
+be1=-be.dot(Ae1)
+
 xst=xs.dot(Ae)+be
+xts=xt.dot(Ae1)+be1
+
+##%%
+
+pl.figure(1,(5,5))
+pl.clf()
+pl.plot(xs[:,0],xs[:,1],'+')
+pl.plot(xt[:,0],xt[:,1],'o')
+pl.plot(xst[:,0],xst[:,1],'+')
+pl.plot(xts[:,0],xts[:,1],'o')
+
+pl.show()
+
+
+#%% Example class with  on images
+
+mapping=ot.da.LinearTransport()
+
+mapping.fit(Xs=xs,Xt=xt)
+
+
+xst=mapping.transform(Xs=xs)
+xts=mapping.inverse_transform(Xt=xt)
 
 ##%%
 
@@ -51,4 +79,5 @@
 pl.clf()
 pl.plot(xs[:,0],xs[:,1],'+')
 pl.plot(xt[:,0],xt[:,1],'o')
-pl.plot(xst[:,0],xst[:,1],'+')
\ No newline at end of file
+pl.plot(xst[:,0],xst[:,1],'+')
+pl.plot(xts[:,0],xts[:,1],'o')
diff --git a/ot/da.py b/ot/da.py
index 63bee5a4b..ab5f86006 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -634,13 +634,76 @@ def df(G):
         return G, L
 
 
-def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,log=False):
-    """ return OT linear operator between samples"""
+def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,bias=True,log=False):
+    """ return OT linear operator between samples
+
+    The function estimate the optimal linear operator that align the two
+    empirical distributions. This is equivalent to estimating the closed 
+    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)` 
+    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
+    
+    The linear operator from source to target :math:`M`
+
+    .. math::
+        M(x)=Ax+b
+        
+    where :
+    
+    .. math::
+        A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
+        \Sigma_s^{-1/2}
+    .. math::  
+        b=\mu_t-A\mu_s
+
+    Parameters
+    ----------
+    xs : np.ndarray (ns,d)
+        samples in the source domain
+    xt : np.ndarray (nt,d)
+        samples in the target domain
+    reg : float,optional
+        regularization added to the daigonals of convariances (>0)
+    ws : np.ndarray (ns,1), optional
+        weights for the source samples
+    wt : np.ndarray (ns,1), optional
+        weights for the target samples  
+    bias: boolean, optional
+        estimate bias b else b=0 (default:True)
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    A : (d x d) ndarray
+        Linear operator
+    b : (1 x d) ndarray
+        bias
+    log : dict
+        log dictionary return only if log==True in parameters
+
+
+    References
+    ----------
+
+    .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of 
+        distributions", Journal of Optimization Theory and Applications
+        Vol 43, 1984
+
+
+    """
 
     d=xs.shape[1]    
     
-    mxs=xs.mean(0,keepdims=True)
-    mxt=xt.mean(0,keepdims=True)
+    if bias:
+        mxs=xs.mean(0,keepdims=True)
+        mxt=xt.mean(0,keepdims=True)
+        
+        xs=xs-mxs
+        xt=xt-mxt
+    else:
+        mxs=np.zeros((1,d))
+        mxt=np.zeros((1,d))
 
     
     if ws is None:
@@ -658,12 +721,17 @@ def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,log=False):
         
     M0=linalg.sqrtm(Cs12.dot(Ct.dot(Cs12)))
         
-    A=Cs_12.dot(M0.dot(Cs_12)).T   
+    A=Cs_12.dot(M0.dot(Cs_12))
     
     b=mxt-mxs.dot(A)
     
     if log:
-        pass
+        log={}
+        log['Cs']=Cs
+        log['Ct']=Ct
+        log['Cs12']=Cs12
+        log['Cs_12']=Cs_12
+        return A,b,log
     else:
         return A,b
 
@@ -1216,6 +1284,165 @@ class label
             return transp_Xt
 
 
+class LinearTransport(BaseTransport):
+    """ OT linear operator between empirical distributions
+
+    The function estimate the optimal linear operator that align the two
+    empirical distributions. This is equivalent to estimating the closed 
+    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)` 
+    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
+    
+    The linear operator from source to target :math:`M`
+
+    .. math::
+        M(x)=Ax+b
+        
+    where :
+    
+    .. math::
+        A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
+        \Sigma_s^{-1/2}
+    .. math::  
+        b=\mu_t-A\mu_s
+
+    Parameters
+    ----------
+    reg : float,optional
+        regularization added to the daigonals of convariances (>0) 
+    bias: boolean, optional
+        estimate bias b else b=0 (default:True)
+    log : bool, optional
+        record log if True
+        
+        
+    """
+    
+    def __init__(self, reg=1e-8,bias=True,log=False,
+                 distribution_estimation=distribution_estimation_uniform):
+        
+        self.bias=bias
+        self.log=log
+        self.reg=reg
+        self.distribution_estimation=distribution_estimation
+
+    def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_target_samples,)
+            The class labels. If some target samples are unlabeled, fill the
+            yt's elements with -1.
+
+            Warning: Note that, due to this convention -1 cannot be used as a
+            class label
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+        
+        self.mu_s = self.distribution_estimation(Xs)
+        self.mu_t = self.distribution_estimation(Xt)
+
+        
+
+        # coupling estimation
+        returned_ = OT_mapping_linear(Xs,Xt,reg=self.reg,
+                                      ws=self.mu_s.reshape((-1,1)),
+                                      wt=self.mu_t.reshape((-1,1)),
+                                      bias=self.bias,log=self.log)
+
+        # deal with the value of log
+        if self.log:
+            self.A_, self.B_, self.log_ = returned_
+        else:
+            self.A_, self.B_, = returned_
+            self.log_ = dict()
+        
+        # re compute inverse mapping
+        self.A1_=linalg.inv(self.A_)
+        self.B1_=-self.B_.dot(self.A1_)
+
+        return self
+
+    def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
+        """Transports source samples Xs onto target ones Xt
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_target_samples,)
+            The class labels. If some target samples are unlabeled, fill the
+            yt's elements with -1.
+
+            Warning: Note that, due to this convention -1 cannot be used as a
+            class label
+        batch_size : int, optional (default=128)
+            The batch size for out of sample inverse transform
+
+        Returns
+        -------
+        transp_Xs : array-like, shape (n_source_samples, n_features)
+            The transport source samples.
+        """
+
+        # check the necessary inputs parameters are here
+        if check_params(Xs=Xs):
+
+            transp_Xs= Xs.dot(self.A_)+self.B_
+
+            return transp_Xs
+
+    def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None,
+                          batch_size=128):
+        """Transports target samples Xt onto target samples Xs
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_target_samples,)
+            The class labels. If some target samples are unlabeled, fill the
+            yt's elements with -1.
+
+            Warning: Note that, due to this convention -1 cannot be used as a
+            class label
+        batch_size : int, optional (default=128)
+            The batch size for out of sample inverse transform
+
+        Returns
+        -------
+        transp_Xt : array-like, shape (n_source_samples, n_features)
+            The transported target samples.
+        """
+
+        # check the necessary inputs parameters are here
+        if check_params(Xt=Xt):
+
+            transp_Xt= Xt.dot(self.A1_)+self.B1_
+
+            return transp_Xt
+
+
+
 class SinkhornTransport(BaseTransport):
 
     """Domain Adapatation OT method based on Sinkhorn Algorithm

From c1046238d826fe9cf1294f8ea60b8d44743fac78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 16:27:49 +0100
Subject: [PATCH 22/42] passing tests

---
 examples/plot_otda_linear_mapping.py |  74 +++++++-------
 ot/da.py                             | 147 +++++++++++++--------------
 2 files changed, 110 insertions(+), 111 deletions(-)

diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
index 44aa9c5ef..143f129e6 100644
--- a/examples/plot_otda_linear_mapping.py
+++ b/examples/plot_otda_linear_mapping.py
@@ -15,69 +15,71 @@
 #%%
 
 
-n=1000
-d=2
-sigma=.1
+n = 1000
+d = 2
+sigma = .1
 
 # source samples
-angles=np.random.rand(n,1)*2*np.pi
-xs=np.concatenate((np.sin(angles),np.cos(angles)),axis=1)+sigma*np.random.randn(n,2)
-xs[:n//2,1]+=2
+angles = np.random.rand(n, 1) * 2 * np.pi
+xs = np.concatenate((np.sin(angles), np.cos(angles)),
+                    axis=1) + sigma * np.random.randn(n, 2)
+xs[:n // 2, 1] += 2
 
 
 # target samples
-anglet=np.random.rand(n,1)*2*np.pi
-xt=np.concatenate((np.sin(anglet),np.cos(anglet)),axis=1)+sigma*np.random.randn(n,2)
-xt[:n//2,1]+=2
+anglet = np.random.rand(n, 1) * 2 * np.pi
+xt = np.concatenate((np.sin(anglet), np.cos(anglet)),
+                    axis=1) + sigma * np.random.randn(n, 2)
+xt[:n // 2, 1] += 2
 
 
-A=np.array([[1.5,.7],[.7,1.5]])
-b=np.array([[4,2]])
-xt=xt.dot(A)+b
+A = np.array([[1.5, .7], [.7, 1.5]])
+b = np.array([[4, 2]])
+xt = xt.dot(A) + b
 
 #%%
 
-pl.figure(1,(5,5))
-pl.plot(xs[:,0],xs[:,1],'+')
-pl.plot(xt[:,0],xt[:,1],'o')
+pl.figure(1, (5, 5))
+pl.plot(xs[:, 0], xs[:, 1], '+')
+pl.plot(xt[:, 0], xt[:, 1], 'o')
 
 #%%
 
-Ae,be=ot.da.OT_mapping_linear(xs,xt)
+Ae, be = ot.da.OT_mapping_linear(xs, xt)
 
-Ae1=linalg.inv(Ae)
-be1=-be.dot(Ae1)
+Ae1 = linalg.inv(Ae)
+be1 = -be.dot(Ae1)
 
-xst=xs.dot(Ae)+be
-xts=xt.dot(Ae1)+be1
+xst = xs.dot(Ae) + be
+xts = xt.dot(Ae1) + be1
 
-##%%
+# %%
 
-pl.figure(1,(5,5))
+pl.figure(1, (5, 5))
 pl.clf()
-pl.plot(xs[:,0],xs[:,1],'+')
-pl.plot(xt[:,0],xt[:,1],'o')
-pl.plot(xst[:,0],xst[:,1],'+')
-pl.plot(xts[:,0],xts[:,1],'o')
+pl.plot(xs[:, 0], xs[:, 1], '+')
+pl.plot(xt[:, 0], xt[:, 1], 'o')
+pl.plot(xst[:, 0], xst[:, 1], '+')
+pl.plot(xts[:, 0], xts[:, 1], 'o')
 
 pl.show()
 
 
 #%% Example class with  on images
 
-mapping=ot.da.LinearTransport()
+mapping = ot.da.LinearTransport()
 
-mapping.fit(Xs=xs,Xt=xt)
+mapping.fit(Xs=xs, Xt=xt)
 
 
-xst=mapping.transform(Xs=xs)
-xts=mapping.inverse_transform(Xt=xt)
+xst = mapping.transform(Xs=xs)
+xts = mapping.inverse_transform(Xt=xt)
 
-##%%
+# %%
 
-pl.figure(1,(5,5))
+pl.figure(1, (5, 5))
 pl.clf()
-pl.plot(xs[:,0],xs[:,1],'+')
-pl.plot(xt[:,0],xt[:,1],'o')
-pl.plot(xst[:,0],xst[:,1],'+')
-pl.plot(xts[:,0],xts[:,1],'o')
+pl.plot(xs[:, 0], xs[:, 1], '+')
+pl.plot(xt[:, 0], xt[:, 1], 'o')
+pl.plot(xst[:, 0], xst[:, 1], '+')
+pl.plot(xts[:, 0], xts[:, 1], 'o')
diff --git a/ot/da.py b/ot/da.py
index ab5f86006..f789396a6 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -357,7 +357,8 @@ def sel(x):
 
     def loss(L, G):
         """Compute full loss"""
-        return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
+        return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * \
+            np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
 
     def solve_L(G):
         """ solve L problem with fixed G (least square)"""
@@ -557,7 +558,8 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
 
     def loss(L, G):
         """Compute full loss"""
-        return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
+        return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * \
+            np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
 
     def solve_L_nobias(G):
         """ solve L problem with fixed G (least square)"""
@@ -634,25 +636,26 @@ def df(G):
         return G, L
 
 
-def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,bias=True,log=False):
+def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
+                      wt=None, bias=True, log=False):
     """ return OT linear operator between samples
 
     The function estimate the optimal linear operator that align the two
-    empirical distributions. This is equivalent to estimating the closed 
-    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)` 
+    empirical distributions. This is equivalent to estimating the closed
+    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)`
     and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
-    
+
     The linear operator from source to target :math:`M`
 
     .. math::
         M(x)=Ax+b
-        
+
     where :
-    
+
     .. math::
         A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
         \Sigma_s^{-1/2}
-    .. math::  
+    .. math::
         b=\mu_t-A\mu_s
 
     Parameters
@@ -666,7 +669,7 @@ def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,bias=True,log=False):
     ws : np.ndarray (ns,1), optional
         weights for the source samples
     wt : np.ndarray (ns,1), optional
-        weights for the target samples  
+        weights for the target samples
     bias: boolean, optional
         estimate bias b else b=0 (default:True)
     log : bool, optional
@@ -686,55 +689,52 @@ def OT_mapping_linear(xs, xt, reg=1e-6,ws=None,wt=None,bias=True,log=False):
     References
     ----------
 
-    .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of 
+    .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
         distributions", Journal of Optimization Theory and Applications
         Vol 43, 1984
 
 
     """
 
-    d=xs.shape[1]    
-    
+    d = xs.shape[1]
+
     if bias:
-        mxs=xs.mean(0,keepdims=True)
-        mxt=xt.mean(0,keepdims=True)
-        
-        xs=xs-mxs
-        xt=xt-mxt
+        mxs = xs.mean(0, keepdims=True)
+        mxt = xt.mean(0, keepdims=True)
+
+        xs = xs - mxs
+        xt = xt - mxt
     else:
-        mxs=np.zeros((1,d))
-        mxt=np.zeros((1,d))
+        mxs = np.zeros((1, d))
+        mxt = np.zeros((1, d))
 
-    
     if ws is None:
-        ws=np.ones((xs.shape[0],1))/xs.shape[0]
-    
+        ws = np.ones((xs.shape[0], 1)) / xs.shape[0]
+
     if wt is None:
-        wt=np.ones((xt.shape[0],1))/xt.shape[0]    
-
-    Cs=(xs*ws).T.dot(xs)/ws.sum()+reg*np.eye(d)
-    Ct=(xt*wt).T.dot(xt)/wt.sum()+reg*np.eye(d)
-    
-    
-    Cs12=linalg.sqrtm(Cs)
-    Cs_12=linalg.inv(Cs12)
-        
-    M0=linalg.sqrtm(Cs12.dot(Ct.dot(Cs12)))
-        
-    A=Cs_12.dot(M0.dot(Cs_12))
-    
-    b=mxt-mxs.dot(A)
-    
+        wt = np.ones((xt.shape[0], 1)) / xt.shape[0]
+
+    Cs = (xs * ws).T.dot(xs) / ws.sum() + reg * np.eye(d)
+    Ct = (xt * wt).T.dot(xt) / wt.sum() + reg * np.eye(d)
+
+    Cs12 = linalg.sqrtm(Cs)
+    Cs_12 = linalg.inv(Cs12)
+
+    M0 = linalg.sqrtm(Cs12.dot(Ct.dot(Cs12)))
+
+    A = Cs_12.dot(M0.dot(Cs_12))
+
+    b = mxt - mxs.dot(A)
+
     if log:
-        log={}
-        log['Cs']=Cs
-        log['Ct']=Ct
-        log['Cs12']=Cs12
-        log['Cs_12']=Cs_12
-        return A,b,log
+        log = {}
+        log['Cs'] = Cs
+        log['Ct'] = Ct
+        log['Cs12'] = Cs12
+        log['Cs_12'] = Cs_12
+        return A, b, log
     else:
-        return A,b
-
+        return A, b
 
 
 @deprecated("The class OTDA is deprecated in 0.3.1 and will be "
@@ -1288,42 +1288,42 @@ class LinearTransport(BaseTransport):
     """ OT linear operator between empirical distributions
 
     The function estimate the optimal linear operator that align the two
-    empirical distributions. This is equivalent to estimating the closed 
-    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)` 
+    empirical distributions. This is equivalent to estimating the closed
+    form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)`
     and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
-    
+
     The linear operator from source to target :math:`M`
 
     .. math::
         M(x)=Ax+b
-        
+
     where :
-    
+
     .. math::
         A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
         \Sigma_s^{-1/2}
-    .. math::  
+    .. math::
         b=\mu_t-A\mu_s
 
     Parameters
     ----------
     reg : float,optional
-        regularization added to the daigonals of convariances (>0) 
+        regularization added to the daigonals of convariances (>0)
     bias: boolean, optional
         estimate bias b else b=0 (default:True)
     log : bool, optional
         record log if True
-        
-        
+
+
     """
-    
-    def __init__(self, reg=1e-8,bias=True,log=False,
+
+    def __init__(self, reg=1e-8, bias=True, log=False,
                  distribution_estimation=distribution_estimation_uniform):
-        
-        self.bias=bias
-        self.log=log
-        self.reg=reg
-        self.distribution_estimation=distribution_estimation
+
+        self.bias = bias
+        self.log = log
+        self.reg = reg
+        self.distribution_estimation = distribution_estimation
 
     def fit(self, Xs=None, ys=None, Xt=None, yt=None):
         """Build a coupling matrix from source and target sets of samples
@@ -1349,17 +1349,15 @@ class label
         self : object
             Returns self.
         """
-        
+
         self.mu_s = self.distribution_estimation(Xs)
         self.mu_t = self.distribution_estimation(Xt)
 
-        
-
         # coupling estimation
-        returned_ = OT_mapping_linear(Xs,Xt,reg=self.reg,
-                                      ws=self.mu_s.reshape((-1,1)),
-                                      wt=self.mu_t.reshape((-1,1)),
-                                      bias=self.bias,log=self.log)
+        returned_ = OT_mapping_linear(Xs, Xt, reg=self.reg,
+                                      ws=self.mu_s.reshape((-1, 1)),
+                                      wt=self.mu_t.reshape((-1, 1)),
+                                      bias=self.bias, log=self.log)
 
         # deal with the value of log
         if self.log:
@@ -1367,10 +1365,10 @@ class label
         else:
             self.A_, self.B_, = returned_
             self.log_ = dict()
-        
+
         # re compute inverse mapping
-        self.A1_=linalg.inv(self.A_)
-        self.B1_=-self.B_.dot(self.A1_)
+        self.A1_ = linalg.inv(self.A_)
+        self.B1_ = -self.B_.dot(self.A1_)
 
         return self
 
@@ -1403,7 +1401,7 @@ class label
         # check the necessary inputs parameters are here
         if check_params(Xs=Xs):
 
-            transp_Xs= Xs.dot(self.A_)+self.B_
+            transp_Xs = Xs.dot(self.A_) + self.B_
 
             return transp_Xs
 
@@ -1437,12 +1435,11 @@ class label
         # check the necessary inputs parameters are here
         if check_params(Xt=Xt):
 
-            transp_Xt= Xt.dot(self.A1_)+self.B1_
+            transp_Xt = Xt.dot(self.A1_) + self.B1_
 
             return transp_Xt
 
 
-
 class SinkhornTransport(BaseTransport):
 
     """Domain Adapatation OT method based on Sinkhorn Algorithm

From 4fc9ccc7c6c96a43c48be54e89133c8f481d8bf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 16:39:24 +0100
Subject: [PATCH 23/42] better example+test

---
 Makefile                             |  2 +-
 examples/plot_otda_linear_mapping.py | 98 +++++++++++++++++++++-------
 2 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/Makefile b/Makefile
index 3f19e8ac8..d334163bd 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,7 @@ pep8 :
 	flake8 examples/ ot/ test/
 
 test : FORCE pep8
-	python -m py.test -v test/ --cov=ot --cov-report html:cov_html
+	python3 -m pytest -v test/ --cov=ot --cov-report html:cov_html
 	
 pytest : FORCE 
 	python -m py.test -v test/ --cov=ot
diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
index 143f129e6..163f8f197 100644
--- a/examples/plot_otda_linear_mapping.py
+++ b/examples/plot_otda_linear_mapping.py
@@ -9,11 +9,11 @@
 import numpy as np
 import pylab as pl
 import ot
-import scipy.linalg as linalg
-
-
-#%%
+from scipy import ndimage
 
+##############################################################################
+# Generate data
+# -------------
 
 n = 1000
 d = 2
@@ -37,49 +37,103 @@
 b = np.array([[4, 2]])
 xt = xt.dot(A) + b
 
-#%%
+##############################################################################
+# Plot data
+# ---------
 
 pl.figure(1, (5, 5))
 pl.plot(xs[:, 0], xs[:, 1], '+')
 pl.plot(xt[:, 0], xt[:, 1], 'o')
 
-#%%
 
-Ae, be = ot.da.OT_mapping_linear(xs, xt)
+##############################################################################
+# Estimate linear mapping and transport
+# -------------------------------------
 
-Ae1 = linalg.inv(Ae)
-be1 = -be.dot(Ae1)
+Ae, be = ot.da.OT_mapping_linear(xs, xt)
 
 xst = xs.dot(Ae) + be
-xts = xt.dot(Ae1) + be1
 
-# %%
+
+##############################################################################
+# Plot transported samples
+# ------------------------
 
 pl.figure(1, (5, 5))
 pl.clf()
 pl.plot(xs[:, 0], xs[:, 1], '+')
 pl.plot(xt[:, 0], xt[:, 1], 'o')
 pl.plot(xst[:, 0], xst[:, 1], '+')
-pl.plot(xts[:, 0], xts[:, 1], 'o')
 
 pl.show()
 
+##############################################################################
+# Mapping Class between images
+# ----------------------------
+
+
+def im2mat(I):
+    """Converts and image to matrix (one pixel per line)"""
+    return I.reshape((I.shape[0] * I.shape[1], I.shape[2]))
+
+
+def mat2im(X, shape):
+    """Converts back a matrix to an image"""
+    return X.reshape(shape)
+
+
+def minmax(I):
+    return np.clip(I, 0, 1)
+
+
+# Loading images
+I1 = ndimage.imread('../data/ocean_day.jpg').astype(np.float64) / 256
+I2 = ndimage.imread('../data/ocean_sunset.jpg').astype(np.float64) / 256
 
-#%% Example class with  on images
+
+X1 = im2mat(I1)
+X2 = im2mat(I2)
+
+##############################################################################
+# Estimate mapping and adapt
+# ----------------------------
 
 mapping = ot.da.LinearTransport()
 
-mapping.fit(Xs=xs, Xt=xt)
+mapping.fit(Xs=X1, Xt=X2)
 
 
-xst = mapping.transform(Xs=xs)
-xts = mapping.inverse_transform(Xt=xt)
+xst = mapping.transform(Xs=X1)
+xts = mapping.inverse_transform(Xt=X2)
+
+I1t = minmax(mat2im(xst, I1.shape))
+I2t = minmax(mat2im(xts, I2.shape))
 
 # %%
 
-pl.figure(1, (5, 5))
-pl.clf()
-pl.plot(xs[:, 0], xs[:, 1], '+')
-pl.plot(xt[:, 0], xt[:, 1], 'o')
-pl.plot(xst[:, 0], xst[:, 1], '+')
-pl.plot(xts[:, 0], xts[:, 1], 'o')
+
+##############################################################################
+# Plot transformed images
+# -----------------------
+
+pl.figure(2, figsize=(10, 7))
+
+pl.subplot(2, 2, 1)
+pl.imshow(I1)
+pl.axis('off')
+pl.title('Im. 1')
+
+pl.subplot(2, 2, 2)
+pl.imshow(I2)
+pl.axis('off')
+pl.title('Im. 2')
+
+pl.subplot(2, 2, 3)
+pl.imshow(I1t)
+pl.axis('off')
+pl.title('Mapping Im. 1')
+
+pl.subplot(2, 2, 4)
+pl.imshow(I2t)
+pl.axis('off')
+pl.title('Inverse mapping Im. 2')

From 88a81c37d2f8c5419f88ccac620186e583fab08f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 16:49:31 +0100
Subject: [PATCH 24/42] makefile update

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index d334163bd..7e0c576c4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 
 
-PYTHON=python
+PYTHON=python3
 
 help :
 	@echo "The following make targets are available:"
@@ -41,7 +41,7 @@ pep8 :
 	flake8 examples/ ot/ test/
 
 test : FORCE pep8
-	python3 -m pytest -v test/ --cov=ot --cov-report html:cov_html
+	$(PYTHON) -m pytest -v test/ --cov=ot --cov-report html:cov_html
 	
 pytest : FORCE 
 	python -m py.test -v test/ --cov=ot

From 287c659ad35f5036ba2687caf73009ef455c7239 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Tue, 20 Mar 2018 16:57:35 +0100
Subject: [PATCH 25/42] update example

---
 examples/plot_otda_linear_mapping.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
index 163f8f197..165fe72d8 100644
--- a/examples/plot_otda_linear_mapping.py
+++ b/examples/plot_otda_linear_mapping.py
@@ -68,8 +68,8 @@
 pl.show()
 
 ##############################################################################
-# Mapping Class between images
-# ----------------------------
+# Load image data
+# ---------------
 
 
 def im2mat(I):

From 6fdf5de8fa27fa16d6b8910fe96eb67b7761aa0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 08:29:50 +0100
Subject: [PATCH 26/42] add linear mapping test + autopep8

---
 examples/plot_otda_linear_mapping.py |  5 ++---
 ot/bregman.py                        | 30 ++++++++++++++++++----------
 ot/lp/__init__.py                    |  3 ++-
 ot/optim.py                          |  9 ++++++---
 ot/utils.py                          |  2 +-
 test/test_da.py                      | 18 +++++++++++++++++
 6 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/examples/plot_otda_linear_mapping.py b/examples/plot_otda_linear_mapping.py
index 165fe72d8..7a3b76154 100644
--- a/examples/plot_otda_linear_mapping.py
+++ b/examples/plot_otda_linear_mapping.py
@@ -9,7 +9,6 @@
 import numpy as np
 import pylab as pl
 import ot
-from scipy import ndimage
 
 ##############################################################################
 # Generate data
@@ -87,8 +86,8 @@ def minmax(I):
 
 
 # Loading images
-I1 = ndimage.imread('../data/ocean_day.jpg').astype(np.float64) / 256
-I2 = ndimage.imread('../data/ocean_sunset.jpg').astype(np.float64) / 256
+I1 = pl.imread('../data/ocean_day.jpg').astype(np.float64) / 256
+I2 = pl.imread('../data/ocean_sunset.jpg').astype(np.float64) / 256
 
 
 X1 = im2mat(I1)
diff --git a/ot/bregman.py b/ot/bregman.py
index d63c51de1..07b8660e3 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -11,7 +11,8 @@
 import numpy as np
 
 
-def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
+             stopThr=1e-9, verbose=False, log=False, **kwargs):
     u"""
     Solve the entropic regularization optimal transport problem and return the OT matrix
 
@@ -120,7 +121,8 @@ def sink():
     return sink()
 
 
-def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
+              stopThr=1e-9, verbose=False, log=False, **kwargs):
     u"""
     Solve the entropic regularization optimal transport problem and return the loss
 
@@ -233,7 +235,8 @@ def sink():
     return sink()
 
 
-def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
+                   stopThr=1e-9, verbose=False, log=False, **kwargs):
     """
     Solve the entropic regularization optimal transport problem and return the OT matrix
 
@@ -403,7 +406,8 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, l
             return u.reshape((-1, 1)) * K * v.reshape((1, -1))
 
 
-def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
+def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
+                        warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
     """
     Solve the entropic regularization OT problem with log stabilization
 
@@ -526,11 +530,13 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, wa
 
     def get_K(alpha, beta):
         """log space computation"""
-        return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg)
+        return np.exp(-(M - alpha.reshape((na, 1)) -
+                        beta.reshape((1, nb))) / reg)
 
     def get_Gamma(alpha, beta, u, v):
         """log space gamma computation"""
-        return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
+        return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) /
+                      reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
 
     # print(np.min(K))
 
@@ -620,7 +626,8 @@ def get_Gamma(alpha, beta, u, v):
             return get_Gamma(alpha, beta, u, v)
 
 
-def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs):
+def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100,
+                             tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs):
     """
     Solve the entropic regularization optimal transport problem with log
     stabilization and epsilon scaling.
@@ -739,7 +746,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
 
     def get_K(alpha, beta):
         """log space computation"""
-        return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg)
+        return np.exp(-(M - alpha.reshape((na, 1)) -
+                        beta.reshape((1, nb))) / reg)
 
     # print(np.min(K))
     def get_reg(n):  # exponential decreasing
@@ -811,7 +819,8 @@ def projC(gamma, q):
     return np.multiply(gamma, q / np.maximum(np.sum(gamma, axis=0), 1e-10))
 
 
-def barycenter(A, M, reg, weights=None, numItermax=1000, stopThr=1e-4, verbose=False, log=False):
+def barycenter(A, M, reg, weights=None, numItermax=1000,
+               stopThr=1e-4, verbose=False, log=False):
     """Compute the entropic regularized wasserstein barycenter of distributions A
 
      The function solves the following optimization problem:
@@ -904,7 +913,8 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, stopThr=1e-4, verbose=F
         return geometricBar(weights, UKv)
 
 
-def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False):
+def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
+          stopThr=1e-3, verbose=False, log=False):
     """
     Compute the unmixing of an observation with a given dictionary using Wasserstein distance
 
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 5c09da213..6371feba1 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -107,7 +107,8 @@ def emd(a, b, M, numItermax=100000, log=False):
     return G
 
 
-def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False, return_matrix=False):
+def emd2(a, b, M, processes=multiprocessing.cpu_count(),
+         numItermax=100000, log=False, return_matrix=False):
     """Solves the Earth Movers distance problem and returns the loss
 
     .. math::
diff --git a/ot/optim.py b/ot/optim.py
index 1d09adcf0..f31fae2d1 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -15,7 +15,8 @@
 # The corresponding scipy function does not work for matrices
 
 
-def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=0.99):
+def line_search_armijo(f, xk, pk, gfk, old_fval,
+                       args=(), c1=1e-4, alpha0=0.99):
     """
     Armijo linesearch function that works with matrices
 
@@ -71,7 +72,8 @@ def phi(alpha1):
     return alpha, fc[0], phi1
 
 
-def cg(a, b, M, reg, f, df, G0=None, numItermax=200, stopThr=1e-9, verbose=False, log=False):
+def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
+       stopThr=1e-9, verbose=False, log=False):
     """
     Solve the general regularized OT problem with conditional gradient
 
@@ -202,7 +204,8 @@ def cost(G):
         return G
 
 
-def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, numInnerItermax=200, stopThr=1e-9, verbose=False, log=False):
+def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
+        numInnerItermax=200, stopThr=1e-9, verbose=False, log=False):
     """
     Solve the general regularized OT problem with the generalized conditional gradient
 
diff --git a/ot/utils.py b/ot/utils.py
index 9eab3fcac..16862ead8 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -316,7 +316,7 @@ def _is_deprecated(func):
         closures = []
     is_deprecated = ('deprecated' in ''.join([c.cell_contents
                                               for c in closures
-                     if isinstance(c.cell_contents, str)]))
+                                              if isinstance(c.cell_contents, str)]))
     return is_deprecated
 
 
diff --git a/test/test_da.py b/test/test_da.py
index 593dc537b..7b63daf6f 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -444,6 +444,24 @@ def test_mapping_transport_class():
     assert len(otda.log_.keys()) != 0
 
 
+def test_linear_mapping():
+
+    ns = 150
+    nt = 200
+
+    Xs, ys = get_data_classif('3gauss', ns)
+    Xt, yt = get_data_classif('3gauss2', nt)
+
+    A, b = ot.da.OT_mapping_linear(Xs, Xt)
+
+    Xst = Xs.dot(A) + b
+
+    Ct = np.cov(Xt.T)
+    Cst = np.cov(Xst.T)
+
+    np.testing.assert_allclose(Ct, Cst, rtol=1e-2, atol=1e-2)
+
+
 def test_otda():
 
     n_samples = 150  # nb samples

From 5efdf008865ea347775708b637d933e048d663ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 09:03:58 +0100
Subject: [PATCH 27/42] add test linear mapping class

---
 Makefile        |  5 +++++
 test/test_da.py | 24 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/Makefile b/Makefile
index 7e0c576c4..468d042c0 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,11 @@ rdoc :
 
 notebook :
 	ipython notebook --matplotlib=inline  --notebook-dir=notebooks/
+	
+autopep8 :
+	autopep8 -ir test ot examples
 
+aautopep8 :
+	autopep8 -air test ot examples
 
 FORCE :
diff --git a/test/test_da.py b/test/test_da.py
index 7b63daf6f..a9d6d349c 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -462,6 +462,30 @@ def test_linear_mapping():
     np.testing.assert_allclose(Ct, Cst, rtol=1e-2, atol=1e-2)
 
 
+def test_linear_mapping_class():
+
+    ns = 150
+    nt = 200
+
+    Xs, ys = get_data_classif('3gauss', ns)
+    Xt, yt = get_data_classif('3gauss2', nt)
+
+    otmap = ot.da.LinearTransport()
+
+    otmap.fit(Xs=Xs, Xt=Xt)
+    assert hasattr(otmap, "A_")
+    assert hasattr(otmap, "B_")
+    assert hasattr(otmap, "A1_")
+    assert hasattr(otmap, "B1_")
+
+    Xst = otmap.transform(Xs=Xs)
+
+    Ct = np.cov(Xt.T)
+    Cst = np.cov(Xst.T)
+
+    np.testing.assert_allclose(Ct, Cst, rtol=1e-2, atol=1e-2)
+
+
 def test_otda():
 
     n_samples = 150  # nb samples

From fc9923dea2706b65ffe15fc86428cd8b53b5feb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 09:33:57 +0100
Subject: [PATCH 28/42] add tests for ot.uils

---
 test/test_utils.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index 1bd37cdc5..b524ef6fe 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -7,6 +7,7 @@
 
 import ot
 import numpy as np
+import sys
 
 
 def test_parmap():
@@ -123,3 +124,79 @@ def test_clean_zeros():
 
     assert len(a) == n - nz
     assert len(b) == n - nz2
+
+
+def test_cost_normalization():
+
+    C = np.random.rand(10, 10)
+
+    # does nothing
+    M0 = ot.utils.cost_normalization(C)
+    np.testing.assert_allclose(C, M0)
+
+    M = ot.utils.cost_normalization(C, 'median')
+    np.testing.assert_allclose(np.median(M), 1)
+
+    M = ot.utils.cost_normalization(C, 'max')
+    np.testing.assert_allclose(M.max(), 1)
+
+    M = ot.utils.cost_normalization(C, 'log')
+    np.testing.assert_allclose(M.max(), np.log(1 + C).max())
+
+    M = ot.utils.cost_normalization(C, 'loglog')
+    np.testing.assert_allclose(M.max(), np.log(1 + np.log(1 + C)).max())
+
+
+def test_check_params():
+
+    res1 = ot.utils.check_params(first='OK', second=20)
+    assert res1 is True
+
+    res0 = ot.utils.check_params(first='OK', second=None)
+    assert res0 is False
+
+
+def test_deprecated_func():
+
+    @ot.utils.deprecated('deprecated text for fun')
+    def fun():
+        pass
+
+    def fun2():
+        pass
+
+    @ot.utils.deprecated('deprecated text for class')
+    class Class():
+        pass
+
+    if sys.version_info < (3, 5):
+        print('Not tested')
+    else:
+        assert ot.utils._is_deprecated(fun) is True
+
+        assert ot.utils._is_deprecated(fun2) is False
+
+
+def test_BaseEstimator():
+
+    class Class(ot.utils.BaseEstimator):
+
+        def __init__(self, first='spam', second='eggs'):
+
+            self.first = first
+            self.second = second
+
+    cl = Class()
+
+    names = cl._get_param_names()
+    assert 'first' in names
+    assert 'second' in names
+
+    params = cl.get_params()
+    assert 'first' in params
+    assert 'second' in params
+
+    params['first'] = 'spam again'
+    cl.set_params(**params)
+
+    assert cl.first == 'spam again'

From 927395b40dae98bcf027b601b6df48a4318cfef2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:07:47 +0100
Subject: [PATCH 29/42] add externals for function signature

---
 ot/externals/__init__.py |   0
 ot/externals/funcsigs.py | 815 +++++++++++++++++++++++++++++++++++++++
 ot/gromov.py             |   2 +-
 ot/utils.py              |  10 +-
 4 files changed, 821 insertions(+), 6 deletions(-)
 create mode 100644 ot/externals/__init__.py
 create mode 100644 ot/externals/funcsigs.py

diff --git a/ot/externals/__init__.py b/ot/externals/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py
new file mode 100644
index 000000000..4e684690b
--- /dev/null
+++ b/ot/externals/funcsigs.py
@@ -0,0 +1,815 @@
+# Copyright 2001-2013 Python Software Foundation; All Rights Reserved
+"""Function signature objects for callables
+
+Back port of Python 3.3's function signature tools from the inspect module,
+modified to be compatible with Python 2.7 and 3.2+.
+"""
+from __future__ import absolute_import, division, print_function
+import itertools
+import functools
+import re
+import types
+
+from collections import OrderedDict
+
+__version__ = "0.4"
+
+__all__ = ['BoundArguments', 'Parameter', 'Signature', 'signature']
+
+
+_WrapperDescriptor = type(type.__call__)
+_MethodWrapper = type(all.__call__)
+
+_NonUserDefinedCallables = (_WrapperDescriptor,
+                            _MethodWrapper,
+                            types.BuiltinFunctionType)
+
+
+def formatannotation(annotation, base_module=None):
+    if isinstance(annotation, type):
+        if annotation.__module__ in ('builtins', '__builtin__', base_module):
+            return annotation.__name__
+        return annotation.__module__+'.'+annotation.__name__
+    return repr(annotation)
+
+
+def _get_user_defined_method(cls, method_name, *nested):
+    try:
+        if cls is type:
+            return
+        meth = getattr(cls, method_name)
+        for name in nested:
+            meth = getattr(meth, name, meth)
+    except AttributeError:
+        return
+    else:
+        if not isinstance(meth, _NonUserDefinedCallables):
+            # Once '__signature__' will be added to 'C'-level
+            # callables, this check won't be necessary
+            return meth
+
+
+def signature(obj):
+    '''Get a signature object for the passed callable.'''
+
+    if not callable(obj):
+        raise TypeError('{0!r} is not a callable object'.format(obj))
+
+    if isinstance(obj, types.MethodType):
+        sig = signature(obj.__func__)
+        if obj.__self__ is None:
+            # Unbound method: the first parameter becomes positional-only
+            if sig.parameters:
+                first = sig.parameters.values()[0].replace(
+                    kind=_POSITIONAL_ONLY)
+                return sig.replace(
+                    parameters=(first,) + tuple(sig.parameters.values())[1:])
+            else:
+                return sig
+        else:
+            # In this case we skip the first parameter of the underlying
+            # function (usually `self` or `cls`).
+            return sig.replace(parameters=tuple(sig.parameters.values())[1:])
+
+    try:
+        sig = obj.__signature__
+    except AttributeError:
+        pass
+    else:
+        if sig is not None:
+            return sig
+
+    try:
+        # Was this function wrapped by a decorator?
+        wrapped = obj.__wrapped__
+    except AttributeError:
+        pass
+    else:
+        return signature(wrapped)
+
+    if isinstance(obj, types.FunctionType):
+        return Signature.from_function(obj)
+
+    if isinstance(obj, functools.partial):
+        sig = signature(obj.func)
+
+        new_params = OrderedDict(sig.parameters.items())
+
+        partial_args = obj.args or ()
+        partial_keywords = obj.keywords or {}
+        try:
+            ba = sig.bind_partial(*partial_args, **partial_keywords)
+        except TypeError as ex:
+            msg = 'partial object {0!r} has incorrect arguments'.format(obj)
+            raise ValueError(msg)
+
+        for arg_name, arg_value in ba.arguments.items():
+            param = new_params[arg_name]
+            if arg_name in partial_keywords:
+                # We set a new default value, because the following code
+                # is correct:
+                #
+                #   >>> def foo(a): print(a)
+                #   >>> print(partial(partial(foo, a=10), a=20)())
+                #   20
+                #   >>> print(partial(partial(foo, a=10), a=20)(a=30))
+                #   30
+                #
+                # So, with 'partial' objects, passing a keyword argument is
+                # like setting a new default value for the corresponding
+                # parameter
+                #
+                # We also mark this parameter with '_partial_kwarg'
+                # flag.  Later, in '_bind', the 'default' value of this
+                # parameter will be added to 'kwargs', to simulate
+                # the 'functools.partial' real call.
+                new_params[arg_name] = param.replace(default=arg_value,
+                                                     _partial_kwarg=True)
+
+            elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
+                            not param._partial_kwarg):
+                new_params.pop(arg_name)
+
+        return sig.replace(parameters=new_params.values())
+
+    sig = None
+    if isinstance(obj, type):
+        # obj is a class or a metaclass
+
+        # First, let's see if it has an overloaded __call__ defined
+        # in its metaclass
+        call = _get_user_defined_method(type(obj), '__call__')
+        if call is not None:
+            sig = signature(call)
+        else:
+            # Now we check if the 'obj' class has a '__new__' method
+            new = _get_user_defined_method(obj, '__new__')
+            if new is not None:
+                sig = signature(new)
+            else:
+                # Finally, we should have at least __init__ implemented
+                init = _get_user_defined_method(obj, '__init__')
+                if init is not None:
+                    sig = signature(init)
+    elif not isinstance(obj, _NonUserDefinedCallables):
+        # An object with __call__
+        # We also check that the 'obj' is not an instance of
+        # _WrapperDescriptor or _MethodWrapper to avoid
+        # infinite recursion (and even potential segfault)
+        call = _get_user_defined_method(type(obj), '__call__', 'im_func')
+        if call is not None:
+            sig = signature(call)
+
+    if sig is not None:
+        # For classes and objects we skip the first parameter of their
+        # __call__, __new__, or __init__ methods
+        return sig.replace(parameters=tuple(sig.parameters.values())[1:])
+
+    if isinstance(obj, types.BuiltinFunctionType):
+        # Raise a nicer error message for builtins
+        msg = 'no signature found for builtin function {0!r}'.format(obj)
+        raise ValueError(msg)
+
+    raise ValueError('callable {0!r} is not supported by signature'.format(obj))
+
+
+class _void(object):
+    '''A private marker - used in Parameter & Signature'''
+
+
+class _empty(object):
+    pass
+
+
+class _ParameterKind(int):
+    def __new__(self, *args, **kwargs):
+        obj = int.__new__(self, *args)
+        obj._name = kwargs['name']
+        return obj
+
+    def __str__(self):
+        return self._name
+
+    def __repr__(self):
+        return '<_ParameterKind: {0!r}>'.format(self._name)
+
+
+_POSITIONAL_ONLY        = _ParameterKind(0, name='POSITIONAL_ONLY')
+_POSITIONAL_OR_KEYWORD  = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD')
+_VAR_POSITIONAL         = _ParameterKind(2, name='VAR_POSITIONAL')
+_KEYWORD_ONLY           = _ParameterKind(3, name='KEYWORD_ONLY')
+_VAR_KEYWORD            = _ParameterKind(4, name='VAR_KEYWORD')
+
+
+class Parameter(object):
+    '''Represents a parameter in a function signature.
+
+    Has the following public attributes:
+
+    * name : str
+        The name of the parameter as a string.
+    * default : object
+        The default value for the parameter if specified.  If the
+        parameter has no default value, this attribute is not set.
+    * annotation
+        The annotation for the parameter if specified.  If the
+        parameter has no annotation, this attribute is not set.
+    * kind : str
+        Describes how argument values are bound to the parameter.
+        Possible values: `Parameter.POSITIONAL_ONLY`,
+        `Parameter.POSITIONAL_OR_KEYWORD`, `Parameter.VAR_POSITIONAL`,
+        `Parameter.KEYWORD_ONLY`, `Parameter.VAR_KEYWORD`.
+    '''
+
+    __slots__ = ('_name', '_kind', '_default', '_annotation', '_partial_kwarg')
+
+    POSITIONAL_ONLY         = _POSITIONAL_ONLY
+    POSITIONAL_OR_KEYWORD   = _POSITIONAL_OR_KEYWORD
+    VAR_POSITIONAL          = _VAR_POSITIONAL
+    KEYWORD_ONLY            = _KEYWORD_ONLY
+    VAR_KEYWORD             = _VAR_KEYWORD
+
+    empty = _empty
+
+    def __init__(self, name, kind, default=_empty, annotation=_empty,
+                 _partial_kwarg=False):
+
+        if kind not in (_POSITIONAL_ONLY, _POSITIONAL_OR_KEYWORD,
+                        _VAR_POSITIONAL, _KEYWORD_ONLY, _VAR_KEYWORD):
+            raise ValueError("invalid value for 'Parameter.kind' attribute")
+        self._kind = kind
+
+        if default is not _empty:
+            if kind in (_VAR_POSITIONAL, _VAR_KEYWORD):
+                msg = '{0} parameters cannot have default values'.format(kind)
+                raise ValueError(msg)
+        self._default = default
+        self._annotation = annotation
+
+        if name is None:
+            if kind != _POSITIONAL_ONLY:
+                raise ValueError("None is not a valid name for a "
+                                 "non-positional-only parameter")
+            self._name = name
+        else:
+            name = str(name)
+            if kind != _POSITIONAL_ONLY and not re.match(r'[a-z_]\w*$', name, re.I):
+                msg = '{0!r} is not a valid parameter name'.format(name)
+                raise ValueError(msg)
+            self._name = name
+
+        self._partial_kwarg = _partial_kwarg
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def default(self):
+        return self._default
+
+    @property
+    def annotation(self):
+        return self._annotation
+
+    @property
+    def kind(self):
+        return self._kind
+
+    def replace(self, name=_void, kind=_void, annotation=_void,
+                default=_void, _partial_kwarg=_void):
+        '''Creates a customized copy of the Parameter.'''
+
+        if name is _void:
+            name = self._name
+
+        if kind is _void:
+            kind = self._kind
+
+        if annotation is _void:
+            annotation = self._annotation
+
+        if default is _void:
+            default = self._default
+
+        if _partial_kwarg is _void:
+            _partial_kwarg = self._partial_kwarg
+
+        return type(self)(name, kind, default=default, annotation=annotation,
+                          _partial_kwarg=_partial_kwarg)
+
+    def __str__(self):
+        kind = self.kind
+
+        formatted = self._name
+        if kind == _POSITIONAL_ONLY:
+            if formatted is None:
+                formatted = ''
+            formatted = '<{0}>'.format(formatted)
+
+        # Add annotation and default value
+        if self._annotation is not _empty:
+            formatted = '{0}:{1}'.format(formatted,
+                                       formatannotation(self._annotation))
+
+        if self._default is not _empty:
+            formatted = '{0}={1}'.format(formatted, repr(self._default))
+
+        if kind == _VAR_POSITIONAL:
+            formatted = '*' + formatted
+        elif kind == _VAR_KEYWORD:
+            formatted = '**' + formatted
+
+        return formatted
+
+    def __repr__(self):
+        return '<{0} at {1:#x} {2!r}>'.format(self.__class__.__name__,
+                                           id(self), self.name)
+
+    def __hash__(self):
+        msg = "unhashable type: '{0}'".format(self.__class__.__name__)
+        raise TypeError(msg)
+
+    def __eq__(self, other):
+        return (issubclass(other.__class__, Parameter) and
+                self._name == other._name and
+                self._kind == other._kind and
+                self._default == other._default and
+                self._annotation == other._annotation)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+class BoundArguments(object):
+    '''Result of `Signature.bind` call.  Holds the mapping of arguments
+    to the function's parameters.
+
+    Has the following public attributes:
+
+    * arguments : OrderedDict
+        An ordered mutable mapping of parameters' names to arguments' values.
+        Does not contain arguments' default values.
+    * signature : Signature
+        The Signature object that created this instance.
+    * args : tuple
+        Tuple of positional arguments values.
+    * kwargs : dict
+        Dict of keyword arguments values.
+    '''
+
+    def __init__(self, signature, arguments):
+        self.arguments = arguments
+        self._signature = signature
+
+    @property
+    def signature(self):
+        return self._signature
+
+    @property
+    def args(self):
+        args = []
+        for param_name, param in self._signature.parameters.items():
+            if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
+                                                    param._partial_kwarg):
+                # Keyword arguments mapped by 'functools.partial'
+                # (Parameter._partial_kwarg is True) are mapped
+                # in 'BoundArguments.kwargs', along with VAR_KEYWORD &
+                # KEYWORD_ONLY
+                break
+
+            try:
+                arg = self.arguments[param_name]
+            except KeyError:
+                # We're done here. Other arguments
+                # will be mapped in 'BoundArguments.kwargs'
+                break
+            else:
+                if param.kind == _VAR_POSITIONAL:
+                    # *args
+                    args.extend(arg)
+                else:
+                    # plain argument
+                    args.append(arg)
+
+        return tuple(args)
+
+    @property
+    def kwargs(self):
+        kwargs = {}
+        kwargs_started = False
+        for param_name, param in self._signature.parameters.items():
+            if not kwargs_started:
+                if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
+                                                param._partial_kwarg):
+                    kwargs_started = True
+                else:
+                    if param_name not in self.arguments:
+                        kwargs_started = True
+                        continue
+
+            if not kwargs_started:
+                continue
+
+            try:
+                arg = self.arguments[param_name]
+            except KeyError:
+                pass
+            else:
+                if param.kind == _VAR_KEYWORD:
+                    # **kwargs
+                    kwargs.update(arg)
+                else:
+                    # plain keyword argument
+                    kwargs[param_name] = arg
+
+        return kwargs
+
+    def __hash__(self):
+        msg = "unhashable type: '{0}'".format(self.__class__.__name__)
+        raise TypeError(msg)
+
+    def __eq__(self, other):
+        return (issubclass(other.__class__, BoundArguments) and
+                self.signature == other.signature and
+                self.arguments == other.arguments)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+class Signature(object):
+    '''A Signature object represents the overall signature of a function.
+    It stores a Parameter object for each parameter accepted by the
+    function, as well as information specific to the function itself.
+
+    A Signature object has the following public attributes and methods:
+
+    * parameters : OrderedDict
+        An ordered mapping of parameters' names to the corresponding
+        Parameter objects (keyword-only arguments are in the same order
+        as listed in `code.co_varnames`).
+    * return_annotation : object
+        The annotation for the return type of the function if specified.
+        If the function has no annotation for its return type, this
+        attribute is not set.
+    * bind(*args, **kwargs) -> BoundArguments
+        Creates a mapping from positional and keyword arguments to
+        parameters.
+    * bind_partial(*args, **kwargs) -> BoundArguments
+        Creates a partial mapping from positional and keyword arguments
+        to parameters (simulating 'functools.partial' behavior.)
+    '''
+
+    __slots__ = ('_return_annotation', '_parameters')
+
+    _parameter_cls = Parameter
+    _bound_arguments_cls = BoundArguments
+
+    empty = _empty
+
+    def __init__(self, parameters=None, return_annotation=_empty,
+                 __validate_parameters__=True):
+        '''Constructs Signature from the given list of Parameter
+        objects and 'return_annotation'.  All arguments are optional.
+        '''
+
+        if parameters is None:
+            params = OrderedDict()
+        else:
+            if __validate_parameters__:
+                params = OrderedDict()
+                top_kind = _POSITIONAL_ONLY
+
+                for idx, param in enumerate(parameters):
+                    kind = param.kind
+                    if kind < top_kind:
+                        msg = 'wrong parameter order: {0} before {1}'
+                        msg = msg.format(top_kind, param.kind)
+                        raise ValueError(msg)
+                    else:
+                        top_kind = kind
+
+                    name = param.name
+                    if name is None:
+                        name = str(idx)
+                        param = param.replace(name=name)
+
+                    if name in params:
+                        msg = 'duplicate parameter name: {0!r}'.format(name)
+                        raise ValueError(msg)
+                    params[name] = param
+            else:
+                params = OrderedDict(((param.name, param)
+                                                for param in parameters))
+
+        self._parameters = params
+        self._return_annotation = return_annotation
+
+    @classmethod
+    def from_function(cls, func):
+        '''Constructs Signature for the given python function'''
+
+        if not isinstance(func, types.FunctionType):
+            raise TypeError('{0!r} is not a Python function'.format(func))
+
+        Parameter = cls._parameter_cls
+
+        # Parameter information.
+        func_code = func.__code__
+        pos_count = func_code.co_argcount
+        arg_names = func_code.co_varnames
+        positional = tuple(arg_names[:pos_count])
+        keyword_only_count = getattr(func_code, 'co_kwonlyargcount', 0)
+        keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)]
+        annotations = getattr(func, '__annotations__', {})
+        defaults = func.__defaults__
+        kwdefaults = getattr(func, '__kwdefaults__', None)
+
+        if defaults:
+            pos_default_count = len(defaults)
+        else:
+            pos_default_count = 0
+
+        parameters = []
+
+        # Non-keyword-only parameters w/o defaults.
+        non_default_count = pos_count - pos_default_count
+        for name in positional[:non_default_count]:
+            annotation = annotations.get(name, _empty)
+            parameters.append(Parameter(name, annotation=annotation,
+                                        kind=_POSITIONAL_OR_KEYWORD))
+
+        # ... w/ defaults.
+        for offset, name in enumerate(positional[non_default_count:]):
+            annotation = annotations.get(name, _empty)
+            parameters.append(Parameter(name, annotation=annotation,
+                                        kind=_POSITIONAL_OR_KEYWORD,
+                                        default=defaults[offset]))
+
+        # *args
+        if func_code.co_flags & 0x04:
+            name = arg_names[pos_count + keyword_only_count]
+            annotation = annotations.get(name, _empty)
+            parameters.append(Parameter(name, annotation=annotation,
+                                        kind=_VAR_POSITIONAL))
+
+        # Keyword-only parameters.
+        for name in keyword_only:
+            default = _empty
+            if kwdefaults is not None:
+                default = kwdefaults.get(name, _empty)
+
+            annotation = annotations.get(name, _empty)
+            parameters.append(Parameter(name, annotation=annotation,
+                                        kind=_KEYWORD_ONLY,
+                                        default=default))
+        # **kwargs
+        if func_code.co_flags & 0x08:
+            index = pos_count + keyword_only_count
+            if func_code.co_flags & 0x04:
+                index += 1
+
+            name = arg_names[index]
+            annotation = annotations.get(name, _empty)
+            parameters.append(Parameter(name, annotation=annotation,
+                                        kind=_VAR_KEYWORD))
+
+        return cls(parameters,
+                   return_annotation=annotations.get('return', _empty),
+                   __validate_parameters__=False)
+
+    @property
+    def parameters(self):
+        try:
+            return types.MappingProxyType(self._parameters)
+        except AttributeError:
+            return OrderedDict(self._parameters.items())
+
+    @property
+    def return_annotation(self):
+        return self._return_annotation
+
+    def replace(self, parameters=_void, return_annotation=_void):
+        '''Creates a customized copy of the Signature.
+        Pass 'parameters' and/or 'return_annotation' arguments
+        to override them in the new copy.
+        '''
+
+        if parameters is _void:
+            parameters = self.parameters.values()
+
+        if return_annotation is _void:
+            return_annotation = self._return_annotation
+
+        return type(self)(parameters,
+                          return_annotation=return_annotation)
+
+    def __hash__(self):
+        msg = "unhashable type: '{0}'".format(self.__class__.__name__)
+        raise TypeError(msg)
+
+    def __eq__(self, other):
+        if (not issubclass(type(other), Signature) or
+                    self.return_annotation != other.return_annotation or
+                    len(self.parameters) != len(other.parameters)):
+            return False
+
+        other_positions = dict((param, idx)
+                           for idx, param in enumerate(other.parameters.keys()))
+
+        for idx, (param_name, param) in enumerate(self.parameters.items()):
+            if param.kind == _KEYWORD_ONLY:
+                try:
+                    other_param = other.parameters[param_name]
+                except KeyError:
+                    return False
+                else:
+                    if param != other_param:
+                        return False
+            else:
+                try:
+                    other_idx = other_positions[param_name]
+                except KeyError:
+                    return False
+                else:
+                    if (idx != other_idx or
+                                    param != other.parameters[param_name]):
+                        return False
+
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def _bind(self, args, kwargs, partial=False):
+        '''Private method.  Don't use directly.'''
+
+        arguments = OrderedDict()
+
+        parameters = iter(self.parameters.values())
+        parameters_ex = ()
+        arg_vals = iter(args)
+
+        if partial:
+            # Support for binding arguments to 'functools.partial' objects.
+            # See 'functools.partial' case in 'signature()' implementation
+            # for details.
+            for param_name, param in self.parameters.items():
+                if (param._partial_kwarg and param_name not in kwargs):
+                    # Simulating 'functools.partial' behavior
+                    kwargs[param_name] = param.default
+
+        while True:
+            # Let's iterate through the positional arguments and corresponding
+            # parameters
+            try:
+                arg_val = next(arg_vals)
+            except StopIteration:
+                # No more positional arguments
+                try:
+                    param = next(parameters)
+                except StopIteration:
+                    # No more parameters. That's it. Just need to check that
+                    # we have no `kwargs` after this while loop
+                    break
+                else:
+                    if param.kind == _VAR_POSITIONAL:
+                        # That's OK, just empty *args.  Let's start parsing
+                        # kwargs
+                        break
+                    elif param.name in kwargs:
+                        if param.kind == _POSITIONAL_ONLY:
+                            msg = '{arg!r} parameter is positional only, ' \
+                                  'but was passed as a keyword'
+                            msg = msg.format(arg=param.name)
+                            raise TypeError(msg)
+                        parameters_ex = (param,)
+                        break
+                    elif (param.kind == _VAR_KEYWORD or
+                                                param.default is not _empty):
+                        # That's fine too - we have a default value for this
+                        # parameter.  So, lets start parsing `kwargs`, starting
+                        # with the current parameter
+                        parameters_ex = (param,)
+                        break
+                    else:
+                        if partial:
+                            parameters_ex = (param,)
+                            break
+                        else:
+                            msg = '{arg!r} parameter lacking default value'
+                            msg = msg.format(arg=param.name)
+                            raise TypeError(msg)
+            else:
+                # We have a positional argument to process
+                try:
+                    param = next(parameters)
+                except StopIteration:
+                    raise TypeError('too many positional arguments')
+                else:
+                    if param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY):
+                        # Looks like we have no parameter for this positional
+                        # argument
+                        raise TypeError('too many positional arguments')
+
+                    if param.kind == _VAR_POSITIONAL:
+                        # We have an '*args'-like argument, let's fill it with
+                        # all positional arguments we have left and move on to
+                        # the next phase
+                        values = [arg_val]
+                        values.extend(arg_vals)
+                        arguments[param.name] = tuple(values)
+                        break
+
+                    if param.name in kwargs:
+                        raise TypeError('multiple values for argument '
+                                        '{arg!r}'.format(arg=param.name))
+
+                    arguments[param.name] = arg_val
+
+        # Now, we iterate through the remaining parameters to process
+        # keyword arguments
+        kwargs_param = None
+        for param in itertools.chain(parameters_ex, parameters):
+            if param.kind == _POSITIONAL_ONLY:
+                # This should never happen in case of a properly built
+                # Signature object (but let's have this check here
+                # to ensure correct behaviour just in case)
+                raise TypeError('{arg!r} parameter is positional only, '
+                                'but was passed as a keyword'. \
+                                format(arg=param.name))
+
+            if param.kind == _VAR_KEYWORD:
+                # Memorize that we have a '**kwargs'-like parameter
+                kwargs_param = param
+                continue
+
+            param_name = param.name
+            try:
+                arg_val = kwargs.pop(param_name)
+            except KeyError:
+                # We have no value for this parameter.  It's fine though,
+                # if it has a default value, or it is an '*args'-like
+                # parameter, left alone by the processing of positional
+                # arguments.
+                if (not partial and param.kind != _VAR_POSITIONAL and
+                                                    param.default is _empty):
+                    raise TypeError('{arg!r} parameter lacking default value'. \
+                                    format(arg=param_name))
+
+            else:
+                arguments[param_name] = arg_val
+
+        if kwargs:
+            if kwargs_param is not None:
+                # Process our '**kwargs'-like parameter
+                arguments[kwargs_param.name] = kwargs
+            else:
+                raise TypeError('too many keyword arguments')
+
+        return self._bound_arguments_cls(self, arguments)
+
+    def bind(self, *args, **kwargs):
+        '''Get a BoundArguments object, that maps the passed `args`
+        and `kwargs` to the function's signature.  Raises `TypeError`
+        if the passed arguments can not be bound.
+        '''
+        return self._bind(args, kwargs)
+
+    def bind_partial(self, *args, **kwargs):
+        '''Get a BoundArguments object, that partially maps the
+        passed `args` and `kwargs` to the function's signature.
+        Raises `TypeError` if the passed arguments can not be bound.
+        '''
+        return self._bind(args, kwargs, partial=True)
+
+    def __str__(self):
+        result = []
+        render_kw_only_separator = True
+        for idx, param in enumerate(self.parameters.values()):
+            formatted = str(param)
+
+            kind = param.kind
+            if kind == _VAR_POSITIONAL:
+                # OK, we have an '*args'-like parameter, so we won't need
+                # a '*' to separate keyword-only arguments
+                render_kw_only_separator = False
+            elif kind == _KEYWORD_ONLY and render_kw_only_separator:
+                # We have a keyword-only parameter to render and we haven't
+                # rendered an '*args'-like parameter before, so add a '*'
+                # separator to the parameters list ("foo(arg1, *, arg2)" case)
+                result.append('*')
+                # This condition should be only triggered once, so
+                # reset the flag
+                render_kw_only_separator = False
+
+            result.append(formatted)
+
+        rendered = '({0})'.format(', '.join(result))
+
+        if self.return_annotation is not _empty:
+            anno = formatannotation(self.return_annotation)
+            rendered += ' -> {0}'.format(anno)
+
+        return rendered
diff --git a/ot/gromov.py b/ot/gromov.py
index 2a2387315..e03fa5b0c 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -595,7 +595,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
     gw, logv = entropic_gromov_wasserstein(
         C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
 
-    log['T'] = gw
+    logv['T'] = gw
 
     if log:
         return logv['gw_dist'], logv
diff --git a/ot/utils.py b/ot/utils.py
index 16862ead8..17983f2e1 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -15,7 +15,10 @@
 from scipy.spatial.distance import cdist
 import sys
 import warnings
-
+try:
+    from inspect import signature
+except ImportError:
+    from .externals.funcsigs import signature
 
 __time_tic_toc = time.time()
 
@@ -335,10 +338,7 @@ class BaseEstimator(object):
     @classmethod
     def _get_param_names(cls):
         """Get parameter names for the estimator"""
-        try:
-            from inspect import signature
-        except ImportError:
-            from .externals.funcsigs import signature
+
         # fetch the constructor or the original constructor before
         # deprecation wrapping if any
         init = getattr(cls.__init__, 'deprecated_original', cls.__init__)

From 55aaf7874c651235d44c34b89337df7694e55014 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:08:17 +0100
Subject: [PATCH 30/42] add test gromov + debug sklearn Basestimator

---
 test/test_da.py     |  4 ++--
 test/test_gromov.py | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/test/test_da.py b/test/test_da.py
index a9d6d349c..3022721c4 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -326,8 +326,8 @@ def test_mapping_transport_class():
     """test_mapping_transport
     """
 
-    ns = 150
-    nt = 200
+    ns = 60
+    nt = 120
 
     Xs, ys = get_data_classif('3gauss', ns)
     Xt, yt = get_data_classif('3gauss2', nt)
diff --git a/test/test_gromov.py b/test/test_gromov.py
index 625e62a96..0384ee11b 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -36,6 +36,18 @@ def test_gromov():
     np.testing.assert_allclose(
         q, G.sum(0), atol=1e-04)  # cf convergence gromov
 
+    gw, log = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=True)
+
+    G = log['T']
+
+    np.testing.assert_allclose(gw, 0, atol=1e-04, rtol=1e-4)
+
+    # check constratints
+    np.testing.assert_allclose(
+        p, G.sum(1), atol=1e-04)  # cf convergence gromov
+    np.testing.assert_allclose(
+        q, G.sum(0), atol=1e-04)  # cf convergence gromov
+
 
 def test_entropic_gromov():
     n_samples = 50  # nb samples
@@ -64,3 +76,16 @@ def test_entropic_gromov():
         p, G.sum(1), atol=1e-04)  # cf convergence gromov
     np.testing.assert_allclose(
         q, G.sum(0), atol=1e-04)  # cf convergence gromov
+
+    gw, log = ot.gromov.entropic_gromov_wasserstein2(
+        C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True)
+
+    G = log['T']
+
+    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e1)
+
+    # check constratints
+    np.testing.assert_allclose(
+        p, G.sum(1), atol=1e-04)  # cf convergence gromov
+    np.testing.assert_allclose(
+        q, G.sum(0), atol=1e-04)  # cf convergence gromov

From 64ef33d09906a1aebd3c8294ffd7720475ab926b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:13:16 +0100
Subject: [PATCH 31/42] aupdate gromov + autopep8 externals

---
 ot/externals/funcsigs.py | 56 +++++++++++++++++++++-------------------
 test/test_gromov.py      |  2 +-
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py
index 4e684690b..82a55dac5 100644
--- a/ot/externals/funcsigs.py
+++ b/ot/externals/funcsigs.py
@@ -29,7 +29,7 @@ def formatannotation(annotation, base_module=None):
     if isinstance(annotation, type):
         if annotation.__module__ in ('builtins', '__builtin__', base_module):
             return annotation.__name__
-        return annotation.__module__+'.'+annotation.__name__
+        return annotation.__module__ + '.' + annotation.__name__
     return repr(annotation)
 
 
@@ -127,7 +127,7 @@ def signature(obj):
                                                      _partial_kwarg=True)
 
             elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
-                            not param._partial_kwarg):
+                  not param._partial_kwarg):
                 new_params.pop(arg_name)
 
         return sig.replace(parameters=new_params.values())
@@ -170,7 +170,8 @@ def signature(obj):
         msg = 'no signature found for builtin function {0!r}'.format(obj)
         raise ValueError(msg)
 
-    raise ValueError('callable {0!r} is not supported by signature'.format(obj))
+    raise ValueError(
+        'callable {0!r} is not supported by signature'.format(obj))
 
 
 class _void(object):
@@ -194,11 +195,11 @@ def __repr__(self):
         return '<_ParameterKind: {0!r}>'.format(self._name)
 
 
-_POSITIONAL_ONLY        = _ParameterKind(0, name='POSITIONAL_ONLY')
-_POSITIONAL_OR_KEYWORD  = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD')
-_VAR_POSITIONAL         = _ParameterKind(2, name='VAR_POSITIONAL')
-_KEYWORD_ONLY           = _ParameterKind(3, name='KEYWORD_ONLY')
-_VAR_KEYWORD            = _ParameterKind(4, name='VAR_KEYWORD')
+_POSITIONAL_ONLY = _ParameterKind(0, name='POSITIONAL_ONLY')
+_POSITIONAL_OR_KEYWORD = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD')
+_VAR_POSITIONAL = _ParameterKind(2, name='VAR_POSITIONAL')
+_KEYWORD_ONLY = _ParameterKind(3, name='KEYWORD_ONLY')
+_VAR_KEYWORD = _ParameterKind(4, name='VAR_KEYWORD')
 
 
 class Parameter(object):
@@ -223,11 +224,11 @@ class Parameter(object):
 
     __slots__ = ('_name', '_kind', '_default', '_annotation', '_partial_kwarg')
 
-    POSITIONAL_ONLY         = _POSITIONAL_ONLY
-    POSITIONAL_OR_KEYWORD   = _POSITIONAL_OR_KEYWORD
-    VAR_POSITIONAL          = _VAR_POSITIONAL
-    KEYWORD_ONLY            = _KEYWORD_ONLY
-    VAR_KEYWORD             = _VAR_KEYWORD
+    POSITIONAL_ONLY = _POSITIONAL_ONLY
+    POSITIONAL_OR_KEYWORD = _POSITIONAL_OR_KEYWORD
+    VAR_POSITIONAL = _VAR_POSITIONAL
+    KEYWORD_ONLY = _KEYWORD_ONLY
+    VAR_KEYWORD = _VAR_KEYWORD
 
     empty = _empty
 
@@ -253,7 +254,8 @@ def __init__(self, name, kind, default=_empty, annotation=_empty,
             self._name = name
         else:
             name = str(name)
-            if kind != _POSITIONAL_ONLY and not re.match(r'[a-z_]\w*$', name, re.I):
+            if kind != _POSITIONAL_ONLY and not re.match(
+                    r'[a-z_]\w*$', name, re.I):
                 msg = '{0!r} is not a valid parameter name'.format(name)
                 raise ValueError(msg)
             self._name = name
@@ -310,7 +312,7 @@ def __str__(self):
         # Add annotation and default value
         if self._annotation is not _empty:
             formatted = '{0}:{1}'.format(formatted,
-                                       formatannotation(self._annotation))
+                                         formatannotation(self._annotation))
 
         if self._default is not _empty:
             formatted = '{0}={1}'.format(formatted, repr(self._default))
@@ -324,7 +326,7 @@ def __str__(self):
 
     def __repr__(self):
         return '<{0} at {1:#x} {2!r}>'.format(self.__class__.__name__,
-                                           id(self), self.name)
+                                              id(self), self.name)
 
     def __hash__(self):
         msg = "unhashable type: '{0}'".format(self.__class__.__name__)
@@ -371,7 +373,7 @@ def args(self):
         args = []
         for param_name, param in self._signature.parameters.items():
             if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
-                                                    param._partial_kwarg):
+                    param._partial_kwarg):
                 # Keyword arguments mapped by 'functools.partial'
                 # (Parameter._partial_kwarg is True) are mapped
                 # in 'BoundArguments.kwargs', along with VAR_KEYWORD &
@@ -401,7 +403,7 @@ def kwargs(self):
         for param_name, param in self._signature.parameters.items():
             if not kwargs_started:
                 if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
-                                                param._partial_kwarg):
+                        param._partial_kwarg):
                     kwargs_started = True
                 else:
                     if param_name not in self.arguments:
@@ -501,7 +503,7 @@ def __init__(self, parameters=None, return_annotation=_empty,
                     params[name] = param
             else:
                 params = OrderedDict(((param.name, param)
-                                                for param in parameters))
+                                      for param in parameters))
 
         self._parameters = params
         self._return_annotation = return_annotation
@@ -611,12 +613,12 @@ def __hash__(self):
 
     def __eq__(self, other):
         if (not issubclass(type(other), Signature) or
-                    self.return_annotation != other.return_annotation or
-                    len(self.parameters) != len(other.parameters)):
+            self.return_annotation != other.return_annotation or
+                len(self.parameters) != len(other.parameters)):
             return False
 
         other_positions = dict((param, idx)
-                           for idx, param in enumerate(other.parameters.keys()))
+                               for idx, param in enumerate(other.parameters.keys()))
 
         for idx, (param_name, param) in enumerate(self.parameters.items()):
             if param.kind == _KEYWORD_ONLY:
@@ -634,7 +636,7 @@ def __eq__(self, other):
                     return False
                 else:
                     if (idx != other_idx or
-                                    param != other.parameters[param_name]):
+                            param != other.parameters[param_name]):
                         return False
 
         return True
@@ -687,7 +689,7 @@ def _bind(self, args, kwargs, partial=False):
                         parameters_ex = (param,)
                         break
                     elif (param.kind == _VAR_KEYWORD or
-                                                param.default is not _empty):
+                          param.default is not _empty):
                         # That's fine too - we have a default value for this
                         # parameter.  So, lets start parsing `kwargs`, starting
                         # with the current parameter
@@ -737,7 +739,7 @@ def _bind(self, args, kwargs, partial=False):
                 # Signature object (but let's have this check here
                 # to ensure correct behaviour just in case)
                 raise TypeError('{arg!r} parameter is positional only, '
-                                'but was passed as a keyword'. \
+                                'but was passed as a keyword'.
                                 format(arg=param.name))
 
             if param.kind == _VAR_KEYWORD:
@@ -754,8 +756,8 @@ def _bind(self, args, kwargs, partial=False):
                 # parameter, left alone by the processing of positional
                 # arguments.
                 if (not partial and param.kind != _VAR_POSITIONAL and
-                                                    param.default is _empty):
-                    raise TypeError('{arg!r} parameter lacking default value'. \
+                        param.default is _empty):
+                    raise TypeError('{arg!r} parameter lacking default value'.
                                     format(arg=param_name))
 
             else:
diff --git a/test/test_gromov.py b/test/test_gromov.py
index 0384ee11b..0dfd54e24 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -40,7 +40,7 @@ def test_gromov():
 
     G = log['T']
 
-    np.testing.assert_allclose(gw, 0, atol=1e-04, rtol=1e-4)
+    np.testing.assert_allclose(gw, 0, atol=1e-2, rtol=1e-2)
 
     # check constratints
     np.testing.assert_allclose(

From 7095e03eb339bcf32d91c5a8857ecc3f3d0c45c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:28:19 +0100
Subject: [PATCH 32/42] gtomov barycenter tests

---
 test/test_gromov.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/test/test_gromov.py b/test/test_gromov.py
index 0dfd54e24..d865380be 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -40,7 +40,7 @@ def test_gromov():
 
     G = log['T']
 
-    np.testing.assert_allclose(gw, 0, atol=1e-2, rtol=1e-2)
+    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)
 
     # check constratints
     np.testing.assert_allclose(
@@ -82,10 +82,37 @@ def test_entropic_gromov():
 
     G = log['T']
 
-    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e1)
+    np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)
 
     # check constratints
     np.testing.assert_allclose(
         p, G.sum(1), atol=1e-04)  # cf convergence gromov
     np.testing.assert_allclose(
         q, G.sum(0), atol=1e-04)  # cf convergence gromov
+
+
+def test_gromov_barycenter():
+
+    ns = 50
+    nt = 60
+
+    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
+    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)
+
+    C1 = ot.dist(Xs)
+    C2 = ot.dist(Xt)
+
+    n_samples = 3
+    Cb = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
+                                      [ot.unif(ns), ot.unif(nt)
+                                       ], ot.unif(n_samples), [.5, .5],
+                                      'square_loss',  # 5e-4,
+                                      max_iter=100, tol=1e-3)
+    np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
+
+    Cb2 = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
+                                       [ot.unif(ns), ot.unif(nt)
+                                        ], ot.unif(n_samples), [.5, .5],
+                                       'kl_loss',  # 5e-4,
+                                       max_iter=100, tol=1e-3)
+    np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))

From 63fd11e8bfd45b163b313c7ad874ef608587fb68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:32:18 +0100
Subject: [PATCH 33/42] add entropic gromov test for 90+% corerage

---
 ot/gromov.py        |  2 +-
 test/test_gromov.py | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/ot/gromov.py b/ot/gromov.py
index e03fa5b0c..65b2e29f9 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -613,7 +613,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
     The function solves the following optimization problem:
 
     .. math::
-        C = argmin_C\in R^NxN \sum_s \lambda_s GW(C,Cs,p,ps)
+        C = argmin_C\in R^{NxN} \sum_s \lambda_s GW(C,Cs,p,ps)
 
 
     Where :
diff --git a/test/test_gromov.py b/test/test_gromov.py
index d865380be..bb23469f4 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -116,3 +116,30 @@ def test_gromov_barycenter():
                                        'kl_loss',  # 5e-4,
                                        max_iter=100, tol=1e-3)
     np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
+
+
+def test_gromov_entropic_barycenter():
+
+    ns = 50
+    nt = 60
+
+    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
+    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)
+
+    C1 = ot.dist(Xs)
+    C2 = ot.dist(Xt)
+
+    n_samples = 3
+    Cb = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
+                                               [ot.unif(ns), ot.unif(nt)
+                                                ], ot.unif(n_samples), [.5, .5],
+                                               'square_loss', 1e-3,
+                                               max_iter=100, tol=1e-3)
+    np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
+
+    Cb2 = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
+                                                [ot.unif(ns), ot.unif(nt)
+                                                 ], ot.unif(n_samples), [.5, .5],
+                                                'kl_loss', 1e-3,
+                                                max_iter=100, tol=1e-3)
+    np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))

From 1262563ef24c9ab0213f616ef01e1c80eb977176 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:50:16 +0100
Subject: [PATCH 34/42] update readme + doc

---
 ot/da.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index f789396a6..5d62d43ea 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -643,7 +643,7 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
     The function estimate the optimal linear operator that align the two
     empirical distributions. This is equivalent to estimating the closed
     form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)`
-    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
+    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark 2.29 in [15].
 
     The linear operator from source to target :math:`M`
 
@@ -692,6 +692,9 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
     .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
         distributions", Journal of Optimization Theory and Applications
         Vol 43, 1984
+        
+    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal 
+        Transport", 2018.
 
 
     """
@@ -1290,7 +1293,8 @@ class LinearTransport(BaseTransport):
     The function estimate the optimal linear operator that align the two
     empirical distributions. This is equivalent to estimating the closed
     form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)`
-    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14].
+    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in 
+    remark 2.29 in [15].
 
     The linear operator from source to target :math:`M`
 
@@ -1314,6 +1318,15 @@ class LinearTransport(BaseTransport):
     log : bool, optional
         record log if True
 
+    References
+    ----------
+
+    .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
+        distributions", Journal of Optimization Theory and Applications
+        Vol 43, 1984
+        
+    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal 
+        Transport", 2018.
 
     """
 

From 0ce1a5ed14e44cd0c596fc0393eceeb8199d20d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:50:45 +0100
Subject: [PATCH 35/42] update doc

---
 README.md              |  4 ++++
 docs/source/readme.rst | 17 ++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 8a9d7faa5..fb7ab2a93 100644
--- a/README.md
+++ b/README.md
@@ -206,3 +206,7 @@ You can also post bug reports and feature requests in Github issues. Make sure t
 [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon, [Gromov-Wasserstein averaging of kernel and distance matrices](http://proceedings.mlr.press/v48/peyre16.html)  International Conference on Machine Learning (ICML). 2016.
 
 [13] Mémoli, Facundo. [Gromov–Wasserstein distances and the metric approach to object matching](https://media.adelaide.edu.au/acvt/Publications/2011/2011-Gromov%E2%80%93Wasserstein%20Distances%20and%20the%20Metric%20Approach%20to%20Object%20Matching.pdf). Foundations of computational mathematics 11.4 (2011): 417-487.
+
+[14] Knott, M. and Smith, C. S. [On the optimal mapping of distributions](https://link.springer.com/article/10.1007/BF00934745), Journal of Optimization Theory and Applications Vol 43, 1984.
+
+[15] Peyré, G., & Cuturi, M. (2017). [Computational Optimal Transport](https://arxiv.org/pdf/1803.00567.pdf) , 2018.
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 347bde20c..647f7e85b 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -1,8 +1,8 @@
 POT: Python Optimal Transport
 =============================
 
-|PyPI version| |Build Status| |Documentation Status| |Anaconda Cloud|
-|License| |Anaconda downloads|
+|PyPI version| |Anaconda Cloud| |Build Status| |Documentation Status|
+|Anaconda downloads| |License|
 
 This open source Python library provide several solvers for optimization
 problems related to Optimal Transport for signal, image processing and
@@ -311,15 +311,22 @@ approach to object
 matching <https://media.adelaide.edu.au/acvt/Publications/2011/2011-Gromov%E2%80%93Wasserstein%20Distances%20and%20the%20Metric%20Approach%20to%20Object%20Matching.pdf>`__.
 Foundations of computational mathematics 11.4 (2011): 417-487.
 
+[14] Knott, M. and Smith, C. S. `On the optimal mapping of
+distributions <https://link.springer.com/article/10.1007/BF00934745>`__,
+Journal of Optimization Theory and Applications Vol 43, 1984.
+
+[15] Peyré, G., & Cuturi, M. (2017). `Computational Optimal
+Transport <https://arxiv.org/pdf/1803.00567.pdf>`__ , 2018.
+
 .. |PyPI version| image:: https://badge.fury.io/py/POT.svg
    :target: https://badge.fury.io/py/POT
+.. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/pot/badges/version.svg
+   :target: https://anaconda.org/conda-forge/pot
 .. |Build Status| image:: https://travis-ci.org/rflamary/POT.svg?branch=master
    :target: https://travis-ci.org/rflamary/POT
 .. |Documentation Status| image:: https://readthedocs.org/projects/pot/badge/?version=latest
    :target: http://pot.readthedocs.io/en/latest/?badge=latest
-.. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/pot/badges/version.svg
+.. |Anaconda downloads| image:: https://anaconda.org/conda-forge/pot/badges/downloads.svg
    :target: https://anaconda.org/conda-forge/pot
 .. |License| image:: https://anaconda.org/conda-forge/pot/badges/license.svg
    :target: https://github.com/rflamary/POT/blob/master/LICENSE
-.. |Anaconda downloads| image:: https://anaconda.org/conda-forge/pot/badges/downloads.svg
-   :target: https://anaconda.org/conda-forge/pot

From 83c706cb6b1c9eb6ca033c58532b85c13b5d40f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:54:00 +0100
Subject: [PATCH 36/42] pep cleanup

---
 ot/da.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index 5d62d43ea..cdebc91bc 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -692,8 +692,8 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
     .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
         distributions", Journal of Optimization Theory and Applications
         Vol 43, 1984
-        
-    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal 
+
+    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
         Transport", 2018.
 
 
@@ -1293,7 +1293,7 @@ class LinearTransport(BaseTransport):
     The function estimate the optimal linear operator that align the two
     empirical distributions. This is equivalent to estimating the closed
     form mapping between two Gaussian distribution :math:`N(\mu_s,\Sigma_s)`
-    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in 
+    and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in
     remark 2.29 in [15].
 
     The linear operator from source to target :math:`M`
@@ -1324,8 +1324,8 @@ class LinearTransport(BaseTransport):
     .. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
         distributions", Journal of Optimization Theory and Applications
         Vol 43, 1984
-        
-    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal 
+
+    .. [15]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
         Transport", 2018.
 
     """

From 69c7d1cb64a5628c69a3c1533991741bcd91f96b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 10:58:20 +0100
Subject: [PATCH 37/42] pep8 unused variable

---
 ot/externals/funcsigs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py
index 82a55dac5..c73fdc96d 100644
--- a/ot/externals/funcsigs.py
+++ b/ot/externals/funcsigs.py
@@ -99,7 +99,7 @@ def signature(obj):
         partial_keywords = obj.keywords or {}
         try:
             ba = sig.bind_partial(*partial_args, **partial_keywords)
-        except TypeError as ex:
+        except TypeError:
             msg = 'partial object {0!r} has incorrect arguments'.format(obj)
             raise ValueError(msg)
 

From 7681db5c19817cfd003cea9ffdd95fedb9b00650 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 21 Mar 2018 11:03:06 +0100
Subject: [PATCH 38/42] update reame

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index fb7ab2a93..353fe32df 100644
--- a/README.md
+++ b/README.md
@@ -13,14 +13,14 @@ This open source Python library provide several solvers for optimization problem
 
 It provides the following solvers:
 
-* OT solver for the linear program/ Earth Movers Distance [1].
+* OT Network Flow solver for the linear program/ Earth Movers Distance [1].
 * Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (required cudamat).
 * Bregman projections for Wasserstein barycenter [3] and unmixing [4].
 * Optimal transport for domain adaptation with group lasso regularization [5]
 * Conditional gradient [6] and Generalized conditional gradient for regularized OT [7].
-* Joint OT matrix and mapping estimation [8].
+* Linear OT [14] and Joint OT matrix and mapping estimation [8].
 * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
-* Gromov-Wasserstein distances and barycenters [12]
+* Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
 
 Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
 

From aa12256ee9545d136472d544cefefa26160ac1f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 2 May 2018 14:48:51 +0200
Subject: [PATCH 39/42] add automatically documentation to gu_fun

---
 ot/utils.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/ot/utils.py b/ot/utils.py
index 6041adde8..38bab66bd 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -112,6 +112,7 @@ def __call__(self, obj):
             return self._decorate_fun(obj)
 
     def _decorate_class(self, cls):
+        print("Warning gpu class not implemented")
         return cls
 
     def _decorate_fun(self, fun):
@@ -151,8 +152,19 @@ def wrapped(*args, **kwargs):
 
     def _update_doc(self, olddoc):
 
+        tofind="""
+
+    Returns
+    -------"""
         # TODO!!!
-        newdoc = olddoc
+        
+        newdoc="""
+    gpu : bool, optional
+        compute on gpu if True
+    to_np : bool, optional
+        convert back gpu array to numpy if True"""+tofind
+        
+        newdoc = olddoc.replace(tofind,newdoc)
         return newdoc
 
 

From aa24c1de39f0a3b9a33af1bab5b3d0280ecd2791 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 2 May 2018 15:34:12 +0200
Subject: [PATCH 40/42] pep8 util function

---
 ot/utils.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/ot/utils.py b/ot/utils.py
index 38bab66bd..b3c0fcf76 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -135,7 +135,7 @@ def wrapped(*args, **kwargs):
             ret = fun(*args, **kwargs)
 
             if cp and to_np:
-                if type(ret) == tuple:
+                if isinstance(ret, tuple):
                     ret = (cp.asnumpy(r) if i in self.out_arrays else r
                            for i, r in enumerate(ret))
                 else:
@@ -152,19 +152,18 @@ def wrapped(*args, **kwargs):
 
     def _update_doc(self, olddoc):
 
-        tofind="""
+        tofind = """
 
     Returns
     -------"""
-        # TODO!!!
-        
-        newdoc="""
+
+        newdoc = """
     gpu : bool, optional
         compute on gpu if True
     to_np : bool, optional
-        convert back gpu array to numpy if True"""+tofind
-        
-        newdoc = olddoc.replace(tofind,newdoc)
+        convert back gpu array to numpy if True""" + tofind
+
+        newdoc = olddoc.replace(tofind, newdoc)
         return newdoc
 
 
@@ -490,7 +489,7 @@ def _is_deprecated(func):
         closures = []
     is_deprecated = ('deprecated' in ''.join([c.cell_contents
                                               for c in closures
-                     if isinstance(c.cell_contents, str)]))
+                                              if isinstance(c.cell_contents, str)]))
     return is_deprecated
 
 

From 6f964e42f6c31accda770ad5acaa73a6853407cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 2 May 2018 15:56:46 +0200
Subject: [PATCH 41/42] better decorate

---
 ot/utils.py        | 9 +++++++--
 test/test_utils.py | 7 +++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ot/utils.py b/ot/utils.py
index b3c0fcf76..52cf8992b 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -8,13 +8,16 @@
 # License: MIT License
 
 import multiprocessing
-from functools import reduce
+from functools import reduce,wraps
 import time
 
 import numpy as np
 from scipy.spatial.distance import cdist
 import sys
 import warnings
+
+from .externals.decorator import decorate
+
 try:
     import cupy as cp
 except ImportError:
@@ -117,7 +120,7 @@ def _decorate_class(self, cls):
 
     def _decorate_fun(self, fun):
         """Decorate function fun"""
-
+        @wraps(fun)
         def wrapped(*args, **kwargs):
             if cp and 'gpu' in kwargs and kwargs['gpu']:
                 # convert args to gpu whose index are in in_arrays
@@ -143,6 +146,8 @@ def wrapped(*args, **kwargs):
                         ret = cp.asnumpy(ret)
 
             return ret
+        
+        wrapped=decorate(fun,wrapped)
 
         wrapped.__name__ = fun.__name__
         wrapped.__dict__ = fun.__dict__
diff --git a/test/test_utils.py b/test/test_utils.py
index 1bd37cdc5..457b454f0 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -123,3 +123,10 @@ def test_clean_zeros():
 
     assert len(a) == n - nz
     assert len(b) == n - nz2
+
+
+def test_gpu_fun():
+
+    n = 100
+
+    A = np.ones((n, n))

From 90636c44d278de19ae68534bdb63003d9bde874d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 2 May 2018 16:46:49 +0200
Subject: [PATCH 42/42] working decorator for gpu

---
 ot/externals/decorator.py | 432 ++++++++++++++++++++++++++++++++++++++
 ot/utils.py               |   9 +-
 test/test_utils.py        |   5 +-
 3 files changed, 440 insertions(+), 6 deletions(-)
 create mode 100644 ot/externals/decorator.py

diff --git a/ot/externals/decorator.py b/ot/externals/decorator.py
new file mode 100644
index 000000000..84ea38678
--- /dev/null
+++ b/ot/externals/decorator.py
@@ -0,0 +1,432 @@
+# #########################     LICENSE     ############################ #
+
+# Copyright (c) 2005-2018, Michele Simionato
+# All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+
+#   Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#   Redistributions in bytecode form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+# DAMAGE.
+
+"""
+Decorator module, see http://pypi.python.org/pypi/decorator
+for the documentation.
+"""
+from __future__ import print_function
+
+import re
+import sys
+import inspect
+import operator
+import itertools
+import collections
+
+__version__ = '4.3.0'
+
+if sys.version >= '3':
+    from inspect import getfullargspec
+
+    def get_init(cls):
+        return cls.__init__
+else:
+    FullArgSpec = collections.namedtuple(
+        'FullArgSpec', 'args varargs varkw defaults '
+        'kwonlyargs kwonlydefaults annotations')
+
+    def getfullargspec(f):
+        "A quick and dirty replacement for getfullargspec for Python 2.X"
+        return FullArgSpec._make(inspect.getargspec(f) + ([], None, {}))
+
+    def get_init(cls):
+        return cls.__init__.__func__
+
+try:
+    iscoroutinefunction = inspect.iscoroutinefunction
+except AttributeError:
+    # let's assume there are no coroutine functions in old Python
+    def iscoroutinefunction(f):
+        return False
+
+
+DEF = re.compile(r'\s*def\s*([_\w][_\w\d]*)\s*\(')
+
+
+# basic functionality
+class FunctionMaker(object):
+    """
+    An object with the ability to create functions with a given signature.
+    It has attributes name, doc, module, signature, defaults, dict and
+    methods update and make.
+    """
+
+    # Atomic get-and-increment provided by the GIL
+    _compile_count = itertools.count()
+
+    # make pylint happy
+    args = varargs = varkw = defaults = kwonlyargs = kwonlydefaults = ()
+
+    def __init__(self, func=None, name=None, signature=None,
+                 defaults=None, doc=None, module=None, funcdict=None):
+        self.shortsignature = signature
+        if func:
+            # func can be a class or a callable, but not an instance method
+            self.name = func.__name__
+            if self.name == '<lambda>':  # small hack for lambda functions
+                self.name = '_lambda_'
+            self.doc = func.__doc__
+            self.module = func.__module__
+            if inspect.isfunction(func):
+                argspec = getfullargspec(func)
+                self.annotations = getattr(func, '__annotations__', {})
+                for a in ('args', 'varargs', 'varkw', 'defaults', 'kwonlyargs',
+                          'kwonlydefaults'):
+                    setattr(self, a, getattr(argspec, a))
+                for i, arg in enumerate(self.args):
+                    setattr(self, 'arg%d' % i, arg)
+                allargs = list(self.args)
+                allshortargs = list(self.args)
+                if self.varargs:
+                    allargs.append('*' + self.varargs)
+                    allshortargs.append('*' + self.varargs)
+                elif self.kwonlyargs:
+                    allargs.append('*')  # single star syntax
+                for a in self.kwonlyargs:
+                    allargs.append('%s=None' % a)
+                    allshortargs.append('%s=%s' % (a, a))
+                if self.varkw:
+                    allargs.append('**' + self.varkw)
+                    allshortargs.append('**' + self.varkw)
+                self.signature = ', '.join(allargs)
+                self.shortsignature = ', '.join(allshortargs)
+                self.dict = func.__dict__.copy()
+        # func=None happens when decorating a caller
+        if name:
+            self.name = name
+        if signature is not None:
+            self.signature = signature
+        if defaults:
+            self.defaults = defaults
+        if doc:
+            self.doc = doc
+        if module:
+            self.module = module
+        if funcdict:
+            self.dict = funcdict
+        # check existence required attributes
+        assert hasattr(self, 'name')
+        if not hasattr(self, 'signature'):
+            raise TypeError('You are decorating a non function: %s' % func)
+
+    def update(self, func, **kw):
+        "Update the signature of func with the data in self"
+        func.__name__ = self.name
+        func.__doc__ = getattr(self, 'doc', None)
+        func.__dict__ = getattr(self, 'dict', {})
+        func.__defaults__ = self.defaults
+        func.__kwdefaults__ = self.kwonlydefaults or None
+        func.__annotations__ = getattr(self, 'annotations', None)
+        try:
+            frame = sys._getframe(3)
+        except AttributeError:  # for IronPython and similar implementations
+            callermodule = '?'
+        else:
+            callermodule = frame.f_globals.get('__name__', '?')
+        func.__module__ = getattr(self, 'module', callermodule)
+        func.__dict__.update(kw)
+
+    def make(self, src_templ, evaldict=None, addsource=False, **attrs):
+        "Make a new function from a given template and update the signature"
+        src = src_templ % vars(self)  # expand name and signature
+        evaldict = evaldict or {}
+        mo = DEF.search(src)
+        if mo is None:
+            raise SyntaxError('not a valid function template\n%s' % src)
+        name = mo.group(1)  # extract the function name
+        names = set([name] + [arg.strip(' *') for arg in
+                              self.shortsignature.split(',')])
+        for n in names:
+            if n in ('_func_', '_call_'):
+                raise NameError('%s is overridden in\n%s' % (n, src))
+
+        if not src.endswith('\n'):  # add a newline for old Pythons
+            src += '\n'
+
+        # Ensure each generated function has a unique filename for profilers
+        # (such as cProfile) that depend on the tuple of (<filename>,
+        # <definition line>, <function name>) being unique.
+        filename = '<decorator-gen-%d>' % (next(self._compile_count),)
+        try:
+            code = compile(src, filename, 'single')
+            exec(code, evaldict)
+        except Exception:
+            print('Error in generated code:', file=sys.stderr)
+            print(src, file=sys.stderr)
+            raise
+        func = evaldict[name]
+        if addsource:
+            attrs['__source__'] = src
+        self.update(func, **attrs)
+        return func
+
+    @classmethod
+    def create(cls, obj, body, evaldict, defaults=None,
+               doc=None, module=None, addsource=True, **attrs):
+        """
+        Create a function from the strings name, signature and body.
+        evaldict is the evaluation dictionary. If addsource is true an
+        attribute __source__ is added to the result. The attributes attrs
+        are added, if any.
+        """
+        if isinstance(obj, str):  # "name(signature)"
+            name, rest = obj.strip().split('(', 1)
+            signature = rest[:-1]  # strip a right parens
+            func = None
+        else:  # a function
+            name = None
+            signature = None
+            func = obj
+        self = cls(func, name, signature, defaults, doc, module)
+        ibody = '\n'.join('    ' + line for line in body.splitlines())
+        caller = evaldict.get('_call_')  # when called from `decorate`
+        if caller and iscoroutinefunction(caller):
+            body = ('async def %(name)s(%(signature)s):\n' + ibody).replace(
+                'return', 'return await')
+        else:
+            body = 'def %(name)s(%(signature)s):\n' + ibody
+        return self.make(body, evaldict, addsource, **attrs)
+
+
+def decorate(func, caller, extras=()):
+    """
+    decorate(func, caller) decorates a function using a caller.
+    """
+    evaldict = dict(_call_=caller, _func_=func)
+    es = ''
+    for i, extra in enumerate(extras):
+        ex = '_e%d_' % i
+        evaldict[ex] = extra
+        es += ex + ', '
+    fun = FunctionMaker.create(
+        func, "return _call_(_func_, %s%%(shortsignature)s)" % es,
+        evaldict, __wrapped__=func)
+    if hasattr(func, '__qualname__'):
+        fun.__qualname__ = func.__qualname__
+    return fun
+
+
+def decorator(caller, _func=None):
+    """decorator(caller) converts a caller function into a decorator"""
+    if _func is not None:  # return a decorated function
+        # this is obsolete behavior; you should use decorate instead
+        return decorate(_func, caller)
+    # else return a decorator function
+    defaultargs, defaults = '', ()
+    if inspect.isclass(caller):
+        name = caller.__name__.lower()
+        doc = 'decorator(%s) converts functions/generators into ' \
+            'factories of %s objects' % (caller.__name__, caller.__name__)
+    elif inspect.isfunction(caller):
+        if caller.__name__ == '<lambda>':
+            name = '_lambda_'
+        else:
+            name = caller.__name__
+        doc = caller.__doc__
+        nargs = caller.__code__.co_argcount
+        ndefs = len(caller.__defaults__ or ())
+        defaultargs = ', '.join(caller.__code__.co_varnames[nargs - ndefs:nargs])
+        if defaultargs:
+            defaultargs += ','
+        defaults = caller.__defaults__
+    else:  # assume caller is an object with a __call__ method
+        name = caller.__class__.__name__.lower()
+        doc = caller.__call__.__doc__
+    evaldict = dict(_call=caller, _decorate_=decorate)
+    dec = FunctionMaker.create(
+        '%s(%s func)' % (name, defaultargs),
+        'if func is None: return lambda func:  _decorate_(func, _call, (%s))\n'
+        'return _decorate_(func, _call, (%s))' % (defaultargs, defaultargs),
+        evaldict, doc=doc, module=caller.__module__, __wrapped__=caller)
+    if defaults:
+        dec.__defaults__ = defaults + (None,)
+    return dec
+
+
+# ####################### contextmanager ####################### #
+
+try:  # Python >= 3.2
+    from contextlib import _GeneratorContextManager
+except ImportError:  # Python >= 2.5
+    from contextlib import GeneratorContextManager as _GeneratorContextManager
+
+
+class ContextManager(_GeneratorContextManager):
+    def __call__(self, func):
+        """Context manager decorator"""
+        return FunctionMaker.create(
+            func, "with _self_: return _func_(%(shortsignature)s)",
+            dict(_self_=self, _func_=func), __wrapped__=func)
+
+
+init = getfullargspec(_GeneratorContextManager.__init__)
+n_args = len(init.args)
+if n_args == 2 and not init.varargs:  # (self, genobj) Python 2.7
+    def __init__(self, g, *a, **k):
+        return _GeneratorContextManager.__init__(self, g(*a, **k))
+    ContextManager.__init__ = __init__
+elif n_args == 2 and init.varargs:  # (self, gen, *a, **k) Python 3.4
+    pass
+elif n_args == 4:  # (self, gen, args, kwds) Python 3.5
+    def __init__(self, g, *a, **k):
+        return _GeneratorContextManager.__init__(self, g, a, k)
+    ContextManager.__init__ = __init__
+
+_contextmanager = decorator(ContextManager)
+
+
+def contextmanager(func):
+    # Enable Pylint config: contextmanager-decorators=decorator.contextmanager
+    return _contextmanager(func)
+
+
+# ############################ dispatch_on ############################ #
+
+def append(a, vancestors):
+    """
+    Append ``a`` to the list of the virtual ancestors, unless it is already
+    included.
+    """
+    add = True
+    for j, va in enumerate(vancestors):
+        if issubclass(va, a):
+            add = False
+            break
+        if issubclass(a, va):
+            vancestors[j] = a
+            add = False
+    if add:
+        vancestors.append(a)
+
+
+# inspired from simplegeneric by P.J. Eby and functools.singledispatch
+def dispatch_on(*dispatch_args):
+    """
+    Factory of decorators turning a function into a generic function
+    dispatching on the given arguments.
+    """
+    assert dispatch_args, 'No dispatch args passed'
+    dispatch_str = '(%s,)' % ', '.join(dispatch_args)
+
+    def check(arguments, wrong=operator.ne, msg=''):
+        """Make sure one passes the expected number of arguments"""
+        if wrong(len(arguments), len(dispatch_args)):
+            raise TypeError('Expected %d arguments, got %d%s' %
+                            (len(dispatch_args), len(arguments), msg))
+
+    def gen_func_dec(func):
+        """Decorator turning a function into a generic function"""
+
+        # first check the dispatch arguments
+        argset = set(getfullargspec(func).args)
+        if not set(dispatch_args) <= argset:
+            raise NameError('Unknown dispatch arguments %s' % dispatch_str)
+
+        typemap = {}
+
+        def vancestors(*types):
+            """
+            Get a list of sets of virtual ancestors for the given types
+            """
+            check(types)
+            ras = [[] for _ in range(len(dispatch_args))]
+            for types_ in typemap:
+                for t, type_, ra in zip(types, types_, ras):
+                    if issubclass(t, type_) and type_ not in t.mro():
+                        append(type_, ra)
+            return [set(ra) for ra in ras]
+
+        def ancestors(*types):
+            """
+            Get a list of virtual MROs, one for each type
+            """
+            check(types)
+            lists = []
+            for t, vas in zip(types, vancestors(*types)):
+                n_vas = len(vas)
+                if n_vas > 1:
+                    raise RuntimeError(
+                        'Ambiguous dispatch for %s: %s' % (t, vas))
+                elif n_vas == 1:
+                    va, = vas
+                    mro = type('t', (t, va), {}).mro()[1:]
+                else:
+                    mro = t.mro()
+                lists.append(mro[:-1])  # discard t and object
+            return lists
+
+        def register(*types):
+            """
+            Decorator to register an implementation for the given types
+            """
+            check(types)
+
+            def dec(f):
+                check(getfullargspec(f).args, operator.lt, ' in ' + f.__name__)
+                typemap[types] = f
+                return f
+            return dec
+
+        def dispatch_info(*types):
+            """
+            An utility to introspect the dispatch algorithm
+            """
+            check(types)
+            lst = []
+            for anc in itertools.product(*ancestors(*types)):
+                lst.append(tuple(a.__name__ for a in anc))
+            return lst
+
+        def _dispatch(dispatch_args, *args, **kw):
+            types = tuple(type(arg) for arg in dispatch_args)
+            try:  # fast path
+                f = typemap[types]
+            except KeyError:
+                pass
+            else:
+                return f(*args, **kw)
+            combinations = itertools.product(*ancestors(*types))
+            next(combinations)  # the first one has been already tried
+            for types_ in combinations:
+                f = typemap.get(types_)
+                if f is not None:
+                    return f(*args, **kw)
+
+            # else call the default implementation
+            return func(*args, **kw)
+
+        return FunctionMaker.create(
+            func, 'return _f_(%s, %%(shortsignature)s)' % dispatch_str,
+            dict(_f_=_dispatch), register=register, default=func,
+            typemap=typemap, vancestors=vancestors, ancestors=ancestors,
+            dispatch_info=dispatch_info, __wrapped__=func)
+
+    gen_func_dec.__name__ = 'dispatch_on' + dispatch_str
+    return gen_func_dec
diff --git a/ot/utils.py b/ot/utils.py
index b5980024a..f2263a737 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -8,7 +8,7 @@
 # License: MIT License
 
 import multiprocessing
-from functools import reduce,wraps
+from functools import reduce, wraps
 import time
 
 import numpy as np
@@ -16,7 +16,7 @@
 import sys
 import warnings
 
-from .externals.decorator import decorate
+#from .externals.decorator import decorate
 
 try:
     import cupy as cp
@@ -149,10 +149,9 @@ def wrapped(*args, **kwargs):
                     if 0 in self.out_arrays:
                         ret = cp.asnumpy(ret)
 
-            return ret
-        
-        wrapped=decorate(fun,wrapped)
+            return ret  # noqa: F821
 
+        #wrapped = decorate(fun, _wrapped)
         wrapped.__name__ = fun.__name__
         wrapped.__dict__ = fun.__dict__
         wrapped.__doc__ = self._update_doc(fun.__doc__)
diff --git a/test/test_utils.py b/test/test_utils.py
index 35731edc0..a52778c6e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -131,7 +131,10 @@ def test_gpu_fun():
     n = 100
 
     A = np.ones((n, n))
-    
+
+    A.sum()
+
+
 def test_cost_normalization():
 
     C = np.random.rand(10, 10)