From 7791cee40bf1bed80486fc7747dc14fb47dbc5a1 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 4 Aug 2023 14:28:44 +0200 Subject: [PATCH 1/4] fix gromov doc --- ot/gromov/_bregman.py | 9 ++-- ot/gromov/_estimators.py | 5 ++- ot/gromov/_gw.py | 89 ++++++++++++++++++++++++--------------- ot/gromov/_semirelaxed.py | 24 ++++++----- 4 files changed, 77 insertions(+), 50 deletions(-) diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index 18cef568b..10a0d65c2 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -225,8 +225,9 @@ def entropic_gromov_wasserstein2( C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000, tol=1e-9, solver='PGD', warmstart=False, verbose=False, log=False, **kwargs): r""" - Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein discrepancy :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` estimated using Sinkhorn projections. + The Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. If `solver="PGD"`, the function solves the following entropic-regularized Gromov-Wasserstein optimization problem using Projected Gradient Descent [12]: @@ -351,7 +352,7 @@ def entropic_gromov_barycenters( .. 
math:: - \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) + \mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) Where : @@ -700,7 +701,7 @@ def entropic_fused_gromov_wasserstein2( symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9, solver='PGD', warmstart=False, verbose=False, log=False, **kwargs): r""" - Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` + Returns the Fused Gromov-Wasserstein distance between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`, estimated using Sinkhorn projections. @@ -832,7 +833,7 @@ def entropic_fused_gromov_barycenters( .. 
math:: - \mathbf{C}, \mathbf{Y} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{Y}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s) + \mathbf{C}^*, \mathbf{Y}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{R}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s) Where : diff --git a/ot/gromov/_estimators.py b/ot/gromov/_estimators.py index 0a29a918b..9407ecf64 100644 --- a/ot/gromov/_estimators.py +++ b/ot/gromov/_estimators.py @@ -20,14 +20,15 @@ def GW_distance_estimation(C1, C2, p, q, loss_fun, T, nb_samples_p=None, nb_samples_q=None, std=True, random_state=None): r""" - Returns an approximation of the gromov-wasserstein cost between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns an approximation of the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` with a fixed transport plan :math:`\mathbf{T}`. + An approximation of the Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. The function gives an unbiased approximation of the following equation: .. 
math:: - GW = \sum_{i,j,k,l} L(\mathbf{C_{1}}_{i,k}, \mathbf{C_{2}}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + \mathbf{GW} = \sum_{i,j,k,l} L(\mathbf{C_{1}}_{i,k}, \mathbf{C_{2}}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} Where : diff --git a/ot/gromov/_gw.py b/ot/gromov/_gw.py index adf6b82b1..5b935e34f 100644 --- a/ot/gromov/_gw.py +++ b/ot/gromov/_gw.py @@ -26,9 +26,9 @@ def gromov_wasserstein(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. - The function solves the following optimization problem: + The function solves the following optimization problem using Conditional Gradient: .. math:: \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l} @@ -182,9 +182,10 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs): def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein discrepancy :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. + The Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. - The function solves the following optimization problem: + The function solves the following optimization problem using Conditional Gradient: .. 
math:: \mathbf{GW} = \min_\mathbf{T} \quad \sum_{i,j,k,l} @@ -308,10 +309,13 @@ def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetri def fused_gromov_wasserstein(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5, armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Computes the FGW transport between two graphs (see :ref:`[24] `) + Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] `). + + The function solves the following optimization problem using Conditional Gradient: .. math:: - \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \mathbf{T}^* \in\mathop{\arg\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} s.t. 
\ \mathbf{T} \mathbf{1} &= \mathbf{p} @@ -319,20 +323,22 @@ def fused_gromov_wasserstein(M, C1, C2, p=None, q=None, loss_fun='square_loss', \mathbf{T}^T \mathbf{1} &= \mathbf{q} \mathbf{T} &\geq 0 + Where : - where : - - - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix - - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1) - - `L` is a loss function to account for the misfit between the similarity matrices - + - :math:`\mathbf{M}`: metric cost matrix between features across domains + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity and feature matrices + - :math:`\alpha`: trade-off parameter + .. note:: This function is backend-compatible and will work on arrays from all compatible backends. But the algorithm uses the C++ CPU backend which can lead to copy overhead on GPU arrays. .. note:: All computations in the conjugate gradient solver are done with numpy to limit memory overhead. - The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] ` Parameters ---------- @@ -465,26 +471,32 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs): def fused_gromov_wasserstein2(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5, armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Computes the FGW distance between two graphs see (see :ref:`[24] `) + Returns the Fused Gromov-Wasserstein distance between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] `). - .. 
math:: - \mathbf{GW} = \min_\mathbf{T} \quad (1 - \alpha) \langle \mathbf(T), \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l} - L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - - s.t. \ \mathbf(T)\mathbf{1} &= \mathbf{p} + The function solves the following optimization problem using Conditional Gradient: - \mathbf(T)^T \mathbf{1} &= \mathbf{q} + .. math:: + \mathbf{FGW} = \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \mathbf(T) &\geq 0 + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} - where : + \mathbf{T}^T \mathbf{1} &= \mathbf{q} - - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix - - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1) - - `L` is a loss function to account for the misfit between the similarity matrices + \mathbf{T} &\geq 0 + Where : - The algorithm used for solving the problem is conditional gradient as - discussed in :ref:`[24] ` + - :math:`\mathbf{M}`: metric cost matrix between features across domains + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity and feature matrices + - :math:`\alpha`: trade-off parameter + + Note that when using backends, this loss function is differentiable wrt the + matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_. .. note:: This function is backend-compatible and will work on arrays from all compatible backends. But the algorithm uses the C++ CPU backend @@ -492,9 +504,6 @@ def fused_gromov_wasserstein2(M, C1, C2, p=None, q=None, loss_fun='square_loss', .. 
note:: All computations in the conjugate gradient solver are done with numpy to limit memory overhead. - Note that when using backends, this loss function is differentiable wrt the - matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_. - Parameters ---------- M : array-like, shape (ns, nt) @@ -668,13 +677,13 @@ def gromov_barycenters( max_iter=1000, tol=1e-9, warmstartT=False, verbose=False, log=False, init_C=None, random_state=None, **kwargs): r""" - Returns the gromov-wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}` + Returns the Gromov-Wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}` The function solves the following optimization problem with block coordinate descent: .. math:: - \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) + \mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) Where : @@ -812,8 +821,22 @@ def fgw_barycenters( fixed_features=False, p=None, loss_fun='square_loss', armijo=False, symmetric=True, max_iter=100, tol=1e-9, warmstartT=False, verbose=False, log=False, init_C=None, init_X=None, random_state=None, **kwargs): - r"""Compute the fgw barycenter as presented eq (5) in :ref:`[24] ` + r""" + Returns the Fused Gromov-Wasserstein barycenters of `S` measurable networks with node features :math:`(\mathbf{C}_s, \mathbf{Y}_s, \mathbf{p}_s)_{1 \leq s \leq S}` + (see eq (5) in :ref:`[24] `), estimated using Fused Gromov-Wasserstein transports from Conditional Gradient solvers. + The function solves the following optimization problem: + + .. 
math:: + + \mathbf{C}^*, \mathbf{Y}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{R}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s) + + Where : + + - :math:`\mathbf{Y}_s`: feature matrix + - :math:`\mathbf{C}_s`: metric cost matrix + - :math:`\mathbf{p}_s`: distribution + Parameters ---------- N : int diff --git a/ot/gromov/_semirelaxed.py b/ot/gromov/_semirelaxed.py index 206329dfc..b36a81c75 100644 --- a/ot/gromov/_semirelaxed.py +++ b/ot/gromov/_semirelaxed.py @@ -21,12 +21,12 @@ def semirelaxed_gromov_wasserstein(C1, C2, p=None, loss_fun='square_loss', symmetric=None, log=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` + Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` (see [48]). - The function solves the following optimization problem: + The function solves the following optimization problem using Conditional Gradient: .. math:: - \mathbf{T}^^* \in \mathop{\arg \min}_{\mathbf{T}} \quad \sum_{i,j,k,l} + \mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} @@ -152,9 +152,9 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs): def semirelaxed_gromov_wasserstein2(C1, C2, p=None, loss_fun='square_loss', symmetric=None, log=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the semi-relaxed gromov-wasserstein divergence from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` + Returns the semi-relaxed Gromov-Wasserstein divergence from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` (see [48]). 
- The function solves the following optimization problem: + The function solves the following optimization problem using Conditional Gradient: .. math:: \text{srGW} = \min_{\mathbf{T}} \quad \sum_{i,j,k,l} @@ -255,7 +255,7 @@ def semirelaxed_fused_gromov_wasserstein( M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, alpha=0.5, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Computes the semi-relaxed FGW transport between two graphs (see :ref:`[48] `) + Computes the semi-relaxed Fused Gromov-Wasserstein transport between two graphs (see [48]). .. math:: \mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + @@ -395,10 +395,10 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs): def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, alpha=0.5, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Computes the semi-relaxed FGW divergence between two graphs (see :ref:`[48] `) + Computes the semi-relaxed FGW divergence between two graphs (see [48]). .. math:: - \mathbf{srFGW} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \mathbf{srFGW}_{\alpha} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) T_{i,j} T_{k,l} s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} @@ -511,7 +511,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p=None, loss_fun='square_lo def solve_semirelaxed_gromov_linesearch(G, deltaG, cost_G, C1, C2, ones_p, M, reg, alpha_min=None, alpha_max=None, nx=None, **kwargs): """ - Solve the linesearch in the FW iterations + Solve the linesearch in the Conditional Gradient iterations for the semi-relaxed Gromov-Wasserstein divergence. 
Parameters ---------- @@ -829,6 +829,7 @@ def entropic_semirelaxed_fused_gromov_wasserstein( alpha=0.5, G0=None, max_iter=1e4, tol=1e-9, log=False, verbose=False, **kwargs): r""" Computes the entropic-regularized semi-relaxed FGW transport between two graphs (see :ref:`[48] `) + estimated using a Mirror Descent algorithm following the KL geometry. .. math:: \mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + @@ -988,10 +989,11 @@ def entropic_semirelaxed_fused_gromov_wasserstein2( M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, epsilon=0.1, alpha=0.5, G0=None, max_iter=1e4, tol=1e-9, log=False, verbose=False, **kwargs): r""" - Computes the entropic-regularized semi-relaxed FGW transport between two graphs (see :ref:`[48] `) + Computes the entropic-regularized semi-relaxed FGW divergence between two graphs (see :ref:`[48] `) + estimated using a Mirror Descent algorithm following the KL geometry. .. math:: - \mathbf{srFGW} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \mathbf{srFGW}_{\alpha} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} s.t. 
\ \mathbf{T} \mathbf{1} &= \mathbf{p} From ff4715ae754025de7cb1002c84361cbfd20565c9 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 4 Aug 2023 14:38:22 +0200 Subject: [PATCH 2/4] fix pep8 + update RELEASE --- RELEASES.md | 1 + ot/gromov/_gw.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/RELEASES.md b/RELEASES.md index 6a2cdf9f4..c8e735db9 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -33,6 +33,7 @@ Many other bugs and issues have been fixed and we want to thank all the contribu #### Closed issues +- Fix gromov conventions (PR #497) - Fix change in scipy API for `cdist` (PR #487) - More permissive check_backend (PR #494) - Fix circleci-redirector action and codecov (PR #460) diff --git a/ot/gromov/_gw.py b/ot/gromov/_gw.py index 5b935e34f..10f77104e 100644 --- a/ot/gromov/_gw.py +++ b/ot/gromov/_gw.py @@ -332,7 +332,7 @@ def fused_gromov_wasserstein(M, C1, C2, p=None, q=None, loss_fun='square_loss', - :math:`\mathbf{q}`: distribution in the target space - `L`: loss function to account for the misfit between the similarity and feature matrices - :math:`\alpha`: trade-off parameter - + .. note:: This function is backend-compatible and will work on arrays from all compatible backends. But the algorithm uses the C++ CPU backend which can lead to copy overhead on GPU arrays. @@ -494,7 +494,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p=None, q=None, loss_fun='square_loss', - :math:`\mathbf{q}`: distribution in the target space - `L`: loss function to account for the misfit between the similarity and feature matrices - :math:`\alpha`: trade-off parameter - + Note that when using backends, this loss function is differentiable wrt the matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_. 
@@ -836,7 +836,7 @@ def fgw_barycenters( - :math:`\mathbf{Y}_s`: feature matrix - :math:`\mathbf{C}_s`: metric cost matrix - :math:`\mathbf{p}_s`: distribution - + Parameters ---------- N : int From f44f81bab08cbe0c58cdd5a760d76141e2b30408 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 4 Aug 2023 14:59:50 +0200 Subject: [PATCH 3/4] improve doc --- ot/gromov/_bregman.py | 3 +-- ot/gromov/_estimators.py | 3 +-- ot/gromov/_gw.py | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index 10a0d65c2..fe249639e 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -226,8 +226,7 @@ def entropic_gromov_wasserstein2( tol=1e-9, solver='PGD', warmstart=False, verbose=False, log=False, **kwargs): r""" Returns the Gromov-Wasserstein discrepancy :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` - estimated using Sinkhorn projections. - The Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. + estimated using Sinkhorn projections. To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. If `solver="PGD"`, the function solves the following entropic-regularized Gromov-Wasserstein optimization problem using Projected Gradient Descent [12]: diff --git a/ot/gromov/_estimators.py b/ot/gromov/_estimators.py index 9407ecf64..7ea7e2a8c 100644 --- a/ot/gromov/_estimators.py +++ b/ot/gromov/_estimators.py @@ -21,8 +21,7 @@ def GW_distance_estimation(C1, C2, p, q, loss_fun, T, nb_samples_p=None, nb_samples_q=None, std=True, random_state=None): r""" Returns an approximation of the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` - with a fixed transport plan :math:`\mathbf{T}`. 
- An approximation of the Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. + with a fixed transport plan :math:`\mathbf{T}`. To recover an approximation of the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. The function gives an unbiased approximation of the following equation: diff --git a/ot/gromov/_gw.py b/ot/gromov/_gw.py index 10f77104e..20373f33b 100644 --- a/ot/gromov/_gw.py +++ b/ot/gromov/_gw.py @@ -182,8 +182,8 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs): def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the Gromov-Wasserstein discrepancy :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. - The Gromov-Wasserstein distance as defined in [13] satisfies :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. + Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. + To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. 
The function solves the following optimization problem using Conditional Gradient: From d9b7fe614a6a1134e758581a3f7fe3c70edd0771 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 4 Aug 2023 16:20:14 +0200 Subject: [PATCH 4/4] merge --- ot/gromov/_bregman.py | 2 +- ot/gromov/_estimators.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index fe249639e..792ed6e86 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -225,7 +225,7 @@ def entropic_gromov_wasserstein2( C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000, tol=1e-9, solver='PGD', warmstart=False, verbose=False, log=False, **kwargs): r""" - Returns the Gromov-Wasserstein discrepancy :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` estimated using Sinkhorn projections. To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. If `solver="PGD"`, the function solves the following entropic-regularized diff --git a/ot/gromov/_estimators.py b/ot/gromov/_estimators.py index 7ea7e2a8c..7e12ef930 100644 --- a/ot/gromov/_estimators.py +++ b/ot/gromov/_estimators.py @@ -20,7 +20,7 @@ def GW_distance_estimation(C1, C2, p, q, loss_fun, T, nb_samples_p=None, nb_samples_q=None, std=True, random_state=None): r""" - Returns an approximation of the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns an approximation of the Gromov-Wasserstein loss between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` with a fixed transport plan :math:`\mathbf{T}`. 
To recover an approximation of the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. The function gives an unbiased approximation of the following equation: