# Tests on the projection onto the set of real-valued matrices with prescribed row and column sums

In [1]:
import numpy  as np
import pandas as pd
import time
import os
import matplotlib.pyplot as plt

# Particular functions
from numpy import zeros, zeros_like, allclose, where, ones, inf, absolute, linspace, tile, maximum, newaxis, broadcast_to
from numpy.random import default_rng as rng
from numba import jit
from scipy.spatial.distance import cdist
from scipy.linalg import norm

Set dimensions of problem and create randomised instance:

In [2]:
# Problem dimensions: n has N entries and m has M entries.
N = 50*50
M = 65*65

# A single seeded generator feeds both draws. Building a second generator with
# the same seed would replay the identical random stream, so the first N raw
# entries of m would coincide with those of n and the two marginals would not
# be independent.
gen = rng(0)

# Positive random weights, each vector normalised to sum to one.
n = gen.uniform(0.1, 1, N)
n /= n.sum()
m = gen.uniform(0.1, 1, M)
m /= m.sum()

# Cost matrix of shape (M, N) with c[i, j] = n[j] + m[i].
c = (n.reshape(N, 1) + m).T

Initialise the variables and compute the reference result `yₖ`:

In [3]:
# Parameters τ, σ and ρ (presumably the step sizes / relaxation of the
# surrounding algorithm — confirm against the solver this notebook supports).
τ = 1e-3 * 1.9

# Initialise σ slightly below 1/τ, so that σ·τ < 1
σ = 1.0/τ - 1e-5

# Initialise ρ (not used further in this cell)
ρ = 1.9 #- 1e-4 # this helped in 8x8 but not for bigger colour instances

# Fetch lengths of m and n.
M, N = m.size, n.size

# All-ones starting points of shape (M, N).
x = ones((M, N))
y = ones((M, N))

# xₚ is only initialised here; xₖ, yₖ, u, κ_1 and κ_2 are assigned directly
# below, so no zero-filled placeholders are allocated for them.
xₚ = zeros((M, N))

# Step of length τ along -(c + y), then clip negative entries to zero in place.
xₖ = x - τ * (c + y)
maximum(xₖ, 0.0, out=xₖ)

# Matrix that is going to be projected.
u = y/σ + 2.0*xₖ - x
u += 0.1                # To have at least a non-zero value

# Excess of the row sums (length M) and column sums (length N) of u over the
# prescribed sums m and n.
κ_1 = u.sum(1) - m
κ_2 = u.sum(0) - n

# Common offsets: total excess divided by M + N.
β_1 = κ_1.sum() / (M + N)
β_2 = κ_2.sum() / (M + N)

# Reference value of the projection complement, built by explicit tiling:
# entry (i, j) equals σ * ((κ_1[i] - β_1)/N + (κ_2[j] - β_2)/M).
yₖ = σ*(tile( (κ_1 - β_1)/N, (N,1)).T + tile( (κ_2 - β_2)/M, (M,1)))

Testing different ways of computing the projection complement:

In [4]:
# Every entry pairs the printed label with a deferred construction of the
# projection complement; deferring keeps only one candidate array alive at a
# time. Each candidate is compared against the reference yₖ.
candidates = (
    # Original: explicit tiling of both 1-D terms
    (4, lambda: σ*(tile( (κ_1 - β_1)/N, (N,1)).T + tile( (κ_2 - β_2)/M, (M,1)))),
    # Broadcasting between two shapes: (M, 1) column against length-N row
    (3, lambda: σ*((κ_1 - β_1).reshape(M,1)/N + (κ_2 - β_2)/M)),
    # Broadcasting between two shapes, σ multiplied into each 1-D term first
    (2, lambda: ( ( (κ_1 - β_1)*σ/N).reshape(M,1) + σ*(κ_2 - β_2)/M)),
    # Same as above, with a new axis object instead of a reshape
    (1, lambda: ( ( (κ_1 - β_1)*σ/N)[..., newaxis] + σ*(κ_2 - β_2)/M)),
)

for label, build in candidates:
    print(label, allclose(yₖ, build()))

4 True
3 True
2 True
1 True


Let's time the different methods:

In [5]:
# Method 1: new-axis broadcasting, with σ multiplied into each 1-D term before the outer sum.
%timeit -r 10 -n 200  ( ( (κ_1 - β_1)*σ/N)[..., newaxis] + σ*(κ_2 - β_2)/M)

7.32 ms ± 65.6 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


In [6]:
# Method 2: reshape to an (M, 1) column, with σ multiplied into each 1-D term before the outer sum.
%timeit -r 10 -n 200 ( ( (κ_1 - β_1)*σ/N).reshape(M,1) + σ*(κ_2 - β_2)/M)

7.09 ms ± 12.8 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


In [7]:
# Method 3: reshape-based broadcasting, with σ applied to the full (M, N) sum afterwards.
%timeit -r 10 -n 200 σ*((κ_1 - β_1).reshape(M,1)/N + (κ_2 - β_2)/M)

10.1 ms ± 13.3 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


In [8]:
# Method 4 (original): both 1-D terms are tiled to (M, N) before summing and scaling by σ.
%timeit -r 10 -n 200 σ*(tile( (κ_1 - β_1)/N, (N,1)).T + tile( (κ_2 - β_2)/M, (M,1)))

26.8 ms ± 660 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


An alternative using `broadcast_to`:

In [12]:
# Expand both 1-D terms to shape (M, N) with broadcast_to, then compare the
# scaled sum against the reference yₖ.
row_term = broadcast_to((κ_1 - β_1)/N, (N, M)).T
col_term = broadcast_to((κ_2 - β_2)/M, (M, N))
print(5, allclose(yₖ, σ*(row_term + col_term)))

5 True


The difference in performance between tiling and broadcasting is impressive:

In [13]:
# Row term expanded to (M, N) via broadcast_to followed by a transpose.
%timeit -r 10 -n 200 broadcast_to((κ_1 - β_1)/N, (N, M)).T
# The same row term expanded via tile followed by a transpose.
%timeit -r 10 -n 200 tile( (κ_1 - β_1)/N, (N,1)).T

21.2 µs ± 11.1 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)
3.25 ms ± 98.4 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


However, the full expression built with `broadcast_to` does exactly the same work as method 3 above, where σ is applied after the sum (and it even incurs some additional overhead).

In [14]:
# Full projection complement via broadcast_to: both terms expanded to (M, N), summed, then scaled by σ.
%timeit -r 10 -n 200 σ*(broadcast_to((κ_1 - β_1)/N, (N, M)).T + broadcast_to((κ_2 - β_2)/M, (M, N)))

10.1 ms ± 57.1 µs per loop (mean ± std. dev. of 10 runs, 200 loops each)


---