## Performance comparison

based on the Householder algorithm for orthogonal triangularization

In [1]:
import numpy as np

import numba

###  Test matrices

In [2]:
def generate_test_matrices(num=50):
    matrices = []
    
    def generate_matrix(m, n):
        re = np.random.rand(m, n)
        im = np.random.rand(m, n)
        return (re + 1j * im)
    
    for i in range(num):
        two_random_ints = np.random.randint(low=2, high=100, size=2)
        m, n = np.max(two_random_ints), np.min(two_random_ints)
        matrices.append(generate_matrix(m, n))
    
    return matrices

In [3]:
matrices  = generate_test_matrices(num=10)

In [6]:
matrices[1].shape

(25, 2)

### Python + Numpy implementation

In [4]:
def house(A):
    """
    Computes an implicit representation of a full QR factorization A = QR
    of an m x n matrix A with m ≥ n using Householder reﬂections.
    
    Returns
    -------
    - lower-triangular matrix W ∈ C m×n whose columns are the vectors v_k
        deﬁning the successive Householder reﬂections
    - triangular matrix R ∈ C n x n
    """
    m, n = A.shape
    assert m >= n
    
    R = np.copy(A).astype(complex)
    W = np.zeros_like(R, dtype=complex)
    
    for k in range(n):
        v_k = np.copy(R[k:, k])
        sgn = np.sign(v_k[0])
        if  sgn == 0: sgn = 1
        v_k[0] += np.exp(1j*np.angle(v_k[0])) * sgn * np.linalg.norm(np.abs(v_k))
        v_k /= np.linalg.norm(v_k)
        W[k:, k] = v_k
        R[k:, k:] -= 2 * np.outer(v_k, np.dot(v_k.conj().T, R[k:, k:]))           # 124 ms
        #R[k:, k:] -= 2 * np.dot(np.outer(v_k, v_k.conj().T), R[k:, k:]) # slower # 155 ms
    if m > n:
        R = np.copy(R[:n,:])
    return W, R

In [5]:
def formQ(W):
    """ generates a corresponding m × m orthogonal matrix Q
    """
    m, n = W.shape
    Q = np.eye(m, dtype=complex)
    
    for i in range(n):
        for k in range(n-1, -1, -1):
            v_k = W[k:, k]
            Q[k:, i] -= 2 * v_k * np.dot(v_k.conjugate(), Q[k:, i])
    return Q

In [8]:
def test_house(A):
    m, n = A.shape
    assert m >= n
    
    W, R = house(A)
    Q = formQ(W)
    
    assert np.allclose(A, Q[:, :n].dot(R))

test_house(matrices[0])

In [16]:
def run():
    for i in range(len(matrices)):
        test_house(matrices[i])

In [19]:
%timeit run()

221 ms ± 8.43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Python + Numpy + Numba implementation

In [20]:
@numba.jit(numba.types.UniTuple(numba.complex128[:,:], 2)(numba.complex128[:,:]), nopython=True)
def house(A):
    """
    Computes an implicit representation of a full QR factorization A = QR
    of an m x n matrix A with m ≥ n using Householder reﬂections.
    
    Returns
    -------
    - lower-triangular matrix W ∈ C m×n whose columns are the vectors v_k
        deﬁning the successive Householder reﬂections
    - upper-triangular matrix R ∈ C n x n
    """
    m, n = A.shape
    assert m >= n
    
    R = np.copy(A)
    W = np.zeros_like(R, dtype=numba.complex128)
    
    for k in range(n):
        v_k = np.copy(R[k:, k])
        sgn = np.sign(v_k[0])
        if  sgn == 0: sgn = 1
        v_k[0] += np.exp(1j*np.angle(v_k[0])) * sgn * np.linalg.norm(np.abs(v_k))
        v_k /= np.linalg.norm(v_k)
        W[k:, k] = v_k
        R[k:, k:] -= 2 * np.outer(v_k, np.dot(np.conjugate(v_k).T, R[k:, k:]))          # 28 ms
        #R[k:, k:] -= 2 * np.dot(np.outer(v_k, np.conjugate(v_k).T), R[k:, k:])  # slower 31.5 ms
    if m > n:
        R = np.copy(R[:n,:])
    return W, R

In [23]:
@numba.jit(numba.complex128[:,:](numba.complex128[:,:]), nopython=True)
def formQ(W):
    """
    Generates a corresponding m × m orthogonal matrix Q.
    """
    m, n = W.shape
    #np.eye(m, dtype=complex128) does not work
    Q = np.zeros((m, m), dtype=numba.complex128)
    for i in range(m):
        Q[i, i] = 1
    
    for i in range(n):
        for k in range(n-1, -1, -1):
            v_k = W[k:, k]
            Q[k:, i] -= 2 * v_k * np.dot(np.conjugate(v_k), Q[k:, i])
    return Q

In [24]:
%timeit run()

71.1 ms ± 3.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### C++ + xtensor implementation (+ pybind11 Python bindings)

In [1]:
import house_cpp
import numpy as np

In [2]:
house_cpp.house(np.ones((5,3)))

array([[ 0.+0.j,  0.+0.j,  0.+0.j],
       [ 0.+0.j,  0.+0.j,  0.+0.j],
       [ 0.+0.j,  0.+0.j,  0.+0.j],
       [ 0.+0.j,  0.+0.j,  0.+0.j],
       [ 0.+0.j,  0.+0.j,  0.+0.j]])

In [11]:
W, R = house(matrices[1])

In [10]:
W1, R1 = house_cpp.house(matrices[1])

(88, 78)

In [11]:
np.allclose(W, W1)

True

In [12]:
np.allclose(formQ(W), house_cpp.formQ(W))

True