# Matrix Multiplication with Scaling and Centering

In [1]:
import numpy as np
from numpy import newaxis
from lmdec.array.core.matrix_ops import MatrixMultiplier, diag_dot, sym_mat_mult
from dask.array import broadcast_to, blockwise
import dask.array as da

In [2]:
# Build a small n x p example matrix A and an n x k block of vectors x.
# NOTE: the global NumPy RNG is not seeded in this notebook, so rows 1-2 of A
# and all of x change on every run.
n, p = 6, 10
k = 3
# Start from 0 .. n*p-1 laid out row by row, stored as floats.
A = np.arange(n*p).reshape(n,p).astype(float)
# Replace row 1 with random integers from np.random.randint(1, 10).
A[1, :] = np.random.randint(1,10, size=p)
# Replace row 2 with 10 * standard-normal draws cast to int.
A[2, :] = (10*np.random.randn(p)).astype(int)
# Column 3 is 4 everywhere except entry (3, 3), so it varies in a single row.
# NOTE(review): presumably this keeps the column's std small but nonzero for
# the scaling step -- confirm intent.
A[:,3] = 4
A[3,3] = 5
x = np.random.randn(n, k)
print(A)
print(np.linalg.matrix_rank(A))
# From here on A is a Dask array; mu and std are evaluated eagerly to NumPy.
A = da.array(A)
mu = A.mean(axis=0).compute()
std = A.std(axis=0).compute()
print(mu)

[[  0.   1.   2.   4.   4.   5.   6.   7.   8.   9.]
 [  8.   7.   9.   4.   3.   2.   1.   5.   7.   9.]
 [ -1.  -5.   1.   4.  -3.  -5.  10.   1. -12. -10.]
 [ 30.  31.  32.   5.  34.  35.  36.  37.  38.  39.]
 [ 40.  41.  42.   4.  44.  45.  46.  47.  48.  49.]
 [ 50.  51.  52.   4.  54.  55.  56.  57.  58.  59.]]
5
[21.16666667 21.         23.          4.16666667 22.66666667 22.83333333
 25.83333333 25.66666667 24.5        25.83333333]


In [3]:
# Display the per-column standard deviations of A computed in the previous cell.
std

array([19.90323815, 21.10292239, 20.01665973,  0.372678  , 22.20860694,
       23.0970176 , 21.13777556, 22.1710522 , 25.05826544, 24.70098963])

In [4]:
# Create a multiplier with both scaling and centering enabled and fit it to
# the Dask array A.
dotter = MatrixMultiplier(scale=True, center=True)
dotter.fit(A)

In [5]:
# Apply the fitted multiplier to x and materialize the lazy result.
# For scale=True, center=True the checks in the next cell compare this
# operation against (A - mu) D D (A - mu)' x with D = diag(1 / std).
dotter.sym_mat_mult(x).compute()

array([[-1.42011431,  0.84835846, -2.40872334],
       [-0.84205094,  0.59753467, -1.84791952],
       [-1.8833617 ,  1.98440701, -3.23611781],
       [-2.42022831,  1.29077703,  5.46633713],
       [ 2.76032297, -2.00399447,  0.63299676],
       [ 3.80543229, -2.71708271,  1.39342678]])

In [6]:
def _reference_sym_mat_mult(array, x, scale, center):
    """Compute the dense NumPy reference value ``B B' x``.

    ``B`` is ``array`` with its column means subtracted when ``center`` is
    true, and with every column multiplied by ``1 / std`` when ``scale`` is
    true. Mean and std are taken over ``axis=0`` of the original ``array``.

    Parameters
    ----------
    array : np.ndarray of shape (N, P)
        Dense input matrix A.
    x : np.ndarray of shape (N, K) or (N,)
        Vector(s) to multiply.
    scale : bool
        Whether to apply the column scaling D = diag(1 / std).
    center : bool
        Whether to subtract the column means U.

    Returns
    -------
    np.ndarray
        ``B B' x``, with the same shape as ``x``.
    """
    b_array = array
    if center:
        # B = A - U
        b_array = b_array - np.mean(array, axis=0)
    if scale:
        # B = B D, with D = diag(1 / std)
        b_array = b_array.dot(np.diag(1 / np.std(array, axis=0)))
    return b_array.dot(b_array.T.dot(x))


# (scale, center) settings, in the order the cases were originally checked:
#   (False, False): A A' x
#   (True,  False): (A D)(A D)' x
#   (False, True ): (A - U)(A - U)' x
#   (True,  True ): (A - U) D D (A - U)' x
settings = [(False, False), (True, False), (False, True), (True, True)]

for N in range(2, 5):
    for P in range(2, 5):
        array = np.random.rand(N, P) + 1
        for K in range(1, 5):
            for squeeze in [True, False]:
                x = np.random.rand(N, K)
                if squeeze:
                    # Only changes x when K == 1, turning (N, 1) into (N,).
                    x = np.squeeze(x)

                for scale, center in settings:
                    expected = _reference_sym_mat_mult(array, x, scale, center)
                    # The product must keep the shape of x, whether 1-D or 2-D.
                    assert expected.shape == x.shape
                    dotter = MatrixMultiplier(scale=scale, center=center)
                    dotter.fit(da.array(array))
                    # Tolerance-based comparison for every case: exact float
                    # equality between the NumPy and Dask results is not
                    # something to rely on.
                    np.testing.assert_array_almost_equal(expected, dotter.sym_mat_mult(x))

# Time Testing

In [7]:
# Problem size for the timing runs: array is N x P, x is N x K.
N, P, K = 10_000, 40_000, 40

# Both operands are lazy Dask arrays.
array = da.random.random((N, P))
x = da.random.normal(size=(N, K))

print(array.shape)
print(x.shape)

(10000, 40000)
(10000, 40)


In [8]:
# Time the standalone sym_mat_mult function imported from lmdec on array.T and
# x, for comparison with the MatrixMultiplier timings below.
# NOTE(review): the meaning of log=0 is not visible in this notebook -- confirm.
%timeit sym_mat_mult(array.T, x, log=0).compute()

1.8 s ± 97.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# Multiplier with scaling and centering enabled, fitted to the large array only.
dotter = MatrixMultiplier(scale=True, center=True)
dotter.fit(array)

In [10]:
# Time the scaled and centered product for the multiplier fitted with fit(array).
%timeit dotter.sym_mat_mult(x).compute()

2.15 s ± 70.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Same settings as before, but fit is also given x.
# NOTE(review): what the extra argument to fit changes is not visible in this
# notebook -- confirm against MatrixMultiplier.fit.
dotter = MatrixMultiplier(scale=True, center=True)
dotter.fit(array, x)

In [12]:
# Time the same product for the multiplier fitted with fit(array, x).
%timeit dotter.sym_mat_mult(x).compute()

2.21 s ± 103 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
# Scaled and centered multiplier constructed with factor='n'.
# NOTE(review): the effect of factor='n' is not visible in this notebook;
# presumably it normalizes the product by a sample-size term -- confirm.
dotter = MatrixMultiplier(scale=True, center=True, factor='n')
dotter.fit(array, x)

In [15]:
# Evaluate the factor='n' product once (untimed) and display the dense result.
dotter.sym_mat_mult(x).compute()

array([[ 8.29104071, -3.92924563, -5.7148464 , ...,  0.83622793,
         3.01799054, -1.38628175],
       [ 2.53218333, -2.52837632,  0.68656853, ...,  0.26579153,
         8.44176736,  3.67734918],
       [-4.17830276,  6.2991281 ,  3.34098754, ..., -1.63121347,
        -3.53299901,  3.13479579],
       ...,
       [-1.41816037, -2.04978041,  2.63308465, ...,  3.9189425 ,
         3.29796815, -3.16182578],
       [ 7.48106712,  6.60029325, -2.4488373 , ...,  7.67188935,
         0.74780379,  3.44678434],
       [-0.74315391,  4.57247133, -1.58825693, ..., -8.2200507 ,
         3.0037218 , -0.9391271 ]])