In [2]:
import numpy as np
import torch as tch
import time

# Compare performance of np and tch on matrix eigenvalue decomposition

def test_np(N):
    J = np.random.normal(scale=1./np.sqrt(N), size=(N,N)).astype(dtype=np.float32)
    J = (J + J.T) / np.sqrt(2)
    spectrum, _ = np.linalg.eig(J)

def test_tch_cpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.eig(J)

def test_tch_gpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32).cuda(), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.eig(J)

def profile(func, n_runs=10, warmup=0):
    """Time repeated calls of ``func`` and summarize the wall-clock durations.

    Parameters
    ----------
    func : callable
        Zero-argument callable to benchmark.
    n_runs : int, optional
        Number of timed calls (default 10).
    warmup : int, optional
        Number of untimed calls made before measuring, to keep one-off
        start-up costs out of the statistics (default 0, i.e. none).

    Returns
    -------
    (float, float)
        Mean and standard deviation of the per-call durations, in seconds.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {}'.format(n_runs))
    for _ in range(warmup):
        func()
    durations = np.zeros(n_runs)
    for i in range(n_runs):
        # perf_counter is monotonic and high-resolution; time.time() can jump
        # and is too coarse for calls that last well under a millisecond.
        tic = time.perf_counter()
        func()
        durations[i] = time.perf_counter() - tic
    return np.mean(durations), np.std(durations)


# Largest matrix size for which the NumPy benchmark is still run.
NP_MAX_N = 1000

for N in [10, 100, 200, 350, 400, 500, 600, 650, 1000]:
    # (label, benchmark function) pairs to time for this matrix size.
    benchmarks = []
    if N <= NP_MAX_N:
        benchmarks.append(('numpy', test_np))
    benchmarks.append(('pytorch on CPU', test_tch_cpu))
    # Only benchmark the GPU when one is actually present, instead of
    # commenting the code in and out by hand.
    if tch.cuda.is_available():
        benchmarks.append(('pytorch on GPU', test_tch_gpu))

    for label, bench in benchmarks:
        # The lambda is called immediately inside profile, so capturing the
        # loop variables by reference is safe here.
        m, v = profile(lambda: bench(N))
        print('For N = {}, {} took {:.2e} pm {:.2e}'.format(N, label, m, v))

For N = 10, numpy took 9.38e-04 pm 1.92e-03
For N = 10, pytorch on CPU took 1.12e-04 pm 6.61e-05
For N = 100, numpy took 3.20e-02 pm 2.69e-02
For N = 100, pytorch on CPU took 5.56e-03 pm 5.39e-03
For N = 200, numpy took 1.58e-01 pm 1.22e-01
For N = 200, pytorch on CPU took 4.13e-02 pm 5.63e-02
For N = 350, numpy took 4.33e-01 pm 1.74e-01
For N = 350, pytorch on CPU took 1.57e-01 pm 7.73e-02
For N = 400, numpy took 5.31e-01 pm 1.63e-01
For N = 400, pytorch on CPU took 2.20e-01 pm 6.77e-02
For N = 500, numpy took 5.51e-01 pm 1.15e-01
For N = 500, pytorch on CPU took 3.97e-01 pm 1.69e-01
For N = 600, numpy took 9.79e-01 pm 1.21e-01
For N = 600, pytorch on CPU took 4.86e-01 pm 1.34e-01
For N = 650, numpy took 1.15e+00 pm 2.07e-01
For N = 650, pytorch on CPU took 4.96e-01 pm 1.82e-01
For N = 1000, numpy took 2.09e+00 pm 1.80e-01
For N = 1000, pytorch on CPU took 1.29e+00 pm 2.93e-01


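Conclusion:
Use PyTorch on the CPU (tch_cpu) for small matrices (roughly 3x speedup with respect to NumPy), and the GPU only for very large matrices (N >~ 600; the GPU timings are not part of the output shown above).
In these measurements, NumPy is never the best choice.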

In [3]:
import numpy as np
import torch as tch
import time

# Compare performance of np and tch on matrix eigenvalue decomposition for symmetric matrices

def test_np(N):
    J = np.random.normal(scale=1./np.sqrt(N), size=(N,N)).astype(dtype=np.float32)
    J = (J + J.T) / np.sqrt(2)
    spectrum, _ = np.linalg.eigh(J)

def test_tch_cpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.symeig(J)

def test_tch_gpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32).cuda(), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.symeig(J)

def profile(func, n_runs=10, warmup=0):
    """Time repeated calls of ``func`` and summarize the wall-clock durations.

    Parameters
    ----------
    func : callable
        Zero-argument callable to benchmark.
    n_runs : int, optional
        Number of timed calls (default 10).
    warmup : int, optional
        Number of untimed calls made before measuring, to keep one-off
        start-up costs out of the statistics (default 0, i.e. none).

    Returns
    -------
    (float, float)
        Mean and standard deviation of the per-call durations, in seconds.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {}'.format(n_runs))
    for _ in range(warmup):
        func()
    durations = np.zeros(n_runs)
    for i in range(n_runs):
        # perf_counter is monotonic and high-resolution; time.time() can jump
        # and is too coarse for calls that last well under a millisecond.
        tic = time.perf_counter()
        func()
        durations[i] = time.perf_counter() - tic
    return np.mean(durations), np.std(durations)


# Largest matrix size for which the NumPy benchmark is still run.
NP_MAX_N = 1000

for N in [10, 100, 200, 350, 400, 500, 600, 650, 1000]:
    # (label, benchmark function) pairs to time for this matrix size.
    benchmarks = []
    if N <= NP_MAX_N:
        benchmarks.append(('numpy', test_np))
    benchmarks.append(('pytorch on CPU', test_tch_cpu))
    # Only benchmark the GPU when one is actually present, instead of
    # commenting the code in and out by hand.
    if tch.cuda.is_available():
        benchmarks.append(('pytorch on GPU', test_tch_gpu))

    for label, bench in benchmarks:
        # The lambda is called immediately inside profile, so capturing the
        # loop variables by reference is safe here.
        m, v = profile(lambda: bench(N))
        print('For N = {}, {} took {:.2e} pm {:.2e}'.format(N, label, m, v))

For N = 10, numpy took 1.73e-04 pm 2.29e-04
For N = 10, pytorch on CPU took 8.18e-05 pm 7.72e-05
For N = 100, numpy took 2.82e-02 pm 2.06e-02
For N = 100, pytorch on CPU took 7.01e-04 pm 1.36e-04
For N = 200, numpy took 4.73e-02 pm 1.51e-02
For N = 200, pytorch on CPU took 6.34e-03 pm 2.78e-03
For N = 350, numpy took 7.06e-02 pm 5.78e-02
For N = 350, pytorch on CPU took 1.76e-02 pm 1.22e-02
For N = 400, numpy took 1.04e-01 pm 4.56e-02
For N = 400, pytorch on CPU took 2.19e-02 pm 2.18e-02
For N = 500, numpy took 1.75e-01 pm 1.43e-01
For N = 500, pytorch on CPU took 4.63e-02 pm 6.62e-02
For N = 600, numpy took 2.05e-01 pm 7.97e-02
For N = 600, pytorch on CPU took 4.58e-02 pm 2.84e-02
For N = 650, numpy took 2.26e-01 pm 1.08e-01
For N = 650, pytorch on CPU took 6.87e-02 pm 4.47e-02
For N = 1000, numpy took 4.91e-01 pm 1.78e-01
For N = 1000, pytorch on CPU took 2.24e-01 pm 9.60e-02


In [None]:
Conclusion:
For symmetric matrices, use the symmetric solver (symeig) rather than the general one (eig): it is MUCH faster on large matrices.