In [1]:
import numpy as np
import torch as tch
import time

# Compare performance of np and tch on matrix eigenvalue decomposition

def test_np(N):
    J = np.random.normal(scale=1./np.sqrt(N), size=(N,N)).astype(dtype=np.float32)
    J = (J + J.T) / np.sqrt(2)
    spectrum, _ = np.linalg.eig(J)

def test_tch_cpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.eig(J)

def test_tch_gpu(N):
    J = tch.normal(tch.zeros([N,N], dtype=tch.float32).cuda(), 1./np.sqrt(N))
    J = (J + J.t()) / np.sqrt(2)
    spectrum, _ = tch.eig(J)

def profile(func, n_runs=10, warmup=0):
    """Time repeated calls of a zero-argument callable.

    Args:
        func: Callable taking no arguments; its return value is ignored.
        n_runs: Number of timed calls (default 10).
        warmup: Number of untimed calls made before timing starts (default 0).
            Useful to exclude one-off start-up costs from the statistics.

    Returns:
        Tuple (mean, std) of the per-call durations in seconds.

    Raises:
        ValueError: If n_runs is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    for _ in range(warmup):
        func()

    durations = np.zeros(n_runs)
    for i in range(n_runs):
        # perf_counter is monotonic and has the highest available resolution,
        # unlike time.time(), which can jump when the system clock is adjusted.
        tic = time.perf_counter()
        func()
        durations[i] = time.perf_counter() - tic
    return np.mean(durations), np.std(durations)


# Run each backend on matrices of increasing size and report mean and standard
# deviation of the wall time per call.
# Check for CUDA once up front so the script still runs on CPU-only machines.
gpu_available = tch.cuda.is_available()
if not gpu_available:
    print('CUDA is not available, skipping the pytorch on GPU benchmark')

for N in [10, 100, 200, 350, 400, 500, 600, 650, 1000]:
    # Always true for the sizes listed above; the cutoff only matters if
    # larger sizes are added to the list.
    if N <= 1000:
        m, v = profile(lambda: test_np(N))
        print('For N = {}, numpy took {:.2e} pm {:.2e}'.format(N, m, v))

    m, v = profile(lambda: test_tch_cpu(N))
    print('For N = {}, pytorch on CPU took {:.2e} pm {:.2e}'.format(N, m, v))

    if gpu_available:
        m, v = profile(lambda: test_tch_gpu(N))
        print('For N = {}, pytorch on GPU took {:.2e} pm {:.2e}'.format(N, m, v))

For N = 10, numpy took 2.27e-04 pm 4.30e-04
For N = 10, pytorch on CPU took 1.39e-04 pm 3.03e-04
For N = 10, pytorch on GPU took 1.74e-01 pm 5.14e-01
For N = 100, numpy took 7.19e-03 pm 1.37e-03
For N = 100, pytorch on CPU took 2.44e-03 pm 1.41e-04
For N = 100, pytorch on GPU took 8.41e-03 pm 9.89e-04
For N = 200, numpy took 3.39e-02 pm 4.23e-03
For N = 200, pytorch on CPU took 1.15e-02 pm 3.68e-04
For N = 200, pytorch on GPU took 1.94e-02 pm 1.11e-03
For N = 350, numpy took 8.51e-02 pm 1.34e-03
For N = 350, pytorch on CPU took 3.29e-02 pm 9.85e-04
For N = 350, pytorch on GPU took 4.05e-02 pm 1.70e-03
For N = 400, numpy took 1.13e-01 pm 4.61e-03
For N = 400, pytorch on CPU took 4.17e-02 pm 9.60e-04
For N = 400, pytorch on GPU took 4.84e-02 pm 7.83e-04
For N = 500, numpy took 1.84e-01 pm 6.38e-03
For N = 500, pytorch on CPU took 6.74e-02 pm 1.76e-03
For N = 500, pytorch on GPU took 7.25e-02 pm 2.09e-03
For N = 600, numpy took 3.03e-01 pm 9.48e-03
For N = 600, pytorch on CPU took 1.20e-0

Conclusion:
Use PyTorch on the CPU (tch_cpu) for small matrices (about a 3x speedup with respect to NumPy), and the GPU only for very large matrices (N >~ 600).
NumPy is never a good choice here...