In [22]:
import cupy as cp
import numpy as np
import time

In [23]:
print(cp.cuda.runtime.getDeviceCount())
cp.cuda.Device(0).use()

1


In [29]:
s = time.time()
for i in range(100):
    u = cp.random.random((1000,1000))
    d = cp.linalg.inv(u)
    r = cp.linalg.norm(cp.ones((1000,1000))-d*u)
e = time.time()
print(e-s)

5.635090351104736


In [27]:
s = time.time()
for i in range(100):
    u = np.random.random((1000,1000))
    d = np.linalg.inv(u)
    r = np.linalg.norm(np.ones((1000,1000))-d*u)
e = time.time()
print(e-s)

5.920733451843262


In [7]:
!set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5

In [41]:
### Numpy and CPU
s = time.time()
x_cpu = np.ones((400,400,400))
e = time.time()
print(e - s)

### CuPy and GPU
s = time.time()
x_gpu = cp.ones((400,400,400))
cp.cuda.Stream.null.synchronize()
e = time.time()
print(e - s)

0.11594510078430176
0.10025477409362793


In [42]:
### Numpy and CPU
s = time.time()
x_cpu *= 5
e = time.time()
print(e - s)

### CuPy and GPU
s = time.time()
x_gpu *= 5
cp.cuda.Stream.null.synchronize()
e = time.time()
print(e - s)

0.08469271659851074
0.15117931365966797


In [43]:
### Numpy and CPU
s = time.time()
x_cpu *= 5
x_cpu *= x_cpu
x_cpu += x_cpu
e = time.time()
print(e - s)

### CuPy and GPU
s = time.time()
x_gpu *= 5
x_gpu *= x_gpu
x_gpu += x_gpu
cp.cuda.Stream.null.synchronize()
e = time.time()
print(e - s)

0.2282717227935791
0.5245921611785889


In [52]:
### Numpy and CPU
s = time.time()
x_cpu = np.random.random((4000,4000))
inv_x_cpu = np.linalg.inv(x_cpu)
uns = np.matmul(x_cpu,inv_x_cpu)
e = time.time()
print(e - s)

### CuPy and GPU
s = time.time()
x_gpu = cp.random.random((4000,4000))
inv_x_gpu = cp.linalg.inv(x_gpu)
uns = cp.matmul(x_gpu,inv_x_gpu)
e = time.time()
print(e - s)

2.670926332473755
1.385422945022583


In [57]:
%%timeit
x_cpu = np.random.random((4000,4000))
inv_x_cpu = np.linalg.inv(x_cpu)
uns = np.matmul(x_cpu,inv_x_cpu)

2.84 s ± 73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [59]:
%%timeit
x_gpu = cp.random.random((4000,4000))
inv_x_gpu = cp.linalg.inv(x_gpu)
uns = cp.matmul(x_gpu,inv_x_gpu)

3.44 s ± 11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [62]:
%%timeit
x_cpu = np.random.random((3000,3000))
np.fft.fft(x_cpu)

204 ms ± 6.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [63]:
%%timeit
x_gpu = cp.random.random((3000,3000))
cp.fft.fft(x_gpu)

23.8 ms ± 37.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [76]:
%%timeit
u_cpu = np.random.rand(100000)
v_cpu = np.random.rand(100000)
w = np.convolve(u_cpu,v_cpu)

1.09 s ± 59.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [75]:
%%timeit
u_gpu = cp.random.rand(100000)
v_gpu = cp.random.rand(100000)
w = cp.convolve(u_gpu,v_gpu)

1.22 ms ± 7.28 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
