In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cupy as cp
import h5py

# Global matplotlib defaults for every figure in this notebook.
plt.rc('figure', figsize=(6,4))  # default figure size
plt.rc('font', size=11)          # default font size
plt.style.use('dark_background')  # applied after the rc calls above
# Optional: uncomment for a transparent figure background.
# plt.rc('figure',facecolor=(0,0,0,0)) # Set transparent background
# IPython magic: ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format='retina'

In [2]:
import quflow as qf
from cupyx.profiler import benchmark  # timing helper used by the benchmark cell
# Disabled GPU status check; uncomment to run it.
#qf.gpu.check_status()
# Empty module-level dict. It is not referenced by any cell visible in this
# notebook — NOTE(review): confirm whether it is still needed.
_tridiagonal_laplacian_cp_cache2 = dict()



In [4]:
def get_random_mat_cp(N=5,seed=None):
    """Return a random traceless skew-Hermitian N x N complex matrix (CuPy).

    Parameters
    ----------
    N : int
        Number of rows and columns of the matrix.
    seed : optional
        If not None, it is passed to ``cp.random.seed`` before sampling, which
        reseeds CuPy's global random state. If None, the current state is used.

    Returns
    -------
    W : cupy.ndarray
        Complex N x N array with ``W == -W.conj().T`` and zero trace
        (up to floating-point round-off).
    """
    # Identity test instead of `!= None`: `!=` is evaluated elementwise for
    # array-like seeds, which makes the `if` ambiguous or raises.
    if seed is not None:
        cp.random.seed(seed)

    # Complex matrix with independent standard-normal real and imaginary parts.
    W = cp.random.randn(N, N) + 1j*cp.random.randn(N, N)
    # W - W^H is skew-Hermitian.
    W -= W.conj().T
    # Subtract trace/N from the diagonal so the result is traceless.
    W -= cp.eye(N)*cp.trace(W)/N
    return W

def get_random_mat_np(N=5,seed=None):
    """Return a random traceless skew-Hermitian N x N complex matrix (NumPy).

    Parameters
    ----------
    N : int
        Number of rows and columns of the matrix.
    seed : optional
        If not None, it is passed to ``np.random.seed`` before sampling, which
        reseeds NumPy's global random state. If None, the current state is used.

    Returns
    -------
    W : numpy.ndarray
        Complex N x N array with ``W == -W.conj().T`` and zero trace
        (up to floating-point round-off).
    """
    # Identity test instead of `!= None`: with an ndarray seed, `seed != None`
    # is an elementwise comparison and the `if` raises ValueError.
    if seed is not None:
        np.random.seed(seed)

    # Complex matrix with independent standard-normal real and imaginary parts.
    W = np.random.randn(N, N) + 1j*np.random.randn(N, N)
    # W - W^H is skew-Hermitian.
    W -= W.conj().T
    # Subtract trace/N from the diagonal so the result is traceless.
    W -= np.eye(N)*np.trace(W)/N
    return W

def mult(A,B,C):
    """Compute the matrix product of A and B with ``cp.matmul``, writing it into C.

    The product is stored in the preallocated array ``C`` through the ``out``
    argument; the function itself returns None.
    """
    cp.matmul(A,B,out=C)

In [10]:
N = 4000 # Size of matrices
n_repeat = 1000
# Fixed seed so the input matrix, and therefore the discrepancy norms printed
# at the end of this cell, are the same on every Restart & Run All.
BENCH_SEED = 0

# Cupy arrays
W0_cp = get_random_mat_cp(N, seed=BENCH_SEED)

# One solver object and one zero-initialised array (same shape/dtype as W0_cp)
# per implementation; each array is passed as the second argument of the
# corresponding solve_poisson call — presumably the output buffer, verify
# against quflow.
P0_lm = cp.zeros_like(W0_cp)
ham_lm = qf.gpu.solve_poisson_interleaved_lessmemory_cp(N)

P0_c = cp.zeros_like(W0_cp)
ham_c = qf.gpu.solve_poisson_interleaved_cp(N)

P0_tf = cp.zeros_like(W0_cp)
ham_tf = qf.gpu.solve_poisson_cp(N)


# Time each solver with cupyx.profiler.benchmark, which reports CPU and GPU
# times over n_repeat calls.
print("Self made kernel with interleaved memory and overwriting:")
print(benchmark(ham_lm.solve_poisson,(W0_cp,P0_lm),n_repeat=n_repeat))

print("Self made kernel with interleaved memory and caching underway:")
print(benchmark(ham_c.solve_poisson,(W0_cp,P0_c),n_repeat=n_repeat))

print("Tensorflow tridiag:")
print(benchmark(ham_tf.solve_poisson,(W0_cp,P0_tf),n_repeat=n_repeat))

# Consistency check: norm of the difference between the arrays associated
# with each implementation (caching variant used as the reference).
print(cp.linalg.norm(P0_c-P0_lm))
print(cp.linalg.norm(P0_c-P0_tf))


Self made kernel with interleaved memory and overwriting:
solve_poisson       :    CPU: 17394.087 us   +/- 237.593 (min: 16503.000 / max: 19401.500) us     GPU-0: 20106.439 us   +/- 578.614 (min: 18922.016 / max: 22625.792) us
Self made kernel with interleaved memory and caching underway:
solve_poisson       :    CPU: 16785.306 us   +/- 473.828 (min: 15545.700 / max: 24712.800) us     GPU-0: 19508.107 us   +/- 688.172 (min: 18436.096 / max: 27279.585) us
Tensorflow tridiag:
solve_poisson       :    CPU: 36484.831 us   +/- 504.977 (min: 34840.500 / max: 40532.300) us     GPU-0: 39811.536 us   +/- 797.668 (min: 38632.095 / max: 43580.959) us
4.368593325090479e-15
1.6500057833171564e-09


In [6]:
N = 4000 # Size of matrices
n_repeat = 1000  # not used in this cell

# NumPy start matrix (get_random_mat_np returns a NumPy array); no seed is
# passed, so W0 differs between runs.
W0 = get_random_mat_np(N)
ham_c = qf.gpu.solve_poisson_interleaved_cp(N)
gpu_method = qf.gpu.isomp_gpu_skewherm_solver(W0)

# NOTE(review): time, inner_time, qstepsize, steps and inner_steps below are
# not used by the qf.solve call in this cell, which runs a single step with
# steps=1 and inner_steps=1. `time` would also shadow the stdlib module name
# if that module were imported.
time = 3.0 # in seconds
inner_time = 0.5 # in seconds
qstepsize = 0.2 # in qtime

steps = 2200 
inner_steps = 200
# Keyword arguments passed to qf.solve as method_kwargs; the Poisson solver is
# supplied under the "hamiltonian" key.
# NOTE(review): the key is spelled "verbatim" — confirm this is the name the
# solver expects.
method_kwargs = {"hamiltonian": ham_c.solve_poisson, "verbatim":False, "maxit":7, "tol":1e-8}
# Single-step run of the GPU method; the return value is discarded.
qf.solve(W0, steps = 1, inner_steps=1, method=gpu_method.solve_step, method_kwargs=method_kwargs)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 1/1 [00:33<00:00, 33.79s/ steps]
