In [1]:
import numpy as np
import cupy as cp

from toric_parallel_nojit import State as ParallelNojitState
from toric_parallel_nojit import mwpm as parallel_nojit_mwpm
# from toric_parallel_nojit import logical_error as parallel_nojit_logical_error

from toric_parallel import State as ParallelState
from toric_parallel import mwpm as parallel_mwpm
# from toric_parallel import logical_error as parallel_logical_error

from toric_nojit import State as NojitState
from toric_nojit import mwpm as nojit_mwpm
# from toric_nojit import logical_error as nojit_logical_error

from toric_gpu import State as GPUState
from toric_gpu import mwpm as gpu_mwpm
# from toric_gpu import logical_error as gpu_logical_error

from toric import State, pcm, mwpm, logical_error

from pymatching import Matching
from tqdm.notebook import tqdm

# Simulation parameters (shared by every benchmark cell below)

N = 100 # Number of samples (batch size passed to the parallel State classes)
L = 100 # Lattice size
p_error = 0.005 # Error probability per spin
η = 0.1 # Smoothing parameter for the Jacobi method
c = 16 # "Field velocity" - number of field updates per cycle
T = 2 # Epochs

# Build the pymatching decoder once; the same object is handed to every
# `*_mwpm` call below. `pcm(L)` presumably returns the parity-check matrix
# for an L x L lattice - confirm in `toric`.
matching = Matching(pcm(L))

## No JIT, No Parallel

In [2]:
# Baseline benchmark: single-lattice implementation from `toric_nojit`.
# Run every step once before timing so that `state` exists in the notebook
# namespace and one-off setup cost is not part of the measurements.

state = NojitState(L)
state.add_errors(p_error)
state.update_field(η)
state.update_anyon()
nojit_mwpm(matching, state.q)

# Time each step on its own. The assignment in the first %timeit line is local
# to the timing loop, so the remaining lines keep calling into the `state`
# created above, over and over.
# NOTE(review): if these methods mutate `state` cumulatively (e.g. add_errors
# piling up errors), the later timings are taken on a different state than a
# single pass would produce - confirm against `toric_nojit`.
%timeit state = NojitState(L)
%timeit state.add_errors(p_error)
%timeit state.update_field(η)
%timeit state.update_anyon()
%timeit nojit_mwpm(matching, state.q)

4.22 μs ± 277 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
239 μs ± 10.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
124 μs ± 1.29 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
1.44 ms ± 44 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.67 ms ± 164 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## JIT, No Parallel

In [4]:
# Benchmark: single-lattice implementation from `toric` (the JIT variant).
# Each step is called once before timing; presumably this also triggers any
# JIT compilation so that compile time is excluded from %timeit - confirm.

state = State(L)
state.add_errors(p_error)
state.update_field(η)
state.update_anyon() # the only step noticeably faster with JIT (compare with the No-JIT timings)
mwpm(matching, state.q)

# Time each step on its own. The assignment in the first %timeit line is local
# to the timing loop, so the remaining lines reuse the `state` created above.
%timeit state = State(L)
%timeit state.add_errors(p_error)
%timeit state.update_field(η)
%timeit state.update_anyon()
%timeit mwpm(matching, state.q)

10.5 μs ± 460 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
222 μs ± 9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
175 μs ± 7.14 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
416 μs ± 28.7 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.84 ms ± 87.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## No JIT, Parallel

In [2]:
# Benchmark: batched implementation from `toric_parallel_nojit`, constructed
# with N samples of lattice size L. Each step is called once before timing so
# that `state` exists in the notebook namespace.

state = ParallelNojitState(N, L)
state.add_errors(p_error) # tens of ms per call; see the %timeit output for the current figure
state.update_field(η)
state.update_anyon() # Slowest step by far (author's note: big for loop)
parallel_nojit_mwpm(matching, state.q)

# Time each step on its own. The assignment in the first %timeit line is local
# to the timing loop, so the remaining lines reuse the `state` created above.
%timeit state = ParallelNojitState(N, L)
%timeit state.add_errors(p_error)
%timeit state.update_field(η)
%timeit state.update_anyon()
%timeit parallel_nojit_mwpm(matching, state.q)

606 μs ± 39.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
35.8 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
13.3 ms ± 907 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
The slowest run took 4.14 times longer than the fastest. This could mean that an intermediate result is being cached.
2.2 s ± 1.14 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
433 ms ± 32.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## JIT, Parallel

In [6]:
# Benchmark: batched implementation from `toric_parallel` (the JIT variant),
# constructed with N samples of lattice size L. Each step is called once before
# timing; presumably this also triggers any JIT compilation - confirm.

state = ParallelState(N, L)
state.add_errors(p_error)
state.update_field(η) # Author's note: slowest step, attributed to the Laplacian / roll - TODO confirm with a complete timing run
state.update_anyon()
parallel_mwpm(matching, state.q)

# Time each step on its own. The assignment in the first %timeit line is local
# to the timing loop, so the remaining lines reuse the `state` created above.
# NOTE(review): all five timings should be re-recorded together; re-run the
# whole cell after a kernel restart so the output is complete.
%timeit state = ParallelState(N, L)
%timeit state.add_errors(p_error)
%timeit state.update_field(η)
%timeit state.update_anyon()
%timeit parallel_mwpm(matching, state.q)

1.26 ms ± 30.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
31.4 ms ± 972 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## GPU

In [5]:
# state = GPUState(10, L)
# state.add_errors(0.01)
# state.update_field(η)
# state.update_anyon()
# # state.update_anyon_2()
# gpu_mwpm(matching, state.q.get())

# Initialize first

%timeit state = GPUState(10000, L)
%timeit state.add_errors(p_error)
%timeit state.update_field(η)
%timeit state.update_anyon()
# %timeit state.update_anyon_2()
# %timeit gpu_mwpm(matching, state.q.get())

2.5 ms ± 10.8 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
2.54 ms ± 912 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
780 μs ± 118 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
3.94 ms ± 620 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
