In [1]:
import pyopencl as cl
import pyopencl.characterize.performance as perf

In [2]:
%load_ext pyopencl.ipython_ext

In [3]:
def main():
    """Characterize the OpenCL device picked by ``cl.create_some_context()``.

    Prints, in this order:

    * the command latency and the profiling overhead (absolute and as a
      percentage of the latency),
    * the empty-kernel time and the float32 add rate, both measured on a
      queue created with ``PROFILING_ENABLE``,
    * for each of the host-to-device, device-to-host and device-to-device
      transfer types: the transfer latency, followed by the bandwidth at
      block sizes 2**6, 2**8, ..., 2**30 bytes.

    A bandwidth measurement that raises is reported by exception class name
    instead of aborting the sweep. Nothing is returned.
    """
    context = cl.create_some_context()

    overhead, cmd_latency = perf.get_profiling_overhead(context)
    print("command latency: %g s" % cmd_latency)
    overhead_pct = 100*overhead/cmd_latency
    print("profiling overhead: {:g} s -> {:.1f} %".format(overhead, overhead_pct))

    profiling_queue = cl.CommandQueue(
        context, properties=cl.command_queue_properties.PROFILING_ENABLE)

    # Each measurement is taken immediately before its own print so that any
    # output produced by the perf helpers stays interleaved as before.
    empty_kernel_time = perf.get_empty_kernel_time(profiling_queue)
    print("empty kernel: %g s" % empty_kernel_time)
    add_gops = perf.get_add_rate(profiling_queue)/1e9
    print("float32 add: %g GOps/s" % add_gops)

    rule = "----------------------------------------"
    transfer_kinds = (
        perf.HostToDeviceTransfer,
        perf.DeviceToHostTransfer,
        perf.DeviceToDeviceTransfer,
    )
    for kind in transfer_kinds:
        print(rule)
        print(kind.__name__)
        print(rule)

        print("latency: %g s" % perf.transfer_latency(profiling_queue, kind))
        for block_size in (1 << shift for shift in range(6, 31, 2)):
            try:
                rate = perf.transfer_bandwidth(profiling_queue, kind, block_size)
                outcome = "%g GB/s" % (rate/1e9)
            except Exception as err:
                # Best effort: report the failure and continue with the
                # next block size.
                outcome = "exception: %s" % type(err).__name__
            print("bandwidth @ %d bytes: %s" % (block_size, outcome))

In [4]:
# Run the characterization; everything it prints becomes this cell's output.
main()

command latency: 1.24043e-05 s
profiling overhead: 2.55568e-06 s -> 20.6 %
empty kernel: 1.62066e-05 s
20 1270713.9176630285 0
40 4487476.445532174 0
80 9333983.774609197 0
160 19898110.225714393 0
320 39873529.81833964 0
640 78979370.8909612 0
1280 171124461.46184033 0
2560 333898305.69613856 0
5120 690245653.1469094 0
10240 1283420677.1253548 0
20480 2714950782.6877112 0
40960 5054761069.216624 0
81920 10409676094.22101 0
163840 21544392889.073513 0
327680 30039095565.755928 0
655360 42252800898.30926 0
1310720 48901822247.469734 0
2621440 52899204939.90116 0
5242880 55723671681.119026 0
10485760 57596208893.452065 0
float32 add: 28798.1 GOps/s
----------------------------------------
HostToDeviceTransfer
----------------------------------------
latency: 6.7943e-05 s
bandwidth @ 64 bytes: 0.000896714 GB/s
bandwidth @ 256 bytes: 0.00427807 GB/s
bandwidth @ 1024 bytes: 0.015211 GB/s
bandwidth @ 4096 bytes: 0.0602853 GB/s
bandwidth @ 16384 bytes: 0.24114 GB/s
bandwidth @ 65536 bytes: 0.