In [1]:
import pyopencl as cl
import pyopencl.characterize.performance as perf

In [2]:
%load_ext pyopencl.ipython_ext

In [3]:
def main():
    """Print a short performance report for an OpenCL context.

    A context is obtained from ``cl.create_some_context()``.  The report
    lists, in this order: command latency and profiling overhead, the
    empty-kernel time, the float32 add rate, and then -- for each of the
    host-to-device, device-to-host and device-to-device transfer types --
    the transfer latency followed by the bandwidth at block sizes of
    ``1 << i`` bytes for ``i`` = 6, 8, ..., 30.

    A bandwidth measurement that raises is reported by the exception's
    class name instead of aborting the run.  Everything is written with
    ``print``; nothing is returned.
    """
    context = cl.create_some_context()

    overhead, cmd_latency = perf.get_profiling_overhead(context)
    print("command latency: %g s" % cmd_latency)
    overhead_percent = 100*overhead/cmd_latency
    print("profiling overhead: {:g} s -> {:.1f} %".format(overhead, overhead_percent))

    # The remaining measurements run on a queue with profiling enabled.
    profiling_queue = cl.CommandQueue(
        context,
        properties=cl.command_queue_properties.PROFILING_ENABLE,
    )
    print("empty kernel: %g s" % perf.get_empty_kernel_time(profiling_queue))
    add_rate = perf.get_add_rate(profiling_queue)
    print("float32 add: %g GOps/s" % (add_rate/1e9))

    separator = "----------------------------------------"
    transfer_kinds = (
        perf.HostToDeviceTransfer,
        perf.DeviceToHostTransfer,
        perf.DeviceToDeviceTransfer,
    )

    for kind in transfer_kinds:
        # Banner naming the transfer type being measured.
        print(separator)
        print(kind.__name__)
        print(separator)

        print("latency: %g s" % perf.transfer_latency(profiling_queue, kind))

        # Sweep block sizes from 64 bytes (1 << 6) up to 1 GiB (1 << 30).
        for exponent in range(6, 31, 2):
            block_size = 1 << exponent
            try:
                rate = perf.transfer_bandwidth(profiling_queue, kind, block_size)
                outcome = "%g GB/s" % (rate/1e9)
            except Exception as exc:
                # Keep going: show which exception type occurred for this size.
                outcome = "exception: %s" % exc.__class__.__name__
            print("bandwidth @ %d bytes: %s" % (block_size, outcome))

In [4]:
# Run the benchmark; main() prints its report to this cell's output.
main()

command latency: 1.23888e-05 s
profiling overhead: 2.72714e-06 s -> 22.0 %
empty kernel: 1.59573e-05 s
20 1246030.455014912 0
40 4395278.090697126 0
80 9167349.251985563 0
160 19897925.855918463 0
320 39873437.27428863 0
640 71301622.2031124 0
1280 172560026.67770204 0
2560 345121439.9588583 0
5120 684495289.0637548 0
10240 678755126.1218015 0
float32 add: 342.248 GOps/s
----------------------------------------
HostToDeviceTransfer
----------------------------------------
latency: 5.45415e-05 s
bandwidth @ 64 bytes: 0.000877552 GB/s
bandwidth @ 256 bytes: 0.00380271 GB/s
bandwidth @ 1024 bytes: 0.0132462 GB/s
bandwidth @ 4096 bytes: 0.0680951 GB/s
bandwidth @ 16384 bytes: 0.163249 GB/s
bandwidth @ 65536 bytes: 0.790509 GB/s
bandwidth @ 262144 bytes: 2.41698 GB/s
bandwidth @ 1048576 bytes: 5.25693 GB/s
bandwidth @ 4194304 bytes: 6.90138 GB/s
bandwidth @ 16777216 bytes: 6.53288 GB/s
bandwidth @ 67108864 bytes: 6.24328 GB/s
bandwidth @ 268435456 bytes: 5.2801 GB/s
bandwidth @ 1073741824 b