In [1]:
import tensorflow as tf
import timeit

In [2]:
# 这个方法会减少显存的负担，但是从内存把数据传输到显存中是非常慢的，这样做常常会减慢速度
with tf.device('/cpu:0'):
    cpu_a = tf.random.normal([10000, 1000])
    cpu_b = tf.random.normal([1000,2000])
    print(cpu_a.device, cpu_b.device)
    
with tf.device('/gpu:0'):
    gpu_a = tf.random.normal([10000, 1000])
    gpu_b = tf.random.normal([1000,2000])
    print(gpu_a.device, gpu_b.device)

def cpu_run():
    with tf.device('/cpu:0'):
        c = tf.matmul(cpu_a, cpu_b)
    return c

def gpu_run():
    with tf.device('/gpu:0'):
        c = tf.matmul(gpu_a, gpu_b)
    return c


/job:localhost/replica:0/task:0/device:CPU:0 /job:localhost/replica:0/task:0/device:CPU:0
/job:localhost/replica:0/task:0/device:GPU:0 /job:localhost/replica:0/task:0/device:GPU:0


In [5]:
# warm up(因为刚开始使用GPU时，GPU会有个初始化操作，所以为了出去初始化操作的影响，需要热身 )
cpu_time = timeit.timeit(cpu_run, number=10)
gpu_time = timeit.timeit(gpu_run, number=10)
print('warmup:',cpu_time, gpu_time)

cpu_time = timeit.timeit(cpu_run, number=10)
gpu_time = timeit.timeit(gpu_run, number=10)
print('run time:',cpu_time, gpu_time)

warmup: 0.7874266999999691 0.001418199999989156
run time: 0.7648017999999865 0.0013730000000578002
