In [1]:
import tensorflow as tf
import time

# Function to perform matrix multiplication on a specific device
def perform_matrix_multiplication(device_name):
    with tf.device(device_name):
        # Random matrices of size 10000x10000
        a = tf.random.normal([10000, 10000])
        b = tf.random.normal([10000, 10000])

        start_time = time.time()
        # Performing matrix multiplication
        c = tf.matmul(a, b)
        # Ensure the computation is complete with `tf.compat.v1.Session.run` or `tf.reduce_sum`
        tf.reduce_sum(c)  # This forces the execution of the multiplication
        end_time = time.time()

    return end_time - start_time  # Return the time taken for the operation

# Run matrix multiplication on CPU
cpu_time = perform_matrix_multiplication('/CPU:0')
print(f"Time taken to multiply two matrices on CPU: {cpu_time} seconds")

# If a GPU is available, run the matrix multiplication on GPU
if tf.config.list_physical_devices('GPU'):
    gpu_time = perform_matrix_multiplication('/GPU:0')
    print(f"Time taken to multiply two matrices on GPU: {gpu_time} seconds")
    print(f"Speedup from GPU over CPU: {cpu_time/gpu_time}x")
else:
    print("No GPU found. Please install TensorFlow with GPU support and ensure you have a compatible GPU.")


2023-12-20 22:27:38.030537: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-20 22:27:38.030680: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-20 22:27:38.037224: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-20 22:27:38.070282: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-20 22:27:39.934425: I external/local_xla/xla/

Time taken to multiply two matrices on CPU: 7.52169942855835 seconds
Time taken to multiply two matrices on GPU: 0.2878265380859375 seconds
Speedup from GPU over CPU: 26.132751616922018x


In [16]:
import tensorflow as tf
import time


tf.debugging.set_log_device_placement(True)

start_time = time.time()


# Create some tensors
a = tf.random.uniform([10000, 10000], minval=-1, maxval=1)
b = tf.random.uniform([10000, 10000], minval=-1, maxval=1)

with tf.device('/GPU:0'):
    c = tf.matmul(a, b)
    print(c.numpy())  # This will force the execution of the GPU operation

end_time = time.time()


# Calculate and print the time taken
time_taken = end_time - start_time
print(f"Time taken for matrix multiplication on GPU: {time_taken} seconds")

[[ 20.826351  -10.377904   19.144958  ...   9.106541  -35.388725
  -23.901659 ]
 [ 27.77131     2.2206004   7.8245883 ... -52.72453    17.865572
  -33.91282  ]
 [ 34.936424    3.9214725  12.059947  ... -11.826799  -22.960577
   10.02722  ]
 ...
 [-13.130393  116.99628   -30.282137  ... 104.0798    -25.496578
  -90.80625  ]
 [-14.896624   44.218502   34.597813  ...  17.772146  -19.295525
  -38.384884 ]
 [  5.3490667  37.013767    9.868298  ...  31.596693   64.08423
   89.68532  ]]
Time taken for matrix multiplication on GPU: 1.547560691833496 seconds


In [17]:
import tensorflow as tf
import time


tf.debugging.set_log_device_placement(True)

start_time = time.time()


# Create some tensors
a = tf.random.uniform([10000, 10000], minval=-1, maxval=1)
b = tf.random.uniform([10000, 10000], minval=-1, maxval=1)

with tf.device('/CPU:0'):
    c = tf.matmul(a, b)
    print(c.numpy())  # This will force the execution of the GPU operation

end_time = time.time()


# Calculate and print the time taken
time_taken = end_time - start_time
print(f"Time taken for matrix multiplication on CPU: {time_taken} seconds")

[[-22.050137    27.594654   -33.4969     ...  29.05776    -59.836163
   20.963581  ]
 [-18.175238    30.86615     40.996117   ...  22.481152    20.784237
   55.723404  ]
 [ 24.028183     0.31725502 -33.73317    ...  14.069219   -47.05213
   14.342605  ]
 ...
 [ -5.3877506    1.2093189   23.639315   ...  32.074562   -63.377113
   60.591873  ]
 [ 56.366985    51.459557    -6.1103654  ... -43.658554    24.827822
  -16.892273  ]
 [-39.8752      -5.703271    39.06949    ... -17.966967   -44.217667
   13.454746  ]]
Time taken for matrix multiplication on CPU: 9.173386573791504 seconds
