In [1]:
import tensorflow as tf
import time

# Function to perform matrix multiplication on a specific device
def perform_matrix_multiplication(device_name):
    with tf.device(device_name):
        # Random matrices of size 10000x10000
        a = tf.random.normal([10000, 10000])
        b = tf.random.normal([10000, 10000])

        start_time = time.time()
        # Performing matrix multiplication
        c = tf.matmul(a, b)
        # Ensure the computation is complete with `tf.compat.v1.Session.run` or `tf.reduce_sum`
        tf.reduce_sum(c)  # This forces the execution of the multiplication
        end_time = time.time()

    return end_time - start_time  # Return the time taken for the operation

# Run matrix multiplication on CPU
cpu_time = perform_matrix_multiplication('/CPU:0')
print(f"Time taken to multiply two matrices on CPU: {cpu_time} seconds")

# If a GPU is available, run the matrix multiplication on GPU
if tf.config.list_physical_devices('GPU'):
    gpu_time = perform_matrix_multiplication('/GPU:0')
    print(f"Time taken to multiply two matrices on GPU: {gpu_time} seconds")
    print(f"Speedup from GPU over CPU: {cpu_time/gpu_time}x")
else:
    print("No GPU found. Please install TensorFlow with GPU support and ensure you have a compatible GPU.")


2023-12-25 12:42:23.697122: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-25 12:42:23.697206: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-25 12:42:23.777699: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-25 12:42:23.971258: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-25 12:42:27.675738: I external/local_xla/xla/

Time taken to multiply two matrices on CPU: 7.546968698501587 seconds
Time taken to multiply two matrices on GPU: 0.3188819885253906 seconds
Speedup from GPU over CPU: 23.666964488653356x


In [2]:
import tensorflow as tf
import time


tf.debugging.set_log_device_placement(True)

start_time = time.time()


# Create some tensors
a = tf.random.uniform([10000, 10000], minval=-1, maxval=1)
b = tf.random.uniform([10000, 10000], minval=-1, maxval=1)

with tf.device('/GPU:0'):
    c = tf.matmul(a, b)
    print(c.numpy())  # This will force the execution of the GPU operation

end_time = time.time()


# Calculate and print the time taken
time_taken = end_time - start_time
print(f"Time taken for matrix multiplication on GPU: {time_taken} seconds")

[[-25.21171   -15.00795   -47.54557   ...  26.132715   47.410583
   -5.137459 ]
 [ 32.976593   31.447002   26.435207  ... -27.413687   -7.8577833
  -15.869736 ]
 [  7.272471  -35.530746   -6.715096  ...  30.648771   13.504829
   42.0355   ]
 ...
 [-11.756889  -32.35502    30.381445  ...  43.966766  -40.25678
  -14.970104 ]
 [-12.614656   18.945028  -10.417493  ...   6.8377333   2.1021197
  -36.534637 ]
 [-27.715652   -1.5450845 -28.627815  ... -35.166393   13.873825
   -8.943923 ]]
Time taken for matrix multiplication on GPU: 0.9891977310180664 seconds


In [3]:
import tensorflow as tf
import time


tf.debugging.set_log_device_placement(True)

start_time = time.time()


# Create some tensors
a = tf.random.uniform([10000, 10000], minval=-1, maxval=1)
b = tf.random.uniform([10000, 10000], minval=-1, maxval=1)

with tf.device('/CPU:0'):
    c = tf.matmul(a, b)
    print(c.numpy())  # This will force the execution of the GPU operation

end_time = time.time()


# Calculate and print the time taken
time_taken = end_time - start_time
print(f"Time taken for matrix multiplication on CPU: {time_taken} seconds")

[[ 54.706417    35.25223    -25.931446   ... -32.786674     0.3133008
   37.327927  ]
 [ 62.154137    38.426796    23.918774   ...  18.37424     28.097923
  -17.517694  ]
 [ -1.1632054   35.89683    -33.764175   ...  23.45009     -8.651368
   50.360504  ]
 ...
 [-21.110142     0.29014564  26.03484    ...   5.524087    17.23555
   17.679863  ]
 [-20.70401    -16.110723    30.068432   ... -17.256062   -23.555487
  -46.928604  ]
 [ -2.6144938  -50.252144     9.069513   ...  19.887709   -20.926338
   24.30942   ]]
Time taken for matrix multiplication on CPU: 8.297615051269531 seconds
