# TensorFlow GPU Test
This notebook checks that *TensorFlow 1.x* detects and uses the GPUs correctly.

In [13]:
# Select which CUDA devices this process may see: devices are ordered by
# PCI bus id and only devices 0 and 1 are exposed. This cell runs before the
# first TensorFlow import in the notebook.
import os
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0,1",
})

In [14]:
# Print the cuDNN version number exposed by TensorFlow's build_info module.
from tensorflow.python.platform import build_info as tf_build_info
print(tf_build_info.cudnn_version_number)

7.6


In [15]:
# Import TensorFlow (bound to `tf` for the rest of the notebook) and print
# the installed version string.
import tensorflow as tf
print(tf.__version__)

1.15.0


In [16]:
# Creating a session with log_device_placement=True logs the device mapping
# (shown in this cell's output). The session is needed only for that side
# effect, so open it in a `with` block: it is closed on exit instead of
# staying open and holding its resources for the rest of the notebook.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    pass

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:1 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:3b:00.0, compute capability: 7.0
/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0



In [17]:
# TF will allocate all available memory on each visible GPU if not told otherwise.
# Here are 5 ways to stick to just one (or a few) GPUs: https://stackoverflow.com/questions/40069883/how-to-set-specific-gpu-in-tensorflow
# This cell uses the `device_count` option to limit the session to one GPU device.
config = tf.ConfigProto(device_count={'GPU': 1})
# Nothing is run in this session, so use a `with` block to make sure it is
# closed again rather than left open until `sess` happens to be rebound.
with tf.Session(config=config) as sess:
    pass

In [18]:
# Count the physical GPU devices TensorFlow reports and print the total.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) 

Num GPUs Available:  2


In [19]:
# Step 1: report the installed TensorFlow version
print("TensorFlow Version:", tf.__version__)

# Step 2: enumerate the physical GPUs TensorFlow reports
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)

if not gpus:
    print("No GPUs found")
else:
    # NOTE(review): earlier cells in this notebook already create tf.Session
    # objects; confirm that enabling memory growth at this point still has
    # the intended effect.
    try:
        # Step 3: enable memory growth on every GPU to avoid memory overflow
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # To restrict TensorFlow to specific GPUs, uncomment and adapt:
        # tf.config.experimental.set_visible_devices(gpus[0:2], 'GPU')
        print(f"GPUs available: {len(gpus)}")
    except RuntimeError as e:
        # Memory growth must be set before initializing GPUs; if it was not,
        # show the error message instead of aborting the cell.
        print(e)

TensorFlow Version: 1.15.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
GPUs available: 2


In [20]:
# Ask TensorFlow for the name of a GPU device; as the last expression of the
# cell, the returned string is displayed as the cell output.
tf.test.gpu_device_name()

'/device:GPU:0'

In [21]:
# Ask TensorFlow whether a GPU is available; as the last expression of the
# cell, the returned boolean is displayed as the cell output.
tf.test.is_gpu_available()

True

In [22]:
# Print the CUDA version and then the cuDNN version from TensorFlow's
# build_info module, one per line.
# Author's reference notes: 9.0 (CUDA) and 7 (cuDNN) in v1.10.0.
from tensorflow.python.platform import build_info as tf_build_info
print(
    tf_build_info.cuda_version_number,
    tf_build_info.cudnn_version_number,
    sep="\n",
)

10.0
7.6


In [23]:
import glob
import os
from os.path import join as pjoin
import subprocess
import sys

# Detect the platform instead of hard-coding False, so that the
# Windows-specific branches of locate_cuda() are reachable when this notebook
# is run on Windows. On other platforms the value is still False.
IS_WINDOWS = sys.platform.startswith("win")

def get_cuda_version(cuda_home):
    """Return the CUDA toolkit version under ``cuda_home`` as ``"major.minor"``.

    The version is read from the first line of ``<cuda_home>/version.txt``
    when that file exists; otherwise it is parsed from the output of
    ``<cuda_home>/bin/nvcc --version``.

    Args:
        cuda_home: Path of the CUDA installation directory.

    Returns:
        The version as a string such as ``"10.0"`` or ``"9.0"``.

    Raises:
        RuntimeError: If neither source can be read or no version number can
            be found in it.
    """
    import re  # local import: keeps this definition self-contained

    version_file = os.path.join(cuda_home, "version.txt")
    try:
        if os.path.isfile(version_file):
            with open(version_file) as f:
                # Expected form: "CUDA Version 10.0.130"
                text = f.readline()
            pattern = r"(\d+\.\d+)"
        else:
            raw = subprocess.check_output(
                [os.path.join(cuda_home, "bin", "nvcc"), "--version"])
            # check_output returns bytes; decode them rather than parsing
            # the repr produced by str(bytes).
            text = raw.decode(errors="replace")
            # Expected to contain: "... release 11.1, V11.1.105"
            pattern = r"release\s+(\d+\.\d+)"
    except (OSError, subprocess.CalledProcessError) as err:
        # Keep the original exception type/message, but chain the cause.
        raise RuntimeError("Cannot read cuda version file") from err

    # Match "major.minor" explicitly instead of slicing a fixed 4 characters,
    # which returned "9.0." / "9.0," for single-digit major versions.
    match = re.search(pattern, text)
    if match is None:
        raise RuntimeError("Cannot read cuda version file")
    return match.group(1)
def locate_cuda():
    """Locate the CUDA environment on the system.

    Starts by looking for the CUDA_HOME or CUDA_PATH env variable. If neither
    is set, the location is derived from the 'nvcc' found in the PATH, and as
    a last resort from a default install location
    ('/usr/local/cuda', or the NVIDIA toolkit folder on Windows).

    Returns:
        A tuple ``(cudaconfig, version)``: ``cudaconfig`` is a dict with keys
        'home', 'include' and 'lib64' giving the path of each directory, and
        ``version`` is the string returned by ``get_cuda_version``.

    Raises:
        EnvironmentError: If no CUDA home could be determined, or if any of
            the three directories does not exist.
        RuntimeError: Propagated from ``get_cuda_version`` when the version
            cannot be read.
    """
    not_found_message = (
        'The CUDA  path could not be located in $PATH, $CUDA_HOME or $CUDA_PATH. '
        'Either add it to your path, or set $CUDA_HOME or $CUDA_PATH.')

    # Guess #1: explicit environment variables (an empty value counts as unset)
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if not cuda_home:
        # Guess #2: derive the install root from nvcc on the PATH
        # (<home>/bin/nvcc -> <home>)
        try:
            which = 'where' if IS_WINDOWS else 'which'
            output = subprocess.check_output([which, 'nvcc']).decode()
            # The lookup tool may print more than one match; use the first.
            nvcc = output.splitlines()[0].strip()
            cuda_home = os.path.dirname(os.path.dirname(nvcc))
        except (subprocess.CalledProcessError, OSError, IndexError):
            # CalledProcessError: nvcc is not on the PATH.
            # OSError: the lookup tool itself could not be executed.
            # IndexError: the lookup tool printed nothing.
            # Guess #3: fall back to a default install location
            if IS_WINDOWS:
                cuda_homes = glob.glob(
                    'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
                cuda_home = cuda_homes[0] if cuda_homes else ''
            else:
                cuda_home = '/usr/local/cuda'
            if not os.path.exists(cuda_home):
                cuda_home = None

    # Without this guard a failed search reached get_cuda_version(None) and
    # died with a TypeError instead of the intended EnvironmentError.
    if not cuda_home:
        raise EnvironmentError(not_found_message)

    version = get_cuda_version(cuda_home)
    cudaconfig = {'home': cuda_home,
                  'include': pjoin(cuda_home, 'include'),
                  'lib64': pjoin(cuda_home, pjoin('lib', 'x64') if IS_WINDOWS else 'lib64')}
    if not all(os.path.exists(path) for path in cudaconfig.values()):
        raise EnvironmentError(not_found_message)

    return cudaconfig, version


# Run the lookup once and print both results: the dict of CUDA directories
# and the version string.
CUDA, CUDA_VERSION = locate_cuda()
print('CUDA', CUDA)
print('CUDA_VERSION', CUDA_VERSION)

CUDA {'home': '/software/spackages/linux-rocky8-x86_64/gcc-9.5.0/cuda-11.1.1-gvdep34zxhk3vvzcg3dti7ytgmuf5547', 'include': '/software/spackages/linux-rocky8-x86_64/gcc-9.5.0/cuda-11.1.1-gvdep34zxhk3vvzcg3dti7ytgmuf5547/include', 'lib64': '/software/spackages/linux-rocky8-x86_64/gcc-9.5.0/cuda-11.1.1-gvdep34zxhk3vvzcg3dti7ytgmuf5547/lib64'}
CUDA_VERSION 11.1


In [24]:
import tensorflow as tf

# Build a (2x3) x (3x2) matrix product with the ops placed on '/gpu:1'.
with tf.device('/gpu:1'):
    a = tf.constant(
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        shape=[2, 3],
        name='a',
    )
    b = tf.constant(
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        shape=[3, 2],
        name='b',
    )
    c = tf.matmul(a, b)

# Evaluate the product in a fresh session (closed when the block exits)
# and print the resulting 2x2 matrix.
with tf.Session() as sess:
    print(sess.run(c))


[[22. 28.]
 [49. 64.]]
