# Dec 1, 2019 Tf2 Keras GPU check 
* Name: Jikhan Jeong

# Tf-2 keras required environment
* Ref: https://github.com/tensorflow/tensorflow/issues/31505
* 6) cuda/10.0.130   7) cudnn/7.6.4.38_cuda10.0
### Solving it with sbatch torch_env setting
* module load cuda/10.0.130   
* module load cudnn/7.6.4.38_cuda10.0

## Pytorch okay with tensforflow cuda and cudnn env
* pytorch  
module load cuda/10.1.105  
module load cudnn/7.5.1.10_cuda10.1  

* tf2 (default for jupyterlab.sh)
module load cuda/10.0.130     
module load cudnn/7.6.4.38_cuda10.0  

## With default from Pytorch 
---------------

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
print('Cuda Available: ',torch.cuda.is_available())
print('NVDIA Drive Version: ', torch._C._cuda_getDriverVersion())
print('Current Device ID: ', torch.cuda.current_device())
print('Cuda Device: ', torch.cuda.device(0))
print('Cuda Device Count: ', torch.cuda.device_count())
print('Cuda Device Name', torch.cuda.get_device_name(0))

Cuda Available:  True
NVDIA Drive Version:  10000
Current Device ID:  0
Cuda Device:  <torch.cuda.device object at 0x2b2f860e8750>
Cuda Device Count:  4
Cuda Device Name Tesla K80


In [3]:
torch.version.cuda

'9.2.148'

In [18]:
a = torch.DoubleTensor([1., 2.])

In [19]:
a = torch.FloatTensor([1., 2.]).cuda()

In [20]:
a = torch.cuda.FloatTensor([1., 2.])

In [21]:
a

tensor([1., 2.], device='cuda:3')

In [22]:
# Parameters and DataLoaders
input_size = 5
output_size = 2

batch_size = 30
data_size = 100

In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [24]:
class RandomDataset(Dataset):

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len

rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)

In [25]:
class Model(nn.Module):
    # Our model

    def __init__(self, input_size, output_size): # input size = 5, output size = 2
        super(Model, self).__init__()
        self.fc = nn.Linear(input_size, output_size) # linear regression build as class attribute

    def forward(self, input):                        # class method
        output = self.fc(input)                     
        print("\tIn Model: input size", input.size(),
              "output size", output.size())

        return output

In [26]:
model = Model(input_size, output_size)

In [27]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device
model.to(device)

Model(
  (fc): Linear(in_features=5, out_features=2, bias=True)
)

In [28]:
for data in rand_loader:
    input = data.to(device)
    output = model(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())

	In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])


In [29]:
output

tensor([[-0.5042,  0.1086],
        [ 0.1842,  0.0089],
        [ 1.1781,  1.1860],
        [-1.0322, -0.8087],
        [-0.4633,  0.3866],
        [-0.5599, -1.0831],
        [-0.7268, -0.1608],
        [-0.3674,  0.5973],
        [-0.1525, -0.3540],
        [ 0.0367,  0.5255]], device='cuda:0', grad_fn=<AddmmBackward>)

In [30]:
torch.cuda.device_count() 

4

In [31]:
if torch.cuda.device_count() > 1:
   print("Let's use", torch.cuda.device_count(), "GPUs!")
   model = nn.DataParallel(model) ### Multi GPU setting in here 4 GPU
    
    
model.to(device)

Let's use 4 GPUs!


DataParallel(
  (module): Model(
    (fc): Linear(in_features=5, out_features=2, bias=True)
  )
)

In [32]:
for data in rand_loader:
    input = data.to(device) # sending data to GPU by data.to(device)                     
    output = model(input)   # model was send to GPU in the above line by model.to(device)
    print("Outside: input size", input.size(),
          "output_size", output.size())

	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([6, 5]) output size torch.Size([6, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([6, 5]) output size torch.Size([6, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 2])
	In Model: input size torch.Size([6, 5]) output size torch.Size(

--------------------
# TF2 Keras GPU
--------------------

In [4]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  4


In [5]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

In [6]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 11646373990410451427, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 15731392629559516249
 physical_device_desc: "device: XLA_CPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 11075413607
 locality {
   bus_id: 1
   links {
     link {
       device_id: 1
       type: "StreamExecutor"
       strength: 1
     }
   }
 }
 incarnation: 16710718711898020266
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:04:00.0, compute capability: 3.7", name: "/device:GPU:1"
 device_type: "GPU"
 memory_limit: 11073421312
 locality {
   bus_id: 1
   links {
     link {
       type: "StreamExecutor"
       strength: 1
     }
   }
 }
 incarnation: 4443030166975527588
 physical_device_desc: "device: 1, name: Tesla K80, pci bus id: 0000:05:00.0, compute capability: 3.7", name: "/device:GPU:2"
 device_type: "GPU"
 mem

In [7]:
tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)

True

In [8]:
tf.test.is_built_with_cuda() 

True

In [9]:
tf.test.is_gpu_available()

True

In [10]:
tf.keras.utils.multi_gpu_model(model=model, gpus=2)

NameError: name 'model' is not defined

In [11]:
value = tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)
print ('***If TF can access GPU: ***\n\n',value) # MUST RETURN True IF IT CAN!!

***If TF can access GPU: ***

 True


In [12]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" # 0,1,2,3 are number of GPUs

In [13]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 1528264065010377905, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 1841832168431277891
 physical_device_desc: "device: XLA_CPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 11075413607
 locality {
   bus_id: 1
   links {
     link {
       device_id: 1
       type: "StreamExecutor"
       strength: 1
     }
   }
 }
 incarnation: 7867523868729562787
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:04:00.0, compute capability: 3.7", name: "/device:GPU:1"
 device_type: "GPU"
 memory_limit: 11073421312
 locality {
   bus_id: 1
   links {
     link {
       type: "StreamExecutor"
       strength: 1
     }
   }
 }
 incarnation: 7918010352637121404
 physical_device_desc: "device: 1, name: Tesla K80, pci bus id: 0000:05:00.0, compute capability: 3.7", name: "/device:GPU:2"
 device_type: "GPU"
 memory

In [14]:
import tensorflow as tf
tf.test.is_gpu_available()

True

In [15]:
import tensorflow as tf
tf.test.is_gpu_available()

True

In [16]:
tf.test.is_built_with_cuda()

True

In [17]:
import tensorflow as tf
tf.__version__

'2.0.0'