### TensorFlow CUDA Tests
Tests to verify that the graphics card is working and that AVX-xxx is configured correctly.
If they fail, work on your build. ;)

In [1]:
# Imports
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from numba import cuda 
import os

# References
# https://www.tensorflow.org/api_docs/python/tf/test/is_gpu_available
# https://stackoverflow.com/questions/38009682/how-to-tell-if-tensorflow-is-using-gpu-acceleration-from-inside-python-shell
# https://www.analyticsvidhya.com/blog/2021/11/benchmarking-cpu-and-gpu-performance-with-tensorflow/
# https://stackoverflow.com/questions/64997553/python-requires-ipykernel-to-be-installed
# https://stackoverflow.com/questions/65124633/line-magic-function-time-not-found
# https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
# https://stackoverflow.com/questions/43332703/open-terminal-run-command-python

In [2]:
# Free memory: reset the current CUDA device through numba
device = cuda.get_current_device()
device.reset()

# Open three gnome-terminal windows that keep printing resource usage while
# the benchmarks run. In each bash command line the monitoring tool is
# followed by `sleep 1000000`.
gpu_monitor_cmd = "gnome-terminal -e 'bash -c \"nvidia-smi -l 1; sleep 1000000\" '"
cpu_monitor_cmd = "gnome-terminal -e 'bash -c \"mpstat 5; sleep 1000000\" '"
ram_monitor_cmd = "gnome-terminal -e 'bash -c \" free -m -h -s 4; sleep 1000000\" '"

os.system(gpu_monitor_cmd)  # GPU
os.system(cpu_monitor_cmd)  # CPU
os.system(ram_monitor_cmd)  # RAM (last expression: its return value is the cell output)

0

In [3]:
# List the GPU devices TensorFlow can see (the returned list is the cell output)
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# Report how many GPUs TensorFlow detects, using the stable
# `tf.config.list_physical_devices` API instead of its `experimental` alias.
print("Number GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Number GPUs Available:  1


## Test/Benchmark
Simple training run on the CIFAR-10 dataset.

In [5]:
# Load CIFAR-10 through the Keras dataset helper and unpack the
# (images, labels) pairs of the train and test splits
train_split, test_split = keras.datasets.cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [6]:
# Show the array shapes of the training and test image sets side by side
(X_train.shape, X_test.shape)

((50000, 32, 32, 3), (10000, 32, 32, 3))

In [7]:
# Shape of one image: take the first training sample and inspect it
first_image = X_train[0]
first_image.shape

(32, 32, 3)

In [8]:
# Peek at the first five training labels
y_train[0:5]

array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

In [9]:
# Rescale the image values into the 0-1 range by dividing by 255
X_train_scaled, X_test_scaled = X_train / 255, X_test / 255

# One-hot encode both label arrays as float32 vectors over 10 classes
y_train_encoded, y_test_encoded = (
    keras.utils.to_categorical(labels, num_classes=10, dtype='float32')
    for labels in (y_train, y_test)
)

In [10]:
# Model
def get_model():
    """Build and compile the dense classifier used by the benchmark cells.

    Architecture: Flatten over (32, 32, 3) inputs -> Dense(3000, relu)
    -> Dense(1000, relu) -> Dense(10, softmax).

    Returns:
        A compiled `keras.Sequential` model using the SGD optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(32, 32, 3)),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # softmax rather than sigmoid: the model is compiled with categorical
        # cross-entropy, so the 10 outputs should form one probability
        # distribution over mutually exclusive classes.
        keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

#### Benchmark Section 

In [11]:
# Variables
# Number of training epochs passed to `fit` in each benchmark run
_epochs = 10

In [12]:
%%timeit -n1 -r1

# CPU: build and train the model inside a '/CPU:0' device scope
with tf.device('/CPU:0'):
    model_cpu = get_model()
    model_cpu.fit(
        x=X_train_scaled,
        y=y_train_encoded,
        epochs=_epochs,
    )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
4min 58s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [13]:
%%timeit -n1 -r1

# GPU: build and train the model inside a '/GPU:0' device scope
with tf.device('/GPU:0'):
    model_gpu = get_model()
    model_gpu.fit(
        x=X_train_scaled,
        y=y_train_encoded,
        epochs=_epochs,
    )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1min 9s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [14]:
%%timeit -n1 -r1

# Default placement: no tf.device scope here, so TensorFlow itself decides
# which device the model is built and trained on
model_all = get_model()
model_all.fit(
    x=X_train_scaled,
    y=y_train_encoded,
    epochs=_epochs,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1min 9s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [15]:
# Free Memory
# Reset the current CUDA device through numba.
# NOTE(review): this presumably also tears down the CUDA context TensorFlow
# was using in this kernel, so further GPU work may need a kernel restart — confirm.
device = cuda.get_current_device()
device.reset()