# Simple GPU/CPU Benchmark 

## Imports

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import time
import json
from tensorflow.compat.v1.keras import backend as K
# Pin TensorFlow to a single thread for the "1 CPU" run. Both calls are ACTIVE as written:
# comment them out (and restart the kernel) to let TensorFlow use every core for the "16 CPU" run.
# NOTE(review): TensorFlow is expected to reject thread-count changes once its runtime has been
# initialized, so this has to be decided before any model is built - confirm.
tf.config.threading.set_intra_op_parallelism_threads(1) # threads used to parallelize work inside a single op
tf.config.threading.set_inter_op_parallelism_threads(1) # threads used to run independent ops concurrently

## Constants

In [None]:
# Number of samples per training batch (passed to get_dataset by every benchmark cell).
# NOTE(review): 2056 is not a power of two - possibly 2048 was intended; confirm.
BATCH_SIZE = 2056
# Number of synthetic samples generated.
DATASET_SIZE = 20000
# Height and width of each square synthetic sample.
DATASET_DIMENSION = 120
# Number of channels per synthetic sample.
DATASET_CHANNELS = 3
# Full shape of the synthetic input array: (samples, height, width, channels).
DATASET_SHAPE = (DATASET_SIZE, DATASET_DIMENSION,DATASET_DIMENSION,DATASET_CHANNELS)
# Number of distinct label values drawn for the synthetic labels.
NUM_CLASSES = 10
# Steps per epoch handed to model.fit. SPE * BATCH_SIZE exceeds DATASET_SIZE,
# so the benchmark relies on the dataset being repeated.
SPE = 30
# Number of epochs each benchmark run trains for.
NUM_EPOCHS = 5
# Width of every hidden Dense layer built by get_model.
NEURONS_PER_LAYER = 2000

## Generate Synthetic Data

In [None]:
# Uniform random inputs in [0, 1) shaped by the shared DATASET_SHAPE constant, so the data
# layout is defined in exactly one place instead of being spelled out a second time here.
# NOTE: np.random.rand returns float64, so at the default sizes this array alone takes
# roughly 6.9 GB of host memory.
dummy_data = np.random.rand(*DATASET_SHAPE)
# One random integer class label in [0, NUM_CLASSES) per sample.
dummy_labels = np.random.randint(NUM_CLASSES, size=DATASET_SIZE)

## Helper Functions

In [None]:
def get_dataset(batch_size):
    """Build an endlessly repeating, batched input pipeline over the synthetic data.

    Args:
        batch_size: Number of (sample, label) pairs per batch.

    Returns:
        A `tf.data.Dataset` yielding `(data, labels)` batches sliced from the
        module-level `dummy_data` / `dummy_labels` arrays, repeated indefinitely.
    """
    dummy_dataset = tf.data.Dataset.from_tensor_slices((dummy_data, dummy_labels))
    # Batch and repeat in the original order (so batch contents are unchanged), then
    # prefetch LAST: that way complete batches are prepared while the previous step
    # trains, instead of buffering single un-batched elements ahead of batching.
    dummy_dataset = (
        dummy_dataset
        .batch(batch_size)
        .repeat()
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
    return dummy_dataset

In [None]:
def get_model(num_hidden_layers):
    """Build a fully connected classifier with a configurable number of hidden layers.

    Args:
        num_hidden_layers: How many Dense(NEURONS_PER_LAYER, relu) layers to stack
            between the flattened input and the softmax output. Zero yields a
            model with only the flatten and output layers.

    Returns:
        An uncompiled `keras.Sequential` model whose input is one
        (DATASET_DIMENSION, DATASET_DIMENSION, DATASET_CHANNELS) sample and whose
        output is a softmax over NUM_CLASSES classes.
    """
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(DATASET_DIMENSION, DATASET_DIMENSION, DATASET_CHANNELS)))
    for _ in range(num_hidden_layers):
        model.add(keras.layers.Dense(NEURONS_PER_LAYER, activation='relu'))
    # Size the output from NUM_CLASSES so it always matches the range of the generated labels.
    model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
    return model

## 1 CPU

In [None]:
# Time training of the 75-hidden-layer model with all ops placed on the CPU.
# This is the single-thread run only while the thread limits in the imports cell are active.
with tf.device('/CPU:0'):
    model = get_model(75)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments during a run that lasts minutes to hours.
    start_cpu = time.perf_counter()
    model.fit(get_dataset(BATCH_SIZE), epochs=NUM_EPOCHS, steps_per_epoch=SPE)
    end_cpu = time.perf_counter()
    time_cpu = end_cpu - start_cpu
    print("CPU time: {} seconds".format(time_cpu))

## 16 CPU

In [None]:
# Time training of the 75-hidden-layer model on the CPU using all available cores.
# NOTE: this only measures multi-core performance if the single-thread limits in the
# imports cell were NOT applied in this kernel session; otherwise it repeats the 1-thread run.
with tf.device('/CPU:0'):
    model = get_model(75)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments during the run.
    sixteen_start_cpu = time.perf_counter()
    model.fit(get_dataset(BATCH_SIZE), epochs=NUM_EPOCHS, steps_per_epoch=SPE)
    sixteen_end_cpu = time.perf_counter()
    sixteen_time_cpu = sixteen_end_cpu - sixteen_start_cpu
    # Label the output distinctly so it cannot be confused with the 1 CPU cell's result.
    print("16 CPU time: {} seconds".format(sixteen_time_cpu))

## 1 GPU

In [None]:
# Time training of the 75-hidden-layer model with all ops placed on the first GPU.
with tf.device('/GPU:0'):
    # Use the same depth (75) as the other hardware runs and the recorded results table,
    # so the timings are comparable across devices.
    model = get_model(75)
    # NOTE(review): the CPU cells additionally compile with metrics=['accuracy'];
    # confirm whether the GPU runs should match for a like-for-like comparison.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments during the run.
    start_gpu = time.perf_counter()
    model.fit(get_dataset(BATCH_SIZE), epochs=NUM_EPOCHS, steps_per_epoch=SPE)
    end_gpu = time.perf_counter()
    time_gpu = end_gpu - start_gpu
    print("GPU time: {} seconds".format(time_gpu))

## 2 GPU (Data Parallel training with All-Reduce Gradient Combination)

In [None]:
# Time data-parallel training of the 75-hidden-layer model replicated across two GPUs.
# The model is created and compiled inside the strategy scope so its variables are mirrored.
mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])
with mirrored_strategy.scope():
    model = get_model(75)
    # NOTE(review): the CPU cells additionally compile with metrics=['accuracy'];
    # confirm whether the GPU runs should match for a like-for-like comparison.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments during the run.
    start_two_gpu = time.perf_counter()
    model.fit(get_dataset(BATCH_SIZE), epochs=NUM_EPOCHS, steps_per_epoch=SPE)
    end_two_gpu = time.perf_counter()
    time_two_gpu = end_two_gpu - start_two_gpu
    print("Two GPU time: {} seconds".format(time_two_gpu))

## Plotting

In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Render all figure text through LaTeX in a serif font (needs a working LaTeX installation).
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
# The preamble is supplied as ONE newline-joined string: recent Matplotlib releases
# no longer accept a list of lines for this setting.
plt.rcParams['text.latex.preamble'] = "\n".join([r"\usepackage{amssymb}",
                                                 r"\usepackage{amsmath}"])

- 1 CPU: 5 layers: 1624 seconds, 15 layers: 2396 seconds, 30 layers: 3583 seconds, 45 layers: 4850 seconds, 60 layers: 6046 seconds, 75 layers: 7311 seconds

- 16 CPU: 5 layers: 235 seconds, 15 layers: 337 seconds, 30 layers: 499 seconds, 45 layers: 663 seconds, 60 layers: 830 seconds, 75 layers: 985 seconds

- 1 GPU: 5 layers: 95 seconds, 15 layers: 99 seconds, 30 layers: 98 seconds, 45 layers: 116 seconds, 60 layers: 192 seconds, 75 layers: 199 seconds

- 2 GPU: 5 layers: 97 seconds, 15 layers: 101 seconds, 30 layers: 109 seconds, 45 layers: 107 seconds, 60 layers: 115 seconds, 75 layers: 181 seconds

In [None]:
# Recorded training times in seconds: one row per network depth,
# one column per hardware configuration.
raw_data = {
    'layers': ['{} Layers'.format(depth) for depth in (5, 15, 30, 45, 60, 75)],
    '1 Intel Xeon 3.50Ghz': [1624, 2396, 3583, 4850, 6046, 7311],
    '16 Intel Xeon 3.50Ghz': [235, 337, 499, 663, 830, 985],
    '1 Tesla P100': [95, 99, 98, 116, 192, 199],
    '2 Tesla P100': [97, 101, 109, 107, 115, 181],
}

# Column order is given explicitly so the table layout does not depend on dict ordering.
df = pd.DataFrame(
    raw_data,
    columns=[
        'layers',
        '1 Intel Xeon 3.50Ghz',
        '16 Intel Xeon 3.50Ghz',
        '1 Tesla P100',
        '2 Tesla P100',
    ],
)
df

In [None]:
# Grouped bar chart of training time per network depth, one bar per hardware configuration.
hardware_columns = ['1 Intel Xeon 3.50Ghz', '16 Intel Xeon 3.50Ghz', '1 Tesla P100', '2 Tesla P100']
bar_colors = ['#EE3224', '#F78F1E', '#FFC222', 'lightblue']

# One group per row of df. Four bars of width 0.2 fill 0.8 of each unit slot,
# leaving a visible gap between neighbouring groups.
pos = list(range(len(df)))
width = 0.2

fig, ax = plt.subplots(figsize=(10, 5))

# Draw each hardware series shifted right by one bar width, labelled with its own
# hardware name so the legend is derived from the bars themselves.
for offset, (column, color) in enumerate(zip(hardware_columns, bar_colors)):
    ax.bar([p + offset * width for p in pos],
           df[column],
           width,
           alpha=0.5,
           color=color,
           label=column)

ax.set_ylabel('Time (s)')
ax.set_title('Deep Learning Hardware Benchmark')

# Centre each tick under its group of four bars and label it with the layer count.
ax.set_xticks([p + 1.5 * width for p in pos])
ax.set_xticklabels(df['layers'])

ax.set_xlim(min(pos) - width, max(pos) + width * 4)
# Scale the y axis to the largest measured time over ALL hardware columns, plus 5% headroom
# (adding the columns together would sum them element-wise instead of comparing them).
ax.set_ylim(0, 1.05 * df[hardware_columns].max().max())

ax.legend(loc='upper left')
ax.grid()
# NOTE(review): EPS has no transparency support, so the alpha=0.5 bars may be rendered
# opaque in the saved file - confirm the exported figure looks as intended.
fig.savefig('results.eps', format='eps', dpi=1000)
plt.show()