In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist

In [None]:
# Environment check: show the TensorFlow version in use and the list of GPU
# devices TensorFlow can see (an empty list means no GPU is visible).
tf_version = tf.__version__
visible_gpus = tf.config.list_physical_devices('GPU')
print(tf_version)
print(visible_gpus)

2.12.0
[]


In [None]:
# Load MNIST through the Keras dataset helper and unpack each split into
# its image array (X) and label array (Y).
train_split, test_split = mnist.load_data()
trainX, trainY = train_split
testX, testY = test_split

# Report the array shapes of both splits as a quick sanity check.
train_shapes = (trainX.shape, trainY.shape)
test_shapes = (testX.shape, testY.shape)
print('Train: X=%s, y=%s' % train_shapes)
print('Test: X=%s, y=%s' % test_shapes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)


In [None]:
# Give every image an explicit trailing channel axis: (N, 28, 28) becomes
# (N, 28, 28, 1), which is the input shape the Conv2D layer is declared with.
trainX, testX = (
    images.reshape((images.shape[0], 28, 28, 1))
    for images in (trainX, testX)
)

In [None]:

# One-hot encode the integer class labels for use with categorical
# cross-entropy. num_classes is passed explicitly so the train and test
# encodings always have the same width (10, matching the model's 10-unit
# softmax output) rather than each being inferred from the largest label
# that happens to be present in that split.
trainY = to_categorical(trainY, num_classes=10)
testY = to_categorical(testY, num_classes=10)

In [None]:
# Cast the images to float32 and divide by 255 so pixel values are
# normalised to the 0-1 range.
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

In [None]:
# Small CNN classifier for 28x28x1 images, assembled layer by layer:
# one convolution + max-pool stage, then a dense hidden layer and a
# 10-way softmax output.
model = tf.keras.models.Sequential()

# Convolutional feature extractor; 'same' padding keeps the 28x28 spatial size.
model.add(tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=(3, 3),
    padding='same',
    kernel_initializer='glorot_uniform',
    activation='selu',
    input_shape=(28, 28, 1),
))
model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))

# Classifier head on the flattened feature maps.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(
    64,
    kernel_initializer='glorot_uniform',
    activation='selu',
))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 6272)              0         
                                                                 
 dense (Dense)               (None, 64)                401472    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 402,442
Trainable params: 402,442
Non-trainable params: 0
__________________________________________________

In [None]:

# Configure training: Adam with a 1e-3 learning rate, categorical
# cross-entropy as the loss (labels are one-hot encoded), and accuracy as
# the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])


# GPU CODE

In [None]:
import time
import json

# GPU training benchmark.
#
# Trains the CNN on the first GPU with a manual mini-batch loop and records,
# per epoch: wall-clock time, summed batch-processing time, throughput
# (samples/second), peak GPU memory in bytes, and test-set accuracy/loss.
# The per-epoch records are written to GPU_Python_epoch_data.json and a
# summary is printed at the end.

# Fail fast with a clear message when no GPU is visible, instead of failing
# later inside get_memory_info after the first batch has already run.
if not tf.config.list_physical_devices('GPU'):
    raise RuntimeError("No GPU is visible to TensorFlow; cannot run the GPU benchmark.")

# Start from freshly initialised weights. compile() alone does not reset the
# weights, so without this a re-run of the cell would keep training an
# already-trained model. clone_model builds new layers with new weights.
with tf.device('/GPU:0'):
    model = tf.keras.models.clone_model(model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

epoch_data = []

# Training configuration
batch_size = 64
num_epochs = 20
num_samples = len(trainX)

# Run-level accumulators
total_training_time = 0
total_memory_usage = 0   # sum of the per-epoch peaks; only used for the average
peak_memory_usage = 0    # highest per-epoch peak seen during the run

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    total_batch_processing_time = 0

    # Reset the allocator statistics so that 'peak' read after the epoch
    # reflects this epoch only.
    tf.config.experimental.reset_memory_stats('GPU:0')

    # perf_counter is monotonic and high-resolution, which suits interval timing.
    epoch_start_time = time.perf_counter()

    for batch in range(0, num_samples, batch_size):
        batch_start_time = time.perf_counter()

        # Extract a batch of data (the final batch may be shorter)
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Perform training step
        with tf.device('/GPU:0'):
            model.train_on_batch(batch_x, batch_y)

        total_batch_processing_time += time.perf_counter() - batch_start_time

    epoch_time = time.perf_counter() - epoch_start_time
    total_training_time += epoch_time

    # Peak bytes allocated on the GPU during this epoch. Read once, outside
    # the timed region, rather than summing an instantaneous reading per
    # batch (a sum of ~938 samples is not a memory figure).
    epoch_memory_use = tf.config.experimental.get_memory_info('GPU:0')['peak']
    total_memory_usage += epoch_memory_use
    peak_memory_usage = max(peak_memory_usage, epoch_memory_use)

    throughput = num_samples / epoch_time
    throughputs.append(throughput)

    print(f" - Peak Memory Usage: {epoch_memory_use:.2f} bytes")
    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence on the test set; evaluate() returns
    # [loss, accuracy] for the metrics configured in compile().
    loss, accuracy = model.evaluate(testX, testY, verbose=0)

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    # Same keys as before; "epoch_memory_use" now holds the epoch's peak bytes.
    epoch_data.append({
        "epoch": epoch + 1,
        "epoch_memory_use": epoch_memory_use,
        "epoch_time": epoch_time,
        "batch_processing_time": total_batch_processing_time,
        "throughput": throughput,
        "accuracy": accuracy,
        "loss": loss,
    })


with open('GPU_Python_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Peak Memory Usage: {peak_memory_usage:.2f} bytes")
print(f"Average Per-Epoch Peak Memory Usage: {total_memory_usage / num_epochs:.2f} bytes")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/20
 - Memory Usage: 212728879360.00 bytes
 - Epoch Time: 10.30 seconds
 - Batch Processing Time: 10.28 seconds
 - Throughput: 5824.50 samples/second
 - Accuracy: 0.9615
 - Loss: 0.1279
Epoch 2/20
 - Memory Usage: 242522162176.00 bytes
 - Epoch Time: 8.65 seconds
 - Batch Processing Time: 8.63 seconds
 - Throughput: 6937.39 samples/second
 - Accuracy: 0.9618
 - Loss: 0.1152
Epoch 3/20
 - Memory Usage: 274370733056.00 bytes
 - Epoch Time: 9.07 seconds
 - Batch Processing Time: 9.05 seconds
 - Throughput: 6614.66 samples/second
 - Accuracy: 0.9758
 - Loss: 0.0745
Epoch 4/20
 - Memory Usage: 304162140160.00 bytes
 - Epoch Time: 9.13 seconds
 - Batch Processing Time: 9.11 seconds
 - Throughput: 6570.94 samples/second
 - Accuracy: 0.9781
 - Loss: 0.0700
Epoch 5/20
 - Memory Usage: 333952733184.00 bytes
 - Epoch Time: 8.93 seconds
 - Batch Processing Time: 8.91 seconds
 - Throughput: 6718.67 samples/second
 - Accuracy: 0.9806
 - Loss: 0.0689
Epoch 6/20
 - Memory Usage: 380211158528.00

# CPU PART

In [None]:
import tensorflow as tf
import time
import json

# CPU training benchmark.
#
# Trains the CNN on the CPU with a manual mini-batch loop and records, per
# epoch: wall-clock time, summed batch-processing time, throughput
# (samples/second), and test-set accuracy/loss. The per-epoch records are
# written to CPU_Python_epoch_data.json and a summary is printed at the end.

# Start from freshly initialised weights, created on the CPU. compile() alone
# does not reset the weights, so without this the cell would continue training
# whatever an earlier cell already trained and the accuracy/loss figures would
# not be comparable. clone_model builds new layers with new weights.
with tf.device('/CPU:0'):
    model = tf.keras.models.clone_model(model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

epoch_data = []

# Training configuration
batch_size = 64
num_epochs = 20
num_samples = len(trainX)

# Run-level accumulators
total_training_time = 0

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    total_batch_processing_time = 0

    # perf_counter is monotonic and high-resolution, which suits interval timing.
    epoch_start_time = time.perf_counter()

    for batch in range(0, num_samples, batch_size):
        batch_start_time = time.perf_counter()

        # Extract a batch of data (the final batch may be shorter)
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Pin the step to the CPU explicitly: when a GPU is visible,
        # TensorFlow places ops on it by default, which would silently turn
        # this into a second GPU benchmark.
        with tf.device('/CPU:0'):
            model.train_on_batch(batch_x, batch_y)

        total_batch_processing_time += time.perf_counter() - batch_start_time

    epoch_time = time.perf_counter() - epoch_start_time
    total_training_time += epoch_time

    throughput = num_samples / epoch_time
    throughputs.append(throughput)

    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence on the test set (also on the CPU);
    # evaluate() returns [loss, accuracy] for the metrics set in compile().
    with tf.device('/CPU:0'):
        loss, accuracy = model.evaluate(testX, testY, verbose=0)

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    epoch_data.append({
        "epoch": epoch + 1,
        "epoch_time": epoch_time,
        "batch_processing_time": total_batch_processing_time,
        "throughput": throughput,
        "accuracy": accuracy,
        "loss": loss,
    })


with open('CPU_Python_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/20
 - Epoch Time: 47.81 seconds
 - Batch Processing Time: 47.80 seconds
 - Throughput: 1255.09 samples/second
 - Accuracy: 0.9819
 - Loss: 0.1197
Epoch 2/20
 - Epoch Time: 46.17 seconds
 - Batch Processing Time: 46.17 seconds
 - Throughput: 1299.55 samples/second
 - Accuracy: 0.9821
 - Loss: 0.1108
Epoch 3/20
 - Epoch Time: 45.97 seconds
 - Batch Processing Time: 45.96 seconds
 - Throughput: 1305.26 samples/second
 - Accuracy: 0.9766
 - Loss: 0.1692
Epoch 4/20
 - Epoch Time: 46.33 seconds
 - Batch Processing Time: 46.33 seconds
 - Throughput: 1294.99 samples/second
 - Accuracy: 0.9825
 - Loss: 0.1225
Epoch 5/20
 - Epoch Time: 46.26 seconds
 - Batch Processing Time: 46.25 seconds
 - Throughput: 1297.08 samples/second
 - Accuracy: 0.9810
 - Loss: 0.1289
Epoch 6/20
 - Epoch Time: 45.09 seconds
 - Batch Processing Time: 45.09 seconds
 - Throughput: 1330.58 samples/second
 - Accuracy: 0.9790
 - Loss: 0.1497
Epoch 7/20
 - Epoch Time: 45.04 seconds
 - Batch Processing Time: 45.04 seco