In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist

In [13]:
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.12.0
[]


In [2]:
# example of loading the mnist dataset
# load dataset
(trainX, trainY), (testX, testY) = mnist.load_data()
# summarize loaded dataset
print('Train: X=%s, y=%s' % (trainX.shape, trainY.shape))
print('Test: X=%s, y=%s' % (testX.shape, testY.shape))

Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)


In [3]:
# reshape dataset to have a single channel
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))

In [4]:

# one hot encode target values
trainY = to_categorical(trainY)
testY = to_categorical(testY)

In [5]:
trainX = trainX.astype('float32')
testX = testX.astype('float32')
# normalize to range 0-1
trainX = trainX / 255.0
testX = testX / 255.0

In [6]:
model = tf.keras.models.Sequential([
                            tf.keras.layers.Conv2D(filters=32,kernel_size=(3,3), padding='same', kernel_initializer='glorot_uniform', activation='selu', input_shape=(28, 28, 1)),
                            tf.keras.layers.MaxPool2D(pool_size=(2,2)),
                            tf.keras.layers.Flatten(),
                            tf.keras.layers.Dense(64, kernel_initializer='glorot_uniform', activation='selu'),
                            tf.keras.layers.Dense(10, activation='softmax')
])

In [8]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 6272)              0         
                                                                 
 dense (Dense)               (None, 64)                401472    
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 402,442
Trainable params: 402,442
Non-trainable params: 0
__________________________________________________

In [12]:

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])


# GPU CODE

In [13]:
import time
import json

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

epoch_data = {
    "epoch": [],
    "epoch_memory_usage": [],
    "epoch_time": [],
    "batch_processing_time": [],
    "throughput": [],
    "accuracy": [],
    "loss": [],
}

# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0
total_memory_usage = 0

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0
    epoch_memory_use = 0



    epoch_start_time = time.time()

    for batch in range(0, len(trainX), batch_size):
        batch_start_time = time.time()

        # Extract a batch of data
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Perform training step
        with tf.device('/GPU:0'):
            batch_history = model.train_on_batch(batch_x, batch_y)

        batch_processing_time = time.time() - batch_start_time
        total_batch_processing_time += batch_processing_time

        # Calculate memory usage (Note: This is an approximation)
        memory_usage = tf.config.experimental.get_memory_info('GPU:0')['current']
        epoch_memory_use += memory_usage

    epoch_end_time = time.time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time
    total_memory_usage += epoch_memory_use

    num_samples = len(trainX)
    steps_per_epoch = num_samples // batch_size
    throughput = num_samples / epoch_time

    throughputs.append(throughput)

    print(f" - Memory Usage: {epoch_memory_use:.2f} bytes")
    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence
    eval_results = model.evaluate(testX, testY, verbose=0)
    accuracy = eval_results[1]
    loss = eval_results[0]

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    epoch_data["epoch"].append(epoch + 1)
    epoch_data["epoch_memory_usage"].append(epoch_memory_use)
    epoch_data["epoch_time"].append(epoch_time)
    epoch_data["batch_processing_time"].append(total_batch_processing_time)
    epoch_data["throughput"].append(throughput)
    epoch_data["accuracy"].append(accuracy)
    epoch_data["loss"].append(loss)



with open('GPU_PYTHON_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Total Memory Usage: {total_memory_usage:.2f} bytes")
print(f"Average Memory Usage: {total_memory_usage / num_epochs:.2f} bytes")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/10
 - Memory Usage: 14002824960.00 bytes
 - Epoch Time: 11.35 seconds
 - Batch Processing Time: 11.33 seconds
 - Throughput: 5284.41 samples/second
 - Accuracy: 0.9646
 - Loss: 0.1137
Epoch 2/10
 - Memory Usage: 44347799552.00 bytes
 - Epoch Time: 9.80 seconds
 - Batch Processing Time: 9.77 seconds
 - Throughput: 6124.86 samples/second
 - Accuracy: 0.9728
 - Loss: 0.0811
Epoch 3/10
 - Memory Usage: 22005989888.00 bytes
 - Epoch Time: 9.91 seconds
 - Batch Processing Time: 9.88 seconds
 - Throughput: 6056.04 samples/second
 - Accuracy: 0.9743
 - Loss: 0.0778
Epoch 4/10
 - Memory Usage: 44347799552.00 bytes
 - Epoch Time: 9.20 seconds
 - Batch Processing Time: 9.17 seconds
 - Throughput: 6524.51 samples/second
 - Accuracy: 0.9773
 - Loss: 0.0729
Epoch 5/10
 - Memory Usage: 76549648384.00 bytes
 - Epoch Time: 10.05 seconds
 - Batch Processing Time: 10.03 seconds
 - Throughput: 5970.24 samples/second
 - Accuracy: 0.9719
 - Loss: 0.1004
Epoch 6/10
 - Memory Usage: 106340648448.00 by

# CPU PART

In [11]:
import tensorflow as tf
import time
import json

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

epoch_data = {
    "epoch": [],
    "epoch_memory_usage": [],
    "epoch_time": [],
    "batch_processing_time": [],
    "throughput": [],
    "accuracy": [],
    "loss": [],
}

# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0
total_memory_usage = 0

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0
    epoch_memory_use = 0



    epoch_start_time = time.time()

    for batch in range(0, len(trainX), batch_size):
        batch_start_time = time.time()

        # Extract a batch of data
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Perform training step on CPU (no need to specify device)
        batch_history = model.train_on_batch(batch_x, batch_y)

        batch_processing_time = time.time() - batch_start_time
        total_batch_processing_time += batch_processing_time



    epoch_end_time = time.time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time

    num_samples = len(trainX)
    steps_per_epoch = num_samples // batch_size
    throughput = num_samples / epoch_time

    throughputs.append(throughput)

    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence
    eval_results = model.evaluate(testX, testY, verbose=0)
    accuracy = eval_results[1]
    loss = eval_results[0]

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    epoch_data["epoch"].append(epoch + 1)
    epoch_data["epoch_memory_usage"].append(0)
    epoch_data["epoch_time"].append(epoch_time)
    epoch_data["batch_processing_time"].append(total_batch_processing_time)
    epoch_data["throughput"].append(throughput)
    epoch_data["accuracy"].append(accuracy)
    epoch_data["loss"].append(loss)



with open('CPU_PYTHON_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/10
 - Epoch Time: 42.31 seconds
 - Batch Processing Time: 42.30 seconds
 - Throughput: 1418.26 samples/second
 - Accuracy: 0.9777
 - Loss: 0.0750
Epoch 2/10
 - Epoch Time: 42.70 seconds
 - Batch Processing Time: 42.69 seconds
 - Throughput: 1405.25 samples/second
 - Accuracy: 0.9743
 - Loss: 0.0878
Epoch 3/10
 - Epoch Time: 42.24 seconds
 - Batch Processing Time: 42.23 seconds
 - Throughput: 1420.59 samples/second
 - Accuracy: 0.9617
 - Loss: 0.1416
Epoch 4/10
 - Epoch Time: 41.89 seconds
 - Batch Processing Time: 41.88 seconds
 - Throughput: 1432.43 samples/second
 - Accuracy: 0.9784
 - Loss: 0.0835
Epoch 5/10
 - Epoch Time: 41.04 seconds
 - Batch Processing Time: 41.03 seconds
 - Throughput: 1462.08 samples/second
 - Accuracy: 0.9767
 - Loss: 0.0913
Epoch 6/10
 - Epoch Time: 42.87 seconds
 - Batch Processing Time: 42.87 seconds
 - Throughput: 1399.54 samples/second
 - Accuracy: 0.9811
 - Loss: 0.0769
Epoch 7/10
 - Epoch Time: 41.05 seconds
 - Batch Processing Time: 41.05 seco