In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist

In [None]:
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.12.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# example of loading the mnist dataset
# load dataset
(trainX, trainY), (testX, testY) = mnist.load_data()
# summarize loaded dataset
print('Train: X=%s, y=%s' % (trainX.shape, trainY.shape))
print('Test: X=%s, y=%s' % (testX.shape, testY.shape))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)


In [3]:
# reshape dataset to have a single channel
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))

In [4]:

# one hot encode target values
trainY = to_categorical(trainY)
testY = to_categorical(testY)

In [5]:
trainX = trainX.astype('float32')
testX = testX.astype('float32')
# normalize to range 0-1
trainX = trainX / 255.0
testX = testX / 255.0

# SMALL

In [None]:
model_type = "SMALL"

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(10, activation='softmax')
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 dense_4 (Dense)             (None, 128)               100480    
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


#MEDIUM

In [6]:
model_type = "MEDIUM"

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='selu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='selu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='selu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 3136)              0         
                                                                 
 dense (Dense)               (None, 64)                2

# BIG

In [None]:
model.summary()

model_type = "MEDIUM"

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', strides=(1, 1), activation='selu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), padding='same', strides=(1, 1), activation='selu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(256, (3, 3), padding='same', strides=(1, 1), activation='selu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='selu'),
    tf.keras.layers.Dense(256, activation='selu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 28, 28, 64)        640       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 14, 14, 64)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 14, 14, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 7, 7, 128)        0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 7, 7, 256)         295168    
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 3, 3, 256)       

#Compile and summary

In [8]:

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])


# GPU CODE

In [None]:
import time
import json

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

epoch_data = {
    "epoch": [],
    "epoch_memory_usage": [],
    "epoch_time": [],
    "batch_processing_time": [],
    "throughput": [],
    "accuracy": [],
    "loss": [],
}

# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0
total_memory_usage = 0

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0
    epoch_memory_use = 0



    epoch_start_time = time.time()

    for batch in range(0, len(trainX), batch_size):
        batch_start_time = time.time()

        # Extract a batch of data
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Perform training step
        with tf.device('/GPU:0'):
            batch_history = model.train_on_batch(batch_x, batch_y)

        batch_processing_time = time.time() - batch_start_time
        total_batch_processing_time += batch_processing_time

        # Calculate memory usage (Note: This is an approximation)
        memory_usage = tf.config.experimental.get_memory_info('GPU:0')['current']
        epoch_memory_use += memory_usage

    epoch_end_time = time.time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time
    total_memory_usage += epoch_memory_use

    num_samples = len(trainX)
    steps_per_epoch = num_samples // batch_size
    throughput = num_samples / epoch_time

    throughputs.append(throughput)

    print(f" - Memory Usage: {epoch_memory_use:.2f} bytes")
    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence
    eval_results = model.evaluate(testX, testY, verbose=0)
    accuracy = eval_results[1]
    loss = eval_results[0]

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    epoch_data["epoch"].append(epoch + 1)
    epoch_data["epoch_memory_usage"].append(epoch_memory_use)
    epoch_data["epoch_time"].append(epoch_time)
    epoch_data["batch_processing_time"].append(total_batch_processing_time)
    epoch_data["throughput"].append(throughput)
    epoch_data["accuracy"].append(accuracy)
    epoch_data["loss"].append(loss)



with open(f'{model_type}_GPU_PYTHON_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Total Memory Usage: {total_memory_usage:.2f} bytes")
print(f"Average Memory Usage: {total_memory_usage / num_epochs:.2f} bytes")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/10
 - Memory Usage: 91700360960.00 bytes
 - Epoch Time: 16.70 seconds
 - Batch Processing Time: 16.68 seconds
 - Throughput: 3592.85 samples/second
 - Accuracy: 0.9699
 - Loss: 0.0988
Epoch 2/10
 - Memory Usage: 123790825472.00 bytes
 - Epoch Time: 12.77 seconds
 - Batch Processing Time: 12.75 seconds
 - Throughput: 4699.40 samples/second
 - Accuracy: 0.9789
 - Loss: 0.0833
Epoch 3/10
 - Memory Usage: 162771043840.00 bytes
 - Epoch Time: 12.95 seconds
 - Batch Processing Time: 12.93 seconds
 - Throughput: 4634.10 samples/second
 - Accuracy: 0.9694
 - Loss: 0.1393
Epoch 4/10
 - Memory Usage: 192562043392.00 bytes
 - Epoch Time: 12.88 seconds
 - Batch Processing Time: 12.86 seconds
 - Throughput: 4657.43 samples/second
 - Accuracy: 0.9842
 - Loss: 0.0715
Epoch 5/10
 - Memory Usage: 222352764928.00 bytes
 - Epoch Time: 12.65 seconds
 - Batch Processing Time: 12.63 seconds
 - Throughput: 4744.28 samples/second
 - Accuracy: 0.9877
 - Loss: 0.0663
Epoch 6/10
 - Memory Usage: 25214376

# CPU PART

In [9]:
import tensorflow as tf
import time
import json

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

epoch_data = {
    "epoch": [],
    "epoch_time": [],
    "batch_processing_time": [],
    "throughput": [],
    "accuracy": [],
    "loss": [],
}

# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0
total_memory_usage = 0

throughputs = []
accs = []
losss = []

# Start training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0
    epoch_memory_use = 0



    epoch_start_time = time.time()

    for batch in range(0, len(trainX), batch_size):
        batch_start_time = time.time()

        # Extract a batch of data
        batch_x = trainX[batch:batch + batch_size]
        batch_y = trainY[batch:batch + batch_size]

        # Perform training step on CPU (no need to specify device)
        batch_history = model.train_on_batch(batch_x, batch_y)

        batch_processing_time = time.time() - batch_start_time
        total_batch_processing_time += batch_processing_time



    epoch_end_time = time.time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time

    num_samples = len(trainX)
    steps_per_epoch = num_samples // batch_size
    throughput = num_samples / epoch_time

    throughputs.append(throughput)

    print(f" - Epoch Time: {epoch_time:.2f} seconds")
    print(f" - Batch Processing Time: {total_batch_processing_time:.2f} seconds")
    print(f" - Throughput: {throughput:.2f} samples/second")

    # Evaluate accuracy and convergence
    eval_results = model.evaluate(testX, testY, verbose=0)
    accuracy = eval_results[1]
    loss = eval_results[0]

    accs.append(accuracy)
    losss.append(loss)

    print(f" - Accuracy: {accuracy:.4f}")
    print(f" - Loss: {loss:.4f}")

    epoch_data["epoch"].append(epoch + 1)
    epoch_data["epoch_time"].append(epoch_time)
    epoch_data["batch_processing_time"].append(total_batch_processing_time)
    epoch_data["throughput"].append(throughput)
    epoch_data["accuracy"].append(accuracy)
    epoch_data["loss"].append(loss)



with open('CPU_PYTHON_epoch_data.json', 'w') as json_file:
    json.dump(epoch_data, json_file, indent=4)

print(f"Total Training Time: {total_training_time:.2f} seconds")
print(f"Average Throughput: {sum(throughputs) / num_epochs:.2f} samples/second")
print(f"Average Accuracy: {sum(accs) / num_epochs:.2f}")
print(f"Average Loss: {sum(losss) / num_epochs:.2f}")


Epoch 1/10
 - Epoch Time: 52.77 seconds
 - Batch Processing Time: 52.77 seconds
 - Throughput: 1136.96 samples/second
 - Accuracy: 0.9858
 - Loss: 0.0876
Epoch 2/10
 - Epoch Time: 51.25 seconds
 - Batch Processing Time: 51.25 seconds
 - Throughput: 1170.72 samples/second
 - Accuracy: 0.9868
 - Loss: 0.0799
Epoch 3/10
 - Epoch Time: 52.03 seconds
 - Batch Processing Time: 52.03 seconds
 - Throughput: 1153.21 samples/second
 - Accuracy: 0.9869
 - Loss: 0.0755
Epoch 4/10
 - Epoch Time: 51.24 seconds
 - Batch Processing Time: 51.23 seconds
 - Throughput: 1171.06 samples/second
 - Accuracy: 0.9869
 - Loss: 0.0744
Epoch 5/10
 - Epoch Time: 51.87 seconds
 - Batch Processing Time: 51.87 seconds
 - Throughput: 1156.65 samples/second
 - Accuracy: 0.9885
 - Loss: 0.0775
Epoch 6/10
 - Epoch Time: 51.42 seconds
 - Batch Processing Time: 51.42 seconds
 - Throughput: 1166.79 samples/second
 - Accuracy: 0.9891
 - Loss: 0.0700
Epoch 7/10
 - Epoch Time: 51.31 seconds
 - Batch Processing Time: 51.31 seco