In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks  # type: ignore

# Suppress NUMA and other non-critical warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Suppresses warnings and info messages
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"  # Avoids NUMA checks


# Custom callback to print device information after each epoch
class DeviceCallback(callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Checking the device details
        devices = tf.config.list_logical_devices("GPU")
        if devices:
            device_name = devices[0].name
            print(f"Epoch {epoch + 1} finished using: {device_name} (GPU)")
        else:
            print(f"Epoch {epoch + 1} finished using: CPU")


# Check if GPU is available
if tf.config.list_physical_devices("GPU"):
    print("Training on GPU...")
else:
    print("No GPU detected. Exiting...")
    exit()

# Load the CIFAR-10 dataset (50,000 training and 10,000 test images)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize the data to range [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build a simple CNN model
model = models.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.Flatten(),
        layers.Dense(64, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)

# Compile the model
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

2024-10-31 00:57:09.195201: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-31 00:57:09.209388: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-31 00:57:09.213541: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-31 00:57:09.224185: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Training on GPU...
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


I0000 00:00:1730325432.001280    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325432.007564    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325432.007603    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 1us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1730325621.579331    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325621.579409    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325621.579429    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325622.716089    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730325622.716144    8844 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/nu

In [2]:
# Train the model with the custom callback
model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=64,
    validation_data=(x_test, y_test),
    callbacks=[DeviceCallback()],
)

Epoch 1/20


I0000 00:00:1730325625.744212    9540 service.cc:146] XLA service 0x7f44900040c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730325625.744298    9540 service.cc:154]   StreamExecutor device (0): Quadro T2000, Compute Capability 7.5
2024-10-31 01:00:25.770337: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-31 01:00:25.919705: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m 52/782[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 3ms/step - accuracy: 0.1536 - loss: 2.2616

I0000 00:00:1730325627.816010    9540 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3190 - loss: 1.8302Epoch 1 finished using: /device:GPU:0 (GPU)
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.3191 - loss: 1.8299 - val_accuracy: 0.5136 - val_loss: 1.3317
Epoch 2/20
[1m777/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.5350 - loss: 1.3070Epoch 2 finished using: /device:GPU:0 (GPU)
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.5352 - loss: 1.3066 - val_accuracy: 0.5910 - val_loss: 1.1623
Epoch 3/20
[1m778/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.5925 - loss: 1.1497Epoch 3 finished using: /device:GPU:0 (GPU)
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.5926 - loss: 1.1495 - val_accuracy: 0.6152 - val_loss: 1.0919
Epoch 4/20
[1m776/782[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1

<keras.src.callbacks.history.History at 0x7f459fc65290>

In [3]:
# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc * 100:.2f}%")

313/313 - 1s - 4ms/step - accuracy: 0.7147 - loss: 0.9190
Test accuracy: 71.47%
