In [43]:
# Configure TensorFlow for an M1 Max with 32 GPU cores
import tensorflow as tf
import os

def configure_tensorflow(memory_limit_mb=24576):
    """Apply the process-wide TensorFlow settings used by this notebook.

    Prints the visible physical devices, configures the first GPU (if one is
    present), sets the CPU thread pools, enables the ``mixed_float16`` Keras
    policy, turns XLA JIT off and exports several environment variables.

    Args:
        memory_limit_mb: Memory limit, in MB, of the single logical device
            created on the first GPU. Pass ``None`` to enable on-demand
            memory growth instead. Only one of the two is applied because
            TensorFlow refuses memory growth on a device that already has a
            logical-device configuration.

    Returns:
        None.
    """
    # Physical devices configuration
    physical_devices = tf.config.list_physical_devices()
    print("Available devices:", physical_devices)

    # Configure GPU. Select it from the GPU-filtered list rather than assuming
    # it sits at a fixed index of the unfiltered device list.
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        print("No GPU devices found")
    else:
        gpu = gpus[0]
        try:
            if memory_limit_mb is None:
                tf.config.experimental.set_memory_growth(gpu, True)
            else:
                tf.config.experimental.set_virtual_device_configuration(
                    gpu,
                    [
                        tf.config.experimental.VirtualDeviceConfiguration(
                            memory_limit=memory_limit_mb
                        )
                    ],
                )
        except (ValueError, RuntimeError) as err:
            # Raised for an invalid device or when the runtime is already
            # initialized; report the real reason instead of hiding it.
            print(f"GPU configuration skipped: {err}")

    # CPU configuration
    # NOTE(review): 7 is hard-coded; confirm it matches the CPU core count you intend to use.
    tf.config.threading.set_intra_op_parallelism_threads(7)
    tf.config.threading.set_inter_op_parallelism_threads(7)

    # Mixed precision configuration
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # XLA JIT compilation is explicitly switched off here.
    tf.config.optimizer.set_jit(False)

    # Additional performance configurations
    # NOTE(review): these TF_* variables look CUDA-oriented and are set after
    # TensorFlow has been imported; confirm they have any effect on Metal.
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    # NOTE(review): 30 differs from the 32 GPU cores named in the notebook header; confirm.
    os.environ['TF_GPU_THREAD_COUNT'] = '30'
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

    # Metal performance configuration
    os.environ['METAL_DEBUG_ERROR_MODE'] = '0'
    os.environ['METAL_DEVICE_WRAPPER_TYPE'] = '0'

# Apply the settings right away; run_benchmark() in the next cell calls this again.
configure_tensorflow()

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [44]:
import tensorflow as tf
# from tf_config import configure_tensorflow
import time

def run_benchmark():
    """Train a small dense classifier on synthetic data and print the elapsed time.

    Applies ``configure_tensorflow()``, builds a model with two 1024-unit
    ReLU layers and a 10-class softmax output, fits it for 10 epochs on 1000
    random samples with batch size 32, and prints the wall-clock duration of
    the ``fit`` call.

    Returns:
        None.
    """
    # Apply configuration. It returns nothing, so there is no result to keep.
    configure_tensorflow()

    # Create a simple model for testing
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(10),
        # configure_tensorflow() enables the mixed_float16 policy; keep the
        # final softmax in float32 so the probabilities fed to the loss are
        # not computed in half precision.
        tf.keras.layers.Activation('softmax', dtype='float32'),
    ])

    # Generate synthetic data: random features with random one-hot labels
    x_train = tf.random.normal((1000, 1024))
    y_train = tf.random.uniform((1000,), maxval=10, dtype=tf.int32)
    y_train = tf.one_hot(y_train, 10)

    # Compile model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Time the training with a monotonic clock, which system clock
    # adjustments cannot skew. The first epoch includes one-time setup cost.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, batch_size=32, epochs=10)
    elapsed = time.perf_counter() - start_time

    print(f"Training time: {elapsed:.2f} seconds")


# Run the benchmark; per-epoch progress and the total training time are printed below.
run_benchmark()

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Epoch 1/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - accuracy: 0.0775 - loss: 3.4819
Epoch 2/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - accuracy: 0.5694 - loss: 2.3251
Epoch 3/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - accuracy: 0.7856 - loss: 0.9094
Epoch 4/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - accuracy: 0.8935 - loss: 0.3562
Epoch 5/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - accuracy: 0.9439 - loss: 0.1991
Epoch 6/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - accuracy: 0.9683 - loss: 0.1092
Epoch 7/10
32/32 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - accuracy: 0.9669 - loss: 0.1334
Epoch 8/10
[1m