In [1]:
import os, time, sys
# Ask TensorFlow's native logging to be quieter. Set before the import so the
# value is already in the environment when TensorFlow loads.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

try:
    import tensorflow as tf
except Exception as e:
    # Deliberately broad: any failure while importing TensorFlow means the
    # rest of this check cannot run, so report it and stop.
    print("❌ TensorFlow not importable in this environment.")
    print(e)
    sys.exit(1)

print(f"✅ TensorFlow version: {tf.__version__}")
# Fall back to 'n/a' if this TensorFlow build does not expose the helper.
print(f"Built with CUDA: {getattr(tf.test, 'is_built_with_cuda', lambda: 'n/a')()}")
# get_jit() reports the configured XLA auto-JIT mode, not whether XLA is
# available, and is falsy when auto-JIT is off; print 'disabled' rather than
# an empty value in that case.
print(f"XLA JIT setting: {tf.config.optimizer.get_jit() or 'disabled'}\n")

# Enumerate the physical devices TensorFlow can see.
gpus = tf.config.list_physical_devices("GPU")
cpus = tf.config.list_physical_devices("CPU")
print(f"CPUs detected: {[d.name for d in cpus]}")
print(f"GPUs detected: {[d.name for d in gpus]}")

# Enable memory growth (avoids grabbing all VRAM). Best effort: a failure is
# reported as a warning and the check carries on.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except Exception as e:
        print(f"Warning: couldn't set memory growth on {gpu}: {e}")

# Describe the first GPU. Also best effort, but a failure is reported rather
# than silently ignored so a missing details line can be explained.
if gpus:
    try:
        details = tf.config.experimental.get_device_details(gpus[0])
    except Exception as e:
        print(f"Warning: couldn't query details for {gpus[0].name}: {e}")
    else:
        cc = details.get("compute_capability", "unknown")
        print(f"GPU[0] details: {details.get('device_name','?')} (compute capability: {cc})")
print()

def timed_matmul(device="/CPU:0", size=4096):
    """Time a single large float32 matmul on the given device.

    Two ``size`` x ``size`` random-normal matrices are created on ``device``,
    multiplied once as an untimed warm-up, then multiplied again under the
    timer. The result line is printed as a side effect.

    Args:
        device: TensorFlow device string the tensors and ops are placed on.
        size: Side length of the square operand matrices.

    Returns:
        Elapsed seconds for the timed matmul, including the ``.numpy()`` call
        that materializes its result.
    """
    with tf.device(device):
        a = tf.random.normal([size, size], dtype=tf.float32)
        b = tf.random.normal([size, size], dtype=tf.float32)
        # Warm-up for op placement/compilation. Materialize the result so the
        # warm-up (and the operand generation it depends on) has completed
        # before the clock starts; otherwise still-pending device work could
        # be counted in the timed run.
        tf.matmul(a, b).numpy()
        # perf_counter is monotonic and high resolution, unlike time.time().
        t0 = time.perf_counter()
        c = tf.matmul(a, b)
        _ = c.numpy()  # materialize, so the matmul has actually finished
        dt = time.perf_counter() - t0
    print(f"MatMul {size}x{size} on {device}: {dt:.3f} s")
    return dt

# Benchmark the CPU first; its time is the baseline for the speedup figure.
cpu_time = timed_matmul("/CPU:0", size=2048)

# Attempt the GPU benchmark only when a GPU was detected. A failure is
# reported and leaves gpu_time as None.
gpu_time = None
if gpus:
    try:
        gpu_time = timed_matmul("/GPU:0", size=2048)
    except Exception as err:
        print("⚠️ Tried to run on GPU but failed:")
        print(err)

# Final report: GPU visibility, and the CPU-vs-GPU comparison when available.
print("\nSummary:")
if gpus:
    print("✅ GPU is visible to TensorFlow.")
    if gpu_time is None:
        print("   But the timed GPU matmul failed; check logs above.")
    else:
        # Guard the division: a non-positive GPU time reports an infinite speedup.
        if gpu_time > 0:
            speedup = cpu_time / gpu_time
        else:
            speedup = float('inf')
        print(f"   CPU time: {cpu_time:.3f}s | GPU time: {gpu_time:.3f}s | Speedup: {speedup:.2f}×")
else:
    print("❌ No GPUs visible to TensorFlow.")
    print("   Tips: ensure NVIDIA drivers + CUDA runtime are installed and "
          "install a CUDA-enabled TF/PyTorch wheel that matches your CUDA (or use CPU builds).")

✅ TensorFlow version: 2.20.0
Built with CUDA: True
XLA available: 

CPUs detected: ['/physical_device:CPU:0']
GPUs detected: ['/physical_device:GPU:0']
GPU[0] details: NVIDIA GeForce RTX 3080 (compute capability: (8, 6))



I0000 00:00:1759984430.523780    5861 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7535 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6


MatMul 2048x2048 on /CPU:0: 0.020 s
MatMul 2048x2048 on /GPU:0: 0.008 s

Summary:
✅ GPU is visible to TensorFlow.
   CPU time: 0.020s | GPU time: 0.008s | Speedup: 2.41×
