In [2]:
import torch

In [3]:
# Quick visibility probe: does this PyTorch build see a usable CUDA runtime,
# and how many CUDA devices does it expose?
for caption, probe in (
    ("CUDA Available: ", torch.cuda.is_available),
    ("Number of GPUs: ", torch.cuda.device_count),
):
    print(caption, probe())

CUDA Available:  True
Number of GPUs:  1


In [4]:
# Walk every CUDA device index and show the name PyTorch reports for it.
gpu_total = torch.cuda.device_count()
for gpu_index in range(gpu_total):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print(f"GPU {gpu_index}: {gpu_name}")

GPU 0: NVIDIA GeForce GTX 1650


In [5]:
# Run on the CUDA device when one is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Two random 1000 x 1000 operands are created directly on the chosen device
# and multiplied there, so no host <-> device copy is involved.
a = torch.randn(1000, 1000, device=device)
b = torch.randn(1000, 1000, device=device)
c = a @ b

In [6]:
# CUDA kernels are queued asynchronously with respect to the host, so block
# until the current device has drained its queue before announcing that the
# computation has finished. On a CPU-only machine there is nothing to wait for.
if torch.cuda.is_available():
    torch.cuda.synchronize()
print("Computation complete")

# Check GPU memory usage for every visible CUDA device:
#   memory_allocated -> bytes currently occupied by tensors
#   memory_reserved  -> bytes held by PyTorch's caching allocator ("cached")
for i in range(torch.cuda.device_count()):
    print(f"Memory Allocated on GPU {i}: {torch.cuda.memory_allocated(i)} bytes")
    print(f"Memory Cached on GPU {i}: {torch.cuda.memory_reserved(i)} bytes")

Computation complete
Memory Allocated on GPU 0: 20971520 bytes
Memory Cached on GPU 0: 20971520 bytes


In [7]:
import torch

def device_label(device):
    """Return the label used for ``device`` in this cell's status messages.

    Args:
        device: A ``torch.device``.

    Returns:
        ``"GPU <index>"`` for a CUDA device (just ``"GPU"`` when the device
        carries no explicit index) and ``"CPU"`` for any other device type.
    """
    if device.type != 'cuda':
        return "CPU"
    return "GPU" if device.index is None else f"GPU {device.index}"


def matmul_smoke_test(device, size=1000):
    """Multiply two random ``size`` x ``size`` matrices on ``device``.

    Args:
        device: The ``torch.device`` on which the operands are created.
        size: Edge length of the square operands (default 1000).

    Returns:
        A ``(lhs, rhs, product)`` tuple of tensors living on ``device``.
    """
    lhs = torch.randn((size, size), device=device)
    rhs = torch.randn((size, size), device=device)
    product = torch.matmul(lhs, rhs)
    # CUDA work is asynchronous; wait so callers can truthfully report that
    # the computation has completed.
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    return lhs, rhs, product


def report_gpu_memory(index):
    """Print allocated and reserved ("cached") bytes for CUDA device ``index``."""
    print(f"Memory Allocated on GPU {index}: {torch.cuda.memory_allocated(index)} bytes")
    print(f"Memory Cached on GPU {index}: {torch.cuda.memory_reserved(index)} bytes")


# Check if GPU is available
gpu_count = torch.cuda.device_count()
print("CUDA Available: ", torch.cuda.is_available())
print("Number of GPUs: ", gpu_count)

# Print GPU details
for i in range(gpu_count):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# Specify the devices to use (GPU 0 and GPU 1), falling back to the CPU for
# any index that does not exist on this machine.
device0 = torch.device('cuda:0' if gpu_count > 0 else 'cpu')
device1 = torch.device('cuda:1' if gpu_count > 1 else 'cpu')

# Run the computation on the first device. The message names the device that
# was actually used, so a CPU fallback is not reported as "GPU 0".
a0, b0, c0 = matmul_smoke_test(device0)
print(f"Computation on {device_label(device0)} complete")

# Memory statistics only make sense when the first device really is a GPU.
if device0.type == 'cuda':
    report_gpu_memory(0)

# Repeat on GPU 1 (if available)
if gpu_count > 1:
    a1, b1, c1 = matmul_smoke_test(device1)
    print("Computation on GPU 1 complete")
    report_gpu_memory(1)
else:
    print("GPU 1 not available")


CUDA Available:  True
Number of GPUs:  1
GPU 0: NVIDIA GeForce GTX 1650
Computation on GPU 0 complete
Memory Allocated on GPU 0: 32972288 bytes
Memory Cached on GPU 0: 41943040 bytes
GPU 1 not available


In [9]:
%pip install torch-directml


Collecting torch-directml
  Using cached torch_directml-0.2.3.dev240715-cp38-cp38-win_amd64.whl.metadata (6.2 kB)
Downloading torch_directml-0.2.3.dev240715-cp38-cp38-win_amd64.whl (9.0 MB)
   ---------------------------------------- 0.0/9.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.0 MB 262.6 kB/s eta 0:00:34
   ---------------------------------------- 0.1/9.0 MB 508.4 kB/s eta 0:00:18
    --------------------------------------- 0.2/9.0 MB 817.0 kB/s eta 0:00:11
   - -------------------------------------- 0.3/9.0 MB 1.3 MB/s eta 0:00:07
   - -------------------------------------- 0.3/9.0 MB 1.2 MB/s eta 0:00:08
   - -------------------------------------- 0.3/9.0 MB 1.1 MB/s eta 0:00:09
   - -------------------------------------- 0.4/9.0 MB 1.1 MB/s eta 0:00

In [10]:
import torch_directml

# Initialize devices: the DirectML adapter returned by torch_directml.device()
# (presumably the package's default adapter -- confirm against its docs) and
# CUDA GPU 0, falling back to the CPU when CUDA is unavailable.
device0 = torch_directml.device()
device1 = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Ask each backend which hardware it actually selected instead of assuming a
# vendor: the DirectML adapter is not guaranteed to be an AMD GPU, and device1
# may be the CPU.
directml_name = torch_directml.device_name(torch_directml.default_device())
if device1.type == 'cuda':
    second_label = f"CUDA ({torch.cuda.get_device_name(device1)})"
else:
    second_label = "CPU (CUDA not available)"

# Create a simple tensor computation on the DirectML device
a0 = torch.randn((1000, 1000), device=device0)
b0 = torch.randn((1000, 1000), device=device0)
c0 = torch.matmul(a0, b0)
print(f"Computation on DirectML ({directml_name}) complete")

# Create a simple tensor computation on the CUDA device (or the CPU fallback)
a1 = torch.randn((1000, 1000), device=device1)
b1 = torch.randn((1000, 1000), device=device1)
c1 = torch.matmul(a1, b1)
# CUDA kernels run asynchronously; wait before reporting completion.
if device1.type == 'cuda':
    torch.cuda.synchronize(device1)
print(f"Computation on {second_label} complete")


Computation on DirectML (AMD GPU) complete
Computation on CUDA (NVIDIA GPU) complete
