In [1]:
# Shell escape: print the driver's view of the installed GPUs (memory use, utilization).
!nvidia-smi

Thu Feb 23 11:01:20 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:17:00.0 Off |                  N/A |
|  0%   48C    P8    32W / 350W |    419MiB / 12053MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:65:00.0 Off |                  N/A |
|  0%   43C    P8    25W / 350W |      8MiB / 12053MiB |      0%      Default |
|       

In [2]:
import torch
from torch import nn
# Report whether CUDA is currently available to PyTorch.
torch.cuda.is_available()

True

In [6]:
# Three device handles: the CPU, a CUDA device with no explicit index, and CUDA device 1.
torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')

(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))

In [7]:
# Query the number of available GPUs.
torch.cuda.device_count()

2

In [9]:
def try_gpu(i=0):
    """Return the i-th CUDA device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the GPU to request. Defaults to 0.

    Returns:
        torch.device: ``cuda:{i}`` when ``0 <= i < torch.cuda.device_count()``,
        otherwise ``cpu``.
    """
    # Bound the index on both sides: a negative i would otherwise pass the
    # count check and build an invalid device string such as 'cuda:-1'.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    """Return every available CUDA device, or ``[cpu]`` when there is none.

    Returns:
        list[torch.device]: ``cuda:0 .. cuda:{n-1}`` for the ``n`` GPUs reported
        by ``torch.cuda.device_count()``; a one-element list holding the CPU
        device when that count is zero.
    """
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        # No GPU present: fall back to a single CPU entry so callers always
        # receive a non-empty list.
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

# Request the default GPU, an index beyond the installed GPUs (falls back to CPU), and the full device list.
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0), device(type='cuda', index=1)])

## 张量与GPU

In [10]:
# No device is given here, so the tensor is created on the CPU.
x = torch.rand(2,3)
x.device

device(type='cpu')

In [11]:
# Create the tensor directly on the device chosen by try_gpu() (first GPU, or CPU if none).
y = torch.rand(2, 3, device=try_gpu())
y.device

device(type='cuda', index=0)

**对多个张量进行运算时，必须确保它们位于同一个设备上；否则框架不知道应该在哪里执行计算、在哪里存储结果。**

In [14]:
# Copy x to GPU 0 as z; x itself stays on the CPU, as the printed devices show.
z = x.cuda(0)
print(f'x: {x.device}')
print(f'y: {y.device}')
print(f'z: {z.device}')

x: cpu
y: cuda:0
z: cuda:0


In [15]:
# y and z are both on cuda:0, so they can be added and the result stays on that device.
y+z

tensor([[1.0077, 0.2307, 0.1670],
        [0.3599, 1.5755, 1.5340]], device='cuda:0')

In [24]:
# z already lives on the first GPU, so calling z.cuda(0) again returns z itself
# rather than copying it into newly allocated memory.
z.cuda(0) is z

True

## 神经网络与GPU

In [30]:
# Place the input on the device chosen by try_gpu() (first GPU, or CPU if none).
X = torch.ones(2, 3, device=try_gpu())
net = nn.Sequential(nn.Linear(3, 1))
# Move the model to the same device as the input before calling it.
net = net.to(device=try_gpu())

net(X)

tensor([[0.1138],
        [0.1138]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [31]:
# Confirm that the model parameters are stored on the same GPU.
net[0].weight.data.device

device(type='cuda', index=0)