In [19]:
# Shell escape: run the nvidia-smi CLI to print the driver/CUDA versions and
# the current memory usage and utilisation of each GPU.
!nvidia-smi

Tue Nov  1 14:32:30 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.98       Driver Version: 512.98       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
| N/A   59C    P8    11W /  N/A |   1384MiB /  6144MiB |      3%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [20]:
import torch
from torch import nn
# Displayed as the cell result: whether PyTorch can use CUDA in this environment.
torch.cuda.is_available()

True

In [21]:
def try_gpu(i=0):
    """Return the i-th CUDA device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the GPU to request. Defaults to 0.

    Returns:
        torch.device: ``cuda:i`` when ``0 <= i < torch.cuda.device_count()``,
        otherwise ``cpu``.
    """
    # The lower bound matters: without it a negative index satisfies the
    # count check and torch.device() then raises on the 'cuda:-1' string
    # instead of falling back to the CPU.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    """List every CUDA device PyTorch can see, or ``[cpu]`` if there is none.

    Returns:
        list[torch.device]: ``cuda:0 .. cuda:n-1`` for the ``n`` devices
        reported by ``torch.cuda.device_count()``; a one-element list holding
        the CPU device when ``n`` is 0.
    """
    found = []
    for i in range(torch.cuda.device_count()):
        found.append(torch.device(f'cuda:{i}'))
    if not found:
        # No GPU reported: fall back to the CPU so the result is never empty.
        found.append(torch.device('cpu'))
    return found

# Display three lookups side by side: the default GPU (index 0), index 10
# (falls back to the CPU when fewer than 11 GPUs are present), and the list
# of all available devices.
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

## 张量与GPU

In [22]:
# No device argument is given here; the output below shows the tensor on the CPU.
x = torch.rand(2,3)
x.device

device(type='cpu')

In [23]:
# Allocate the tensor directly on the device returned by try_gpu()
# (the first GPU if one exists, otherwise the CPU).
y = torch.rand(2, 3, device=try_gpu())
y.device

device(type='cuda', index=0)

**对张量操作时，要确保两个张量位于同一个设备上，否则框架不知道在哪里执行计算，在哪里储存结果**

In [24]:
# x.cuda(0) produces a tensor on GPU 0 and binds it to z; x itself is not
# moved, as the printed devices below show.
z = x.cuda(0)
print(f'x: {x.device}')
print(f'y: {y.device}')
print(f'z: {z.device}')

x: cpu
y: cuda:0
z: cuda:0


In [25]:
# y and z are both on cuda:0, so the sum is computed there and the result
# (displayed below) lives on the same device.
y+z

tensor([[1.1415, 0.3262, 0.8687],
        [0.7894, 0.2734, 1.5365]], device='cuda:0')

In [26]:
# z already lives on the first GPU, so calling z.cuda(0) again returns z
# itself instead of making a copy and allocating new memory.
z.cuda(0) is z

True

## 神经网络与GPU

In [30]:
# Build a single-linear-layer model and move its parameters to the device
# chosen by try_gpu(); the input is created on that same device so the
# forward pass below runs without a cross-device mismatch.
net = nn.Sequential(nn.Linear(3, 1)).to(device=try_gpu())
X = torch.ones((2, 3), device=try_gpu())

net(X)

tensor([[0.1138],
        [0.1138]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [31]:
# Confirm that the model's parameters are stored on the same GPU as the input.
net[0].weight.data.device

device(type='cuda', index=0)