# GPU

In [1]:
!nvidia-smi

Mon Aug 26 12:02:55 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...    Off | 00000000:01:00.0 Off |                  N/A |
| N/A   39C    P0              N/A /  55W |     14MiB /  8188MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### 计算设备

In [4]:
import torch
from torch import nn

torch.device('cpu'), torch.cuda.device('cuda'), torch.cuda.device('cuda')

(device(type='cpu'),
 <torch.cuda.device at 0x7fa352fc6c50>,
 <torch.cuda.device at 0x7fa352fc6650>)

### 查询可用gpu的数量

In [5]:
torch.cuda.device_count()

1

### 这两个函数允许我们在请求的GPU不存在的情况下运行代码

In [8]:
def try_gpu(i = 0):
    """如果存在，则返回gpu(i)，否则返回cpu()。"""
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    """返回所有可用的GPU，如果没有GPU，则返回[cpu(),]。"""
    devices = [
        torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())]
    return devices if devices else [torch.device('cpu')]

try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

### 查询张量所在设备

In [6]:
x = torch.tensor([1, 2, 3])
x.device

device(type='cpu')

### 存储在GPU上

In [11]:
X = torch.ones(2, 3, device = try_gpu())
X.device

device(type='cuda', index=0)

### 第二个GPU上创建一个随机张量

In [13]:
Y = torch.rand(2, 3, device = try_gpu(1))  # 因为我们只有一张GPU，所以只能将其存储在CPU上
Y.device

device(type='cpu')

### 要计算$X + Y$，我们需要决定在哪里执行这个操作

In [22]:
# 在GPU上进行计算，最好确保两个变量都在同一张GPU上，因为在GPU与CPU之间进行频繁的数据传输，会导致性能问题
Z = Y.cuda(0)
print(Y)
print(Z)

tensor([[0.5665, 0.7005, 0.4085],
        [0.3471, 0.0925, 0.4717]])
tensor([[0.5665, 0.7005, 0.4085],
        [0.3471, 0.0925, 0.4717]], device='cuda:0')


### 现在数据在同一张GPU上（$Z$和$X$都在），我们可以将它们相加

In [23]:
X + Z

tensor([[1.5665, 1.7005, 1.4085],
        [1.3471, 1.0925, 1.4717]], device='cuda:0')

In [24]:
Z.cuda(0) is Z

True

### 神经网络与GPU

In [25]:
net = nn.Sequential(nn.Linear(3, 1))
net = net.to(device = try_gpu())

net(X)

tensor([[0.4078],
        [0.4078]], device='cuda:0', grad_fn=<AddmmBackward0>)

### 确认模型参数存储在同一个GPU上

In [27]:
net[0].weight.data.device

device(type='cuda', index=0)