In [1]:
import torch

### create tensor data

In [2]:
# Sample a 5x3 tensor from the standard normal distribution.
x = torch.randn((5, 3))
# All-ones tensor with the same shape as x (torch.ones(5, 3) would also work here).
y = torch.ones_like(input=x)

In [3]:
x, y

(tensor([[-0.5614,  0.0657,  1.5142],
         [ 0.7240, -1.0735, -0.8564],
         [ 0.4823,  0.9629, -1.3442],
         [-0.1726,  1.1242, -0.7102],
         [-1.3811, -2.4411, -0.1904]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]))

In [4]:
# Element-wise sum of the two tensors; the bare name below displays the result.
z1 = torch.add(x, y)
z1

tensor([[ 0.4386,  1.0657,  2.5142],
        [ 1.7240, -0.0735,  0.1436],
        [ 1.4823,  1.9629, -0.3442],
        [ 0.8274,  2.1242,  0.2898],
        [-0.3811, -1.4411,  0.8096]])

### Moving to GPU

In [5]:
# Copy both operands to the CUDA device, then add them there.
x, y = x.to("cuda"), y.to("cuda")
z2 = torch.add(x, y)
x, y, z2

(tensor([[-0.5614,  0.0657,  1.5142],
         [ 0.7240, -1.0735, -0.8564],
         [ 0.4823,  0.9629, -1.3442],
         [-0.1726,  1.1242, -0.7102],
         [-1.3811, -2.4411, -0.1904]], device='cuda:0'),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]], device='cuda:0'),
 tensor([[ 0.4386,  1.0657,  2.5142],
         [ 1.7240, -0.0735,  0.1436],
         [ 1.4823,  1.9629, -0.3442],
         [ 0.8274,  2.1242,  0.2898],
         [-0.3811, -1.4411,  0.8096]], device='cuda:0'))

In [6]:
### 数据必须在同一设备才能操作

In [7]:
# z1 was computed on the CPU and z2 on the GPU, so adding them is expected to
# fail; print the error message instead of aborting the notebook run.
try:
    z1 + z2
except RuntimeError as e:
    # Catch only the expected error type so unrelated problems
    # (e.g. an undefined name) still surface instead of being printed away.
    print(e)

Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!


In [8]:
# Bring z2 back to the CPU so both operands live on the same device.
z2 = z2.cpu()
print(torch.add(z1, z2))

tensor([[ 0.8773,  2.1314,  5.0284],
        [ 3.4480, -0.1470,  0.2871],
        [ 2.9646,  3.9257, -0.6884],
        [ 1.6548,  4.2484,  0.5797],
        [-0.7622, -2.8822,  1.6191]])


### Neural network

In [9]:
import torch.nn as nn
import torch.nn.functional as F

# 定义一个简单的神经网络类
class Net(nn.Module):
    """Small convolutional network: two conv/pool stages and three linear layers.

    The first linear layer expects 16 * 6 * 6 features, which is what the two
    3x3 convolutions and 2x2 max-pooling steps produce for a 1x32x32 input.
    """

    def __init__(self):
        super().__init__()
        # First convolution: 1 input channel -> 6 output channels, 3x3 kernel.
        self.conv1 = nn.Conv2d(1, 6, 3)
        # Second convolution: 6 input channels -> 16 output channels, 3x3 kernel.
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Three fully connected layers; 16 * 6 * 6 is the flattened conv output.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the 10 raw output values per sample."""
        # Convolution -> ReLU -> max pooling with a (2, 2) window, twice.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse every non-batch dimension into a single feature axis.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: raw outputs are returned.
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except dimension 0 (the batch dimension)."""
        return x.size()[1:].numel()

1. 第一层卷积：
   - 输入大小：32×32
   - 卷积核大小：3×3
   - 输出通道维度：6
   - 不使用填充（padding=0），步幅为 1（stride=1）
   - 计算公式：$output\_size = \frac{{input\_size - kernel\_size + 2 \times padding}}{{stride}} + 1 = \frac{{32 - 3 + 2 \times 0}}{{1}} + 1 = 30$
   - 输出大小：30×30

2. 第一次最大池化：
   - 输入大小：30×30
   - 池化窗口大小：2×2
   - 使用步幅（stride=2）
   - 计算公式：$output\_size = \frac{{input\_size}}{{stride}} = \frac{{30}}{{2}} = 15$
   - 输出大小：15×15

3. 第二层卷积：
   - 输入大小：15×15
   - 卷积核大小：3×3
   - 输出通道维度：16
   - 不使用填充（padding=0），步幅为 1（stride=1）
   - 计算公式：$output\_size = \frac{{input\_size - kernel\_size + 2 \times padding}}{{stride}} + 1 = \frac{{15 - 3 + 2 \times 0}}{{1}} + 1 = 13$
   - 输出大小：13×13

4. 第二次最大池化：
   - 输入大小：13×13
   - 池化窗口大小：2×2
   - 使用步幅（stride=2）
   - 计算公式：$output\_size = \left\lfloor \frac{{input\_size}}{{stride}} \right\rfloor = \left\lfloor \frac{{13}}{{2}} \right\rfloor = \lfloor 6.5 \rfloor = 6$（结果向下取整）
   - 输出大小：6×6

最后，在卷积和池化的过程中，图像的大小从32×32逐步减小到6×6。在这之后，将数据展平，并通过全连接层进行处理。

![16970320504401697032050389.png](https://fastly.jsdelivr.net/gh/Chenjiangwen/ImageHostingService@main/pic/16970320504401697032050389.png)

In [10]:
# import matplotlib.pyplot as plt
# 
# img = plt.imread('img.png')
# 
# fig = plt.figure(figsize=(20, 15))
# plt.imshow(img)
# plt.axis('on')
# plt.show()


In [11]:
# Build the network and move its parameters to the GPU; the module returned
# by the move is the value the cell displays.
net = Net()
net.cuda()

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [12]:
# One row per parameter tensor: name, shape, element count (left-aligned columns).
row_template = '{: <30} {: <30} {: <20}'
total_params = 0
for name, param in net.named_parameters():
    count = param.numel()
    total_params += count  # running total, printed after the table
    print(row_template.format(name, str(param.shape), str(count)))
print("总参数量： {}".format(total_params))

conv1.weight                   torch.Size([6, 1, 3, 3])       54                  
conv1.bias                     torch.Size([6])                6                   
conv2.weight                   torch.Size([16, 6, 3, 3])      864                 
conv2.bias                     torch.Size([16])               16                  
fc1.weight                     torch.Size([120, 576])         69120               
fc1.bias                       torch.Size([120])              120                 
fc2.weight                     torch.Size([84, 120])          10080               
fc2.bias                       torch.Size([84])               84                  
fc3.weight                     torch.Size([10, 84])           840                 
fc3.bias                       torch.Size([10])               10                  
总参数量： 81194


### 输入、输出

In [13]:
# NOTE: `input` shadows the Python builtin; the name is kept because a later
# cell passes it to the network again.
# One random single-channel 32x32 sample, moved to the GPU.
input = torch.randn(1, 1, 32, 32).cuda()
print('input', input.size(), input.type())
# Forward pass through the network.
out = net(input)
print(out)


input torch.Size([1, 1, 32, 32]) torch.cuda.FloatTensor
tensor([[ 0.0037,  0.0155,  0.0124, -0.0108,  0.0722,  0.0006, -0.1146, -0.0632,
          0.0856,  0.0920]], device='cuda:0', grad_fn=<AddmmBackward0>)


### 损失

In [14]:
# Random target with one value per network output.
target = torch.randn(10).cuda()
print(target.shape)
# Add a leading axis so the target is (1, 10), the same shape as `out`.
target = target.unsqueeze(0)
print(target.size())
# Mean squared error between the network output and the target.
criterion = nn.MSELoss()
loss = criterion(out, target)
print(loss)

torch.Size([10])
torch.Size([1, 10])
tensor(0.6184, device='cuda:0', grad_fn=<MseLossBackward0>)


### Input -> Conv1 -> ReLU -> MaxPool -> Conv2 -> ReLU -> MaxPool
###      -> view -> FC_linear -> ReLU -> FC_linear -> ReLU -> FC_linear
###      -> MSELoss
###      -> loss

In [15]:
# Show the gradient of conv1's bias before and after backpropagation.
net.zero_grad()  # Clear stored gradients; otherwise gradients from different batches would accumulate
print(net.conv1.bias.grad)

# Backpropagate the loss through the network.
loss.backward()

print(net.conv1.bias.grad)

None
tensor([ 0.0003, -0.0047, -0.0078, -0.0055,  0.0070,  0.0095], device='cuda:0')


In [16]:
### 反向传播 loss.backward()

In [17]:
# Walk the autograd graph backwards, starting from the loss.
print(loss.grad_fn)  # backward node of MSELoss
print(loss.grad_fn.next_functions[0][0])  # backward node of the last linear layer (shown as AddmmBackward0)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])   # first input of that node: prints AccumulateGrad (a leaf), not ReLU — presumably fc3.bias; TODO confirm

<MseLossBackward0 object at 0x0000015343FE41C0>
<AddmmBackward0 object at 0x0000015343FE40A0>
<AccumulateGrad object at 0x0000015343FE41C0>


### 优化器 更新网络参数

In [18]:
import torch.optim as optim

# Stochastic gradient descent over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Reset the gradients before the new backward pass.
optimizer.zero_grad()

# Forward pass and loss on the same input/target pair as before.
output = net(input)
loss = criterion(output, target)

# Backpropagate to compute the gradients.
loss.backward()

# Update the parameters
optimizer.step()

# Summary message (kept verbatim, it is runtime output).
print('总结：1.构建网络 2.定义损失函数 3.反向传播 4.更新参数\n'
      '循环训练直到满意loss，结束训练 -> 模型 -> do something')

总结：1.构建网络 2.定义损失函数 3.反向传播 4.更新参数
循环训练直到满意loss，结束训练 -> 模型 -> do something


## cifar-10

| 类别  | plane | car | bird | cat | deer | dog | frog | horse | ship | truck |
| ----- | ----- | --- | ---- | --- | ---- | --- | ---- | ----- | ---- | ----- |
| 准确率 | 69%   | 69% | 27%  | 31% | 42%  | 64% | 59%  | 65%   | 59%  | 62%   |

https://cjwen-imageclassification-resnet18-cifar10.streamlit.app/