In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)   # allow up to 120 characters per line when printing tensors
torch.set_grad_enabled(True)      # autograd is already on by default; made explicit here

<torch.autograd.grad_mode.set_grad_enabled at 0x21b1dfe9ee0>

In [3]:
# Record the library versions this notebook was executed with.
print(torch.__version__)
print(torchvision.__version__)

1.13.1
0.14.1


In [4]:
def get_num_correct(preds, labels):
    """Count how many predictions in a batch match their labels.

    Args:
        preds: Tensor of per-class scores; the predicted class of each row
            is the index of its largest value along dim 1.
        labels: Tensor of ground-truth class indices, compared element-wise
            against the predicted indices.

    Returns:
        The number of matching predictions, as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    is_correct = predicted_classes.eq(labels)
    num_correct = is_correct.sum()
    return num_correct.item()

In [5]:
class Network(nn.Module):
    """Small CNN mapping 1-channel 28x28 images to 10 raw class scores.

    Architecture: two (5x5 conv -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. For a 28x28 input the spatial size shrinks
    28 -> 24 -> 12 -> 8 -> 4, so the conv stack yields a 12x4x4 feature map,
    which is flattened into 192 features for ``fc1``.

    The network returns logits (no softmax); pair it with
    ``F.cross_entropy``, which applies log-softmax internally.
    """

    # Feature-map shape (channels, height, width) that fc1 was sized for.
    _CONV_OUT_SHAPE = (12, 4, 4)

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Linear = fully connected (fc) = dense
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run a forward pass.

        Args:
            t: Image tensor; a batch shaped (N, 1, 28, 28) is expected.

        Returns:
            Tensor of shape (N, 10) holding the un-normalised class scores.

        Raises:
            RuntimeError: If the conv stack does not produce a 12x4x4 feature
                map, i.e. the input images are not 28x28. Without this check
                ``reshape(-1, 192)`` could silently redistribute features
                across samples and return the wrong number of rows.
        """
        input_shape = tuple(t.shape)

        # (1) hidden conv layer. Conv layers carry no activation, so ReLU is
        # applied explicitly; pooling has no weights, so it is a plain function.
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (2) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Guard the manual flatten below: reshape(-1, 192) only preserves the
        # batch dimension when every sample really has 12*4*4 features.
        if tuple(t.shape[-3:]) != self._CONV_OUT_SHAPE:
            raise RuntimeError(
                "Network expects 1x28x28 images; input of shape "
                f"{input_shape} produced a conv feature map of shape "
                f"{tuple(t.shape[-3:])} instead of {self._CONV_OUT_SHAPE}"
            )

        # (3) hidden linear layer (the flatten must be done by hand)
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = F.relu(t)

        # (4) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (5) output layer. No softmax here: the cross-entropy loss used for
        # training already includes it.
        t = self.out(t)

        return t

In [6]:
network = Network()   # fresh, untrained model instance

In [7]:
# FashionMNIST training split, downloaded into ./data/FashionMNIST when it is
# not already present; each image is converted to a tensor on access.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    download=True,
    train=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [8]:
# Serve the training set in batches of 100 (no shuffling is requested).
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
batch = next(iter(train_loader))   # pull only the first batch from the loader
images, labels = batch

In [21]:
images.shape   # (batch, channels, height, width)

torch.Size([100, 1, 28, 28])

In [22]:
labels.shape   # one class index per image in the batch

torch.Size([100])

## Calculating the loss

In [9]:
preds = network(images)                 # forward pass: raw class scores (logits)
loss = F.cross_entropy(preds, labels)   # cross-entropy applies log-softmax internally
loss.item()                             # an untrained 10-class model should sit near ln(10) ≈ 2.30

2.302035093307495

In [23]:
preds.shape   # one row of 10 class scores per image

torch.Size([100, 10])

## Calculating the gradients

In [10]:
print(network.conv1.weight.grad)     # no gradients yet: backward() has not been called

None


In [11]:
loss.backward()      # back-propagate to compute the gradients

In [12]:
network.conv1.weight.grad.shape      # gradients exist after backward(), shaped like the weights

torch.Size([6, 1, 5, 5])

## Updating the weights

In [13]:
optimizer = optim.Adam(network.parameters(), lr=0.01)   # use Adam as the optimizer

In [14]:
loss.item()   # loss before the weight update (same tensor as computed above)

2.302035093307495

In [15]:
get_num_correct(preds, labels)   # correct predictions before the weight update

9

In [16]:
optimizer.step()    # update the weights using the gradients from loss.backward()

In [24]:
# Re-run the same batch through the updated network to see the effect of one step.
preds = network(images)
loss = F.cross_entropy(preds, labels)

In [25]:
loss.item()   # loss on the same batch after one optimizer step

2.2594704627990723

In [26]:
get_num_correct(preds, labels)   # correct predictions after one optimizer step

15

## Training code for a single batch

In [27]:
# Complete training step on a single batch, starting from a fresh model.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)   # Adam optimizer

batch = next(iter(train_loader))     # Fetch one batch of data
images, labels = batch

preds = network(images)                 # Pass batch
loss = F.cross_entropy(preds, labels)   # Calculate loss

# Clear any previously accumulated gradients. This is a no-op for the freshly
# built network above, but it keeps the step correct when reused in a loop,
# because backward() adds to existing .grad values instead of replacing them.
optimizer.zero_grad()
loss.backward()    # Calculate gradients
optimizer.step()   # Update weights

# ----------------------------------------------

# Compare the loss on the same batch before and after the single update.
print('loss1:', loss.item())
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

loss1: 2.3067712783813477
loss2: 2.281315565109253
