In [9]:
import torch
import torch.nn as nn

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [10]:
class AlexNet(nn.Module):
    def __init__(self, in_dim, n_class) -> None:
        super().__init__()
        # Conv layer
        self.conv = nn.Sequential(
            # kernel: (in_dim x 11 x 11)
            nn.Conv2d(
                # the channel size
                in_channels=in_dim,
                out_channels=96,
                kernel_size=11, 
                stride=4,
                padding=0
            ),
            # feature map: (in_dim x 227 x 227) -> (96 x 55 x 55)

            nn.BatchNorm2d(96),
            nn.ReLU(True),
            # parms: nn.MaxPool2d(kernel_size, stride)
            nn.MaxPool2d(3, 2),
            # feature map: (96 x 55 x 55) -> (96 x 27 x 27)

            # kernel: (96 x 256 x 256)
            nn.Conv2d(96, 256, 5, stride=1, padding=2),
            # feature map: (96 x 27 x 27) -> (256 x 27 x 27)
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.MaxPool2d(3, 2),
            # feature map: (256 x 27 x 27) -> (256 x 13 x 13)

            # kernel: (384 x 3 x 3)
            nn.Conv2d(256, 384, 3, stride=1, padding=1),
            # feature map: (256 x 13 x 13) -> (384 x 13 x 13)
            nn.BatchNorm2d(384),
            nn.ReLU(True),

            # kernel: (384 x 3 x 3)
            nn.Conv2d(384, 384, 3, stride=1, padding=1),
            # feature map: (384 x 13 x 13) -> (384 x 13 x 13)
            nn.BatchNorm2d(384),
            nn.ReLU(True),

            # kernel: (256 x 3 x 3)
            nn.Conv2d(384, 256, 3, stride=1, padding=1),
            # feature map: (384 x 13 x 13) -> (256 x 13 x 13)
            nn.BatchNorm2d(256),
            nn.ReLU(True),
        
            nn.MaxPool2d(3, 2)
            # feature map: (256 x 13 x 13) -> (256 x 6 x 6)
        )
        
        # full-connective layer
        self.fc = nn.Sequential(
            nn.Linear(9216, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, n_class)
        )

    def forward(self,x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)  # (batch,256,6,6) -> (batch,256*6*6)
        output = self.fc(x)
        return output

In [11]:
from torchvision import datasets, transforms as T


batch_size = 64

"""步骤整合 -> 图像增强：
    1. 调整图像尺寸（227 x 227)
    2. 以0.5的概率水平翻转给定的PIL图像
    3. To a tensor
"""
train_transform = T.Compose([
    T.Resize((227, 227)),
    T.RandomHorizontalFlip(0.5),
    T.ToTensor()
])

test_transform = T.Compose([
    T.Resize((227, 227)),
    T.ToTensor()
])

# mnist数据集：0-9的手写数字数据集，训练集60k，测试集10k，类别10，图像：(28x28x1)
train_dataset = datasets.MNIST(root=r'data\mnist',
                               train=True,
                               transform=train_transform,
                               download=True)

test_dataset = datasets.MNIST(root=r'data\mnist',
                               train=False,
                               transform=train_transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [12]:
# 图像为单通道，有10个类别
model = AlexNet(1, 10)

model.to(device)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): Conv2d(384, 256, kernel_si

In [13]:
# 为CPU设置种子用于生成随机数，以使得结果是确定的。
torch.manual_seed(1)
# 学习率
learning_rate = 1e-3
# 训练轮数
num_epochs = 3
# 优化算法Adam = RMSProp + Momentum (梯度、lr两方面优化下降更快更稳)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 
# 交叉熵损失函数
loss_fn = torch.nn.CrossEntropyLoss()  

In [14]:
def evaluate_accuracy(data_iter, model):
    '''
        评估模型预测精度
    '''
    total = 0
    correct = 0 
    # 循环内的每个张量都将requires_grad设置为False。当前与当前计算图相连的任何具有梯度的张量现在都与当前图分离。不再能够计算关于这个张量的梯度。
    with torch.no_grad():
        model.eval() # 评估模式，batchNorm层，dropout层等用于优化训练而添加的网络层会被关闭
        for images,labels in data_iter:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images) # 1 x 10 行向量
            _,predicts = torch.max(outputs.data, dim=1) # return: (value, index); dim=1时，按行返回最大值所在索引
            total += labels.size(0)
            correct += (predicts == labels).cpu().sum()
    return 100 * correct / total

In [15]:
def train(data_loader=train_loader, optimizer=optimizer, loss_fn=loss_fn, epochs=num_epochs, device=device):
    for epoch in range(epochs):
        print('current epoch = {}'.format(epoch))
        for i,(images,labels) in enumerate(data_loader):
            train_accuracy_total = 0
            train_correct = 0
            train_loss_sum = 0
            model.train() # 训练模式，启用batch normalization和dropout 
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, labels)   # 计算模型的损失
            optimizer.zero_grad()            # 在做反向传播前先清除网络状态
            loss.backward()                  # 损失值进行反向传播
            optimizer.step()                 # 参数迭代更新

            train_loss_sum += loss.item()    # item()返回的是tensor中的值，且只能返回单个值（标量），不能返回向量，使用返回loss等
            _,predicts = torch.max(outputs.data, dim=1)  # 输出10类中最大的那个值
            train_accuracy_total += labels.size(0)
            train_correct += (predicts == labels).cpu().sum().item()
        test_acc = evaluate_accuracy(test_loader, model)
        print('epoch:{0},   loss:{1:.4f},   train accuracy:{2:.3f},  test accuracy:{3:.3f}'.format(
                epoch, train_loss_sum / batch_size, train_correct / train_accuracy_total, test_acc))
    print('------------finish training-------------')


In [16]:
train()

current epoch = 0
epoch:0,   loss:0.0079,   train accuracy:0.844,  test accuracy:92.030
current epoch = 1
epoch:1,   loss:0.0009,   train accuracy:1.000,  test accuracy:95.050
current epoch = 2
epoch:2,   loss:0.0025,   train accuracy:0.938,  test accuracy:96.670
------------finish training-------------
