## 11 CNN网络-Alex模型的Pytorch实现

In [7]:
import torch
import torchvision
from torch import nn, optim
from torch.nn import init
import numpy as np
import sys
from tqdm import tqdm  # 加入进度条库，方便查看训练进度

device="cuda:0"   # 选择训练设备（CPU或GPU）

### 获取数据集（仍然使用Fashion-MNIST）

保持与原论文相同的输入，将输入图像数据resize为224×224

In [8]:
def load_data_fashion_mnist(batch_size, resize=None, root='Datasets/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

In [9]:
batch_size = 128
img_resize = 224
train_iter, test_iter = load_data_fashion_mnist(batch_size, img_resize)

### 定义模型网络等

In [10]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0],-1)

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.Conv=nn.Sequential(
            nn.Conv2d(1, 96, 11, 4),     # (224, 224, 1)->(55, 55, 96)
            nn.ReLU(),               
            nn.MaxPool2d(3, 2),          # (55, 55, 96)->(27, 27, 96)
            nn.Conv2d(96, 256, 5, 1, 2), # (27, 27, 96)->(27, 27, 256)
            nn.ReLU(),
            nn.MaxPool2d(3, 2),          # (27, 27, 256)->(13, 13, 256)
            nn.Conv2d(256, 384, 3, 1, 1),# (13, 13, 256)->(12, 12, 384)
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),# (12, 12, 384)->(11, 11, 384)
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),# (11, 11, 384)->(10, 10, 256)
            nn.MaxPool2d(3, 2),
        ).to(device)
        self.DNN=nn.Sequential(
            FlattenLayer(),
            nn.Linear(5*5*256, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        ).to(device)
    def forward(self, img_input):
        return self.DNN(self.Conv(img_input))

net=AlexNet()
print(net)

loss=nn.CrossEntropyLoss().to(device)
optimizer=optim.Adam(net.parameters(), lr=0.001)

def test_accuracy(data_iter,net):
    acc_sum, n=0.0, 0
    for X,y in data_iter:
        X=X.to(device)
        y=y.to(device)
        acc_sum+=(net(X).argmax(dim=1)==y).float().sum().item()  #注意这里是sum不是mean 之后会÷n
        n+=y.shape[0]
    return acc_sum / n

AlexNet(
  (Conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (DNN): Sequential(
    (0): FlattenLayer()
    (1): Linear(in_features=6400, out_features=4096, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.5, inplace=Fals

### 训练

In [11]:
num_epochs=5

def train(net,train_iter,test_iter,loss,num_epochs):

    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n=0.0,0.0,0
        net.train()
        for X,y in tqdm(train_iter):
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum+=l.item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).float().sum().item()
            n+=y.shape[0]
        net.eval()
        test_acc=test_accuracy(test_iter,net)
        print("epoch %d, train_loss %.3f, train_acc %.3f, test_acc %.3f"% (epoch+1, train_l_sum / n, train_acc_sum / n, test_acc))
    

In [12]:
train(net, train_iter, test_iter, loss, num_epochs)
torch.save(net.state_dict(),'Model_Save/11_net_param.pt')

100%|██████████| 469/469 [00:20<00:00, 23.11it/s]


epoch 1, train_loss 0.005, train_acc 0.750, test_acc 0.842


100%|██████████| 469/469 [00:19<00:00, 24.05it/s]


epoch 2, train_loss 0.003, train_acc 0.866, test_acc 0.874


100%|██████████| 469/469 [00:19<00:00, 24.24it/s]


epoch 3, train_loss 0.002, train_acc 0.888, test_acc 0.894


100%|██████████| 469/469 [00:19<00:00, 24.09it/s]


epoch 4, train_loss 0.002, train_acc 0.900, test_acc 0.893


100%|██████████| 469/469 [00:19<00:00, 24.08it/s]


epoch 5, train_loss 0.002, train_acc 0.908, test_acc 0.908
