VGG网络结构介绍：  
VGG由5个卷积段（以VGG16为例，共13层卷积层）、3层全连接层和softmax输出层构成，卷积段与卷积段之间使用max-pooling（最大池化）分开，所有隐层的激活单元都采用ReLU函数。  
VGG使用多个较小卷积核（3x3）的卷积层代替一个卷积核较大的卷积层，一方面可以减少参数，另一方面相当于进行了更多的非线性映射，可以增加网络的拟合/表达能力。  
卷积层全部都是3\*3的卷积核，用conv3-xxx表示，xxx表示通道数。其步长为1，用padding=same填充。（注意：下文代码的实现与此略有不同——每个卷积段的第一层卷积未做填充，边长会减少2，再由带padding=1的池化层补偿，最终特征图仍为7\*7。）  
池化层的池化核为2\*2  


### 模型结构

In [95]:
import torch
import torch.nn as nn
import torch.nn.functional as func

# Network definition
class VGG16(nn.Module):
    """VGG16-style CNN: five conv stages, three linear layers, log-softmax output.

    The shape comments below assume a 3 x 224 x 224 input; ``fc1`` expects
    exactly 512 * 7 * 7 features, so other input sizes will fail at ``fc1``.

    The geometry differs from the reference VGG16: the first convolution of
    every stage is unpadded (each side shrinks by 2) and every 2x2 max-pool
    pads by 1 to compensate, so the final feature map is still 512 x 7 x 7.
    Layer attribute names are part of the checkpoint format (``state_dict``
    keys) and must not be renamed.

    Args:
        num_classes: Size of the final linear layer, i.e. the number of
            classes predicted. Defaults to 2.
    """

    def __init__(self, num_classes: int = 2):
        super(VGG16, self).__init__()

        # input: 3 * 224 * 224
        self.conv1_1 = nn.Conv2d(3, 64, 3)  # 64 * 222 * 222
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=(1, 1))  # 64 * 222 * 222
        self.maxpool1 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 64 * 112 * 112

        self.conv2_1 = nn.Conv2d(64, 128, 3)  # 128 * 110 * 110
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=(1, 1))  # 128 * 110 * 110
        self.maxpool2 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 128 * 56 * 56

        self.conv3_1 = nn.Conv2d(128, 256, 3)  # 256 * 54 * 54
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.maxpool3 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 256 * 28 * 28

        self.conv4_1 = nn.Conv2d(256, 512, 3)  # 512 * 26 * 26
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.maxpool4 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 14 * 14

        self.conv5_1 = nn.Conv2d(512, 512, 3)  # 512 * 12 * 12
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.maxpool5 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 7 * 7

        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        # One output per class; log-softmax is applied in forward().
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes).

        Args:
            x: Image batch; the layer sizes require shape (batch, 3, 224, 224).

        Returns:
            Log-softmax over dim 1, suitable as input to ``nn.NLLLoss``.
        """
        # Each stage is a run of conv+ReLU layers followed by one max-pool.
        stages = (
            ((self.conv1_1, self.conv1_2), self.maxpool1),
            ((self.conv2_1, self.conv2_2), self.maxpool2),
            ((self.conv3_1, self.conv3_2, self.conv3_3), self.maxpool3),
            ((self.conv4_1, self.conv4_2, self.conv4_3), self.maxpool4),
            ((self.conv5_1, self.conv5_2, self.conv5_3), self.maxpool5),
        )

        out = x
        for convs, pool in stages:
            for conv in convs:
                out = func.relu(conv(out))
            out = pool(out)

        # Flatten everything except the batch dimension.
        out = torch.flatten(out, 1)

        out = func.relu(self.fc1(out))
        out = func.relu(self.fc2(out))
        out = self.fc3(out)

        return func.log_softmax(out, dim=1)

### 数据加载

In [96]:
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Data loading
# Number of images kept from each split; the rest is discarded so the
# notebook runs quickly.
SUBSET_SIZE = 20

# Training pipeline: resize, random horizontal flip (augmentation),
# tensor conversion, then per-channel normalisation (these are the commonly
# used ImageNet channel mean/std values).
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: same preprocessing without the random flip, so test
# accuracy is deterministic for a given subset.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_data = datasets.ImageFolder(root="./data/catVSdog/train_data/", transform=data_transform)
test_data = datasets.ImageFolder(root="./data/catVSdog/test_data/", transform=test_transform)

# Derive the split lengths from the real dataset size instead of hard-coding
# 20000 / 5000; random_split raises if the lengths do not sum to len(dataset).
train_keep = min(SUBSET_SIZE, len(train_data))
test_keep = min(SUBSET_SIZE, len(test_data))
train_data, _ = random_split(dataset=train_data, lengths=[train_keep, len(train_data) - train_keep])
test_data, _ = random_split(dataset=test_data, lengths=[test_keep, len(test_data) - test_keep])

train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
# Order is irrelevant for accuracy, so the test loader does not shuffle.
test_loader = DataLoader(test_data, batch_size=5, shuffle=False)

### 模型初始化

In [97]:
import torch.optim as optim
import torch

# Model initialisation
# Use the GPU when one is available, otherwise fall back to the CPU instead
# of crashing on .cuda().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = VGG16().to(device)

# Xavier-uniform initialisation for every conv / linear weight; biases keep
# their PyTorch defaults.
for module in net.modules():
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        nn.init.xavier_uniform_(module.weight)

# The network outputs log-probabilities, hence NLLLoss rather than
# CrossEntropyLoss.
loss_func = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)


### 模型训练

In [98]:
# Model training
epoch_num = 20

# Move batches to whichever device the model parameters live on, rather than
# assuming CUDA.
device = next(net.parameters()).device

max_acc = 0.0
for epoch in range(epoch_num):
    # ---- training pass ----
    net.train()
    loss_sum = 0.0
    total_batch = 0
    if device.type == "cuda":
        torch.cuda.empty_cache()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        # NLLLoss defaults to the mean loss over the batch.
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        total_batch += 1

    # Guard against an empty loader instead of dividing by zero.
    loss_mean = loss_sum / total_batch if total_batch else float("nan")

    # ---- evaluation pass ----
    net.eval()
    correct_sum = 0
    total = 0
    if device.type == "cuda":
        torch.cuda.empty_cache()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # argmax of log-probabilities equals argmax of probabilities,
            # so no exp() is needed.
            predicts = net(inputs).argmax(dim=1)
            correct_sum += (predicts == labels).sum().item()
            total += labels.size(0)

    acc_mean = correct_sum / total * 100 if total else 0.0

    # Keep a checkpoint whenever accuracy exceeds 30% and beats the best so far.
    print(acc_mean)
    if acc_mean > 30.0 and acc_mean > max_acc:
        max_acc = acc_mean
        print("保存模型")
        torch.save(net.state_dict(),'./data/VGG16_{}.rui'.format(acc_mean))

    print("第{}次迭代完成,损失为:{:.8f},准确率为:{:.8f}%".format(epoch + 1, loss_mean, acc_mean))

65.0
保存模型
第1次迭代完成,损失为:0.70041561,准确率为:65.00000000%
35.0
第2次迭代完成,损失为:0.69667909,准确率为:35.00000000%
35.0
第3次迭代完成,损失为:0.69060391,准确率为:35.00000000%
35.0
第4次迭代完成,损失为:0.68011290,准确率为:35.00000000%
35.0
第5次迭代完成,损失为:0.67210546,准确率为:35.00000000%
35.0
第6次迭代完成,损失为:0.66499066,准确率为:35.00000000%
35.0
第7次迭代完成,损失为:0.65903562,准确率为:35.00000000%
35.0
第8次迭代完成,损失为:0.65462473,准确率为:35.00000000%
35.0
第9次迭代完成,损失为:0.64861903,准确率为:35.00000000%
35.0
第10次迭代完成,损失为:0.64944762,准确率为:35.00000000%
35.0
第11次迭代完成,损失为:0.64803702,准确率为:35.00000000%
35.0
第12次迭代完成,损失为:0.64720500,准确率为:35.00000000%
35.0
第13次迭代完成,损失为:0.64698094,准确率为:35.00000000%
35.0
第14次迭代完成,损失为:0.64628369,准确率为:35.00000000%
35.0
第15次迭代完成,损失为:0.64696103,准确率为:35.00000000%
35.0
第16次迭代完成,损失为:0.64713740,准确率为:35.00000000%
35.0
第17次迭代完成,损失为:0.64650136,准确率为:35.00000000%
35.0
第18次迭代完成,损失为:0.64638373,准确率为:35.00000000%
35.0
第19次迭代完成,损失为:0.64646152,准确率为:35.00000000%
35.0
第20次迭代完成,损失为:0.64688513,准确率为:35.00000000%


In [99]:
# Saving a checkpoint (run once after training):
# torch.save(net.state_dict(), "./data/Model")
# NOTE(review): the original commented-out save wrote to './Model' while the
# load below reads './data/Model' - confirm which path holds the checkpoint.

# Loading the checkpoint
device = next(net.parameters()).device
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
net.load_state_dict(torch.load("./data/Model", map_location=device))
net.eval()

correct_sum = 0
total = 0
if device.type == "cuda":
    torch.cuda.empty_cache()
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # argmax of log-probabilities equals argmax of probabilities.
        predicts = net(inputs).argmax(dim=1)
        correct_sum += (predicts == labels).sum().item()
        total += labels.size(0)

# Guard against an empty loader instead of dividing by zero.
acc_mean = correct_sum / total * 100 if total else 0.0
print("模型准确率:{:.4f}".format(acc_mean))


模型准确率:90.0000
