In [1]:
import os
import os.path
import tqdm
import random
import time
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from PIL import Image
import torch.nn.functional as F
import torchvision.datasets as dsets
import torchvision.transforms as trans
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [30]:
def read_split_data(imgs, labels, test_ratio=0.2, plot=False, seed=None):
    """Split a labelled image folder into train and test lists.

    Args:
        imgs: Directory containing the image files.
        labels: Path of a CSV file with an ``image_id`` column holding the
            image file names; the label is read from the second column.
        test_ratio: Fraction of the images assigned to the test split.
            ``int(len(images) * test_ratio)`` images are drawn at random.
        plot: Unused; kept so existing calls that pass it keep working.
        seed: Optional seed for the random split. When ``None`` (default)
            the global ``random`` module state is used, as before.

    Returns:
        ``(train_imgs_path, train_labels, test_imgs_path, test_labels)`` as
        four lists; paths and labels are aligned by index.

    Raises:
        AssertionError: If ``imgs`` or ``labels`` does not exist.
        ValueError: If ``test_ratio`` is outside ``[0, 1]``.
        IndexError: If a supported image has no row in the CSV file.
    """
    assert os.path.exists(imgs), "imgs root:{} does not exist".format(imgs)
    assert os.path.exists(labels), "labels file:{} does not exist".format(labels)
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be within [0, 1], got {}".format(test_ratio))

    supported = ('.jpg', '.JPG')

    # Only files with a supported extension take part in the split. Sorting
    # removes the dependence on the OS-specific order of os.listdir so that a
    # seeded split is repeatable.
    image_names = sorted(name for name in os.listdir(imgs)
                         if os.path.splitext(name)[-1] in supported)
    images = [os.path.join(imgs, name) for name in image_names]

    # Build the file-name -> label mapping once instead of filtering the
    # DataFrame for every image. The first row wins for duplicated ids.
    df = pd.read_csv(labels).drop_duplicates(subset="image_id", keep="first")
    label_by_name = dict(zip(df["image_id"].to_numpy(), df.iloc[:, 1].to_numpy()))

    missing = [name for name in image_names if name not in label_by_name]
    if missing:
        # IndexError is what the original per-image lookup raised on a miss.
        raise IndexError("no label found in {} for {} image(s), e.g. {}".format(
            labels, len(missing), missing[:5]))

    # Draw the test images; a set makes the membership test below O(1).
    rng = random if seed is None else random.Random(seed)
    test_count = int(len(images) * test_ratio)
    test_selection = set(rng.sample(images, test_count))

    train_imgs_path, train_labels = [], []
    test_imgs_path, test_labels = [], []
    for name, img_path in zip(image_names, images):
        if img_path in test_selection:
            test_imgs_path.append(img_path)
            test_labels.append(label_by_name[name])
        else:
            train_imgs_path.append(img_path)
            train_labels.append(label_by_name[name])

    print("{} images were found in the dataset".format(len(images)))
    print("{} in train_set".format(len(train_labels)))
    print("{} in test_set".format(len(test_labels)))

    return train_imgs_path, train_labels, test_imgs_path, test_labels

In [17]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that loads RGB images from file paths.

    The base class is referenced by its qualified name because this class
    reuses the name ``Dataset``: with ``class Dataset(Dataset)`` re-running
    the cell would make the class inherit from its own previous definition.

    Args:
        image_path: List of image file paths.
        image_class: List of labels, aligned with ``image_path`` by index.
        transform: Optional callable applied to each loaded PIL image.

    Raises:
        ValueError: If ``image_path`` and ``image_class`` differ in length.
    """

    def __init__(self, image_path: list, image_class: list, transform=None):
        if len(image_path) != len(image_class):
            raise ValueError("got {} image paths but {} labels".format(
                len(image_path), len(image_class)))
        self.image_path = image_path
        self.image_class = image_class
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_path)

    def __getitem__(self, item):
        """Return ``(image, label)`` for index ``item``."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted; convert('RGB') always yields an RGB image,
        # so no further mode check is needed.
        with Image.open(self.image_path[item]) as source:
            img = source.convert('RGB')
        label = self.image_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [32]:
# Locations of the training images, the label CSV and the model output
# directory, relative to the notebook's working directory. os.path.join keeps
# the paths valid on every OS and avoids the invalid "\m" escape sequence that
# the literal ".\model_save" contained.
train_imgs_path = os.path.join(".", "trainimages")
train_labels_path = os.path.join(".", "train.csv")
model_save_path = os.path.join(".", "model_save")

In [19]:
# Preprocessing applied to every image: convert the PIL image to a tensor
# first, then resize that tensor to 224x224.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Resize(size=(224, 224))]
)

In [31]:
# Seed Python's and PyTorch's random number generators so that the random
# train/test split and the shuffling of the training loader can be repeated
# after a kernel restart.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

batch_size = 10
# `plot` is not passed any more: read_split_data never reads it.
train_imgs, train_labels, test_imgs, test_labels = read_split_data(
    train_imgs_path, train_labels_path, test_ratio=0.3)

train_dataset = Dataset(train_imgs, train_labels, transform)
test_dataset = Dataset(test_imgs, test_labels, transform)

# Only the training data is shuffled; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size)

3000 images were found in the dataset
2100 in train_set
900 in test_set


In [22]:
# Hyper-parameters
# NOTE: the DataLoaders are built with the separate lower-case `batch_size`
# variable; BATCH_SIZE itself is not read anywhere in the visible cells.
BATCH_SIZE = 10
nepochs = 5
LR = 0.001

# Cross-entropy loss; it expects raw logits and integer class labels.
loss_func = nn.CrossEntropyLoss()
# Use the GPU when one is available and fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [23]:
# Convolution factory: every convolution in VGGNet uses a 3x3 kernel.
def conv3x3(in_features, out_features):
    """Return a 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_features: Number of input channels.
        out_features: Number of output channels.
    """
    layer = nn.Conv2d(
        in_channels=in_features,
        out_channels=out_features,
        kernel_size=3,
        padding=1,
    )
    return layer

In [24]:
# VGG19 with batch normalisation: 16 convolutional layers followed by two
# hidden fully connected layers (fc_1, fc_2) and a linear output layer.
class VGG(nn.Module):
    """VGG-19 style classifier with batch normalisation.

    ``features`` is built from ``cfg`` and contains, in order, the same
    modules as the hand-written layer list it replaces (conv3x3 ->
    BatchNorm2d -> ReLU per entry, MaxPool2d(2) per ``"M"``), so module
    indices are unchanged.

    The network returns raw logits; no softmax layer is applied here.

    Args:
        num_classes: Size of the output layer. Defaults to 2.
    """

    # Output channels of each 3x3 convolution, in order; "M" marks a 2x2
    # max-pooling layer that closes a stage.
    cfg = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    )

    def __init__(self, num_classes=2):
        super(VGG, self).__init__()

        layers = []
        in_channels = 3  # the first convolution takes 3 input channels
        for spec in self.cfg:
            if spec == "M":
                layers.append(nn.MaxPool2d(2))
            else:
                layers.extend([
                    conv3x3(in_channels, spec),
                    nn.BatchNorm2d(spec),
                    nn.ReLU(),
                ])
                in_channels = spec
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            # fc_1: 512 channels x 7 x 7 positions = 25088 input features
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            # fc_2
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            # output layer: one logit per class
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        out = self.features(x)
        # The classifier needs a 7x7 feature map, which a 224x224 input
        # produces directly. Other sizes are pooled to 7x7 so they no longer
        # fail in the first linear layer.
        if out.shape[-2:] != (7, 7):
            out = F.adaptive_avg_pool2d(out, (7, 7))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

In [25]:
def train_epoch(model, loss_func, optimizer, dataloader):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        loss_func: Callable ``loss_func(logits, targets)`` returning a scalar
            tensor. The returned average assumes it is the batch mean.
        optimizer: Optimizer that updates the parameters of ``model``.
        dataloader: Iterable of ``(batch_x, batch_y)`` tensor pairs.

    Returns:
        The sample-weighted mean training loss as a float, or ``nan`` when
        ``dataloader`` yields no batches.
    """
    model.train()

    # Move each batch to wherever the model lives instead of relying on a
    # global device variable; a parameter-free model leaves batches in place.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_loss, total_samples = 0.0, 0
    for batch_x, batch_y in dataloader:
        if target_device is not None:
            batch_x, batch_y = batch_x.to(target_device), batch_y.to(target_device)

        optimizer.zero_grad()
        # The per-batch torch.cuda.empty_cache() call was dropped: it ran on
        # every batch behind a guard that is always true.
        logits = model(batch_x)
        error = loss_func(logits, batch_y)
        error.backward()
        optimizer.step()

        batch_len = batch_y.size(0)
        total_loss += error.item() * batch_len
        total_samples += batch_len

    if total_samples == 0:
        return float("nan")
    return total_loss / total_samples

In [26]:
# Evaluation helper: reports the loss and the accuracy on a data loader.
def eval(model, loss_func, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Note that this function shadows the built-in ``eval``; the name is kept
    because other cells call it.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loss_func: Callable ``loss_func(logits, targets)`` returning a scalar
            tensor. The returned average assumes it is the batch mean.
        dataloader: Iterable of ``(batch_x, batch_y)`` tensor pairs.

    Returns:
        ``(loss, accuracy)`` as floats: the mean loss per sample and the
        accuracy in percent. Both are weighted by batch size, so a smaller
        final batch no longer skews the result.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()

    # Move each batch to wherever the model lives instead of relying on a
    # global device variable; a parameter-free model leaves batches in place.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    loss_sum, correct, seen = 0.0, 0, 0

    # torch.no_grad tells PyTorch not to record the computation graph during
    # the forward pass.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            if target_device is not None:
                batch_x, batch_y = batch_x.to(target_device), batch_y.to(target_device)

            logits = model(batch_x)
            batch_len = batch_y.size(0)
            loss_sum += loss_func(logits, batch_y).item() * batch_len

            pred_y = logits.argmax(dim=1)
            correct += (pred_y == batch_y).sum().item()
            seen += batch_len

    if seen == 0:
        raise ValueError("cannot evaluate on an empty dataloader")

    loss = loss_sum / seen
    accuracy = correct * 100.0 / seen
    return loss, accuracy

In [27]:
# Per-epoch metrics are collected in this list.
learn_history = list()

# Build the network and move it to the selected device.
vgg19 = VGG()
vgg19 = vgg19.to(device)
# Print the model to inspect the network structure.
print(vgg19)

# Optimise with Adam; the scheduler multiplies the learning rate by 0.1 at
# milestone 40.
optimizer = torch.optim.Adam(params=vgg19.parameters(), lr=LR)
scheduler = lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[40], gamma=0.1)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,

In [29]:
print('开始训练VGG19')

for epoch in range(nepochs):
    # Start time of this epoch
    since = time.time()
    train_epoch(vgg19, loss_func, optimizer, train_loader)
    # Advance the learning-rate schedule once per epoch; without this call
    # the MultiStepLR scheduler created with the optimizer never takes effect.
    scheduler.step()

    # Report the metrics after every epoch
    tr_loss, tr_acc = eval(vgg19, loss_func, train_loader)
    te_loss, te_acc = eval(vgg19, loss_func, test_loader)
    # Store plain floats so the history does not hold on to tensors.
    tr_acc, te_acc = float(tr_acc), float(te_acc)
    learn_history.append((tr_loss, tr_acc, te_loss, te_acc))
    # End time of this epoch (includes both evaluation passes)
    now = time.time()
    print('[%3d/%d, %.0f seconds]|\t 训练误差: %.1e, 训练正确率: %.2f\t |\t 测试误差: %.1e, 测试正确率: %.2f'%(
        epoch+1, nepochs, now-since, tr_loss, tr_acc, te_loss, te_acc))

开始训练VGG19
[  1/5, 2271 seconds]|	 训练误差: 4.5e-01, 训练正确率: 89.19	 |	 测试误差: 6.0e-01, 测试正确率: 88.67
[  2/5, 2263 seconds]|	 训练误差: 1.0e-01, 训练正确率: 97.57	 |	 测试误差: 1.8e-01, 测试正确率: 97.00
[  3/5, 2241 seconds]|	 训练误差: 1.0e-01, 训练正确率: 97.14	 |	 测试误差: 1.4e-01, 测试正确率: 96.89
[  4/5, 2313 seconds]|	 训练误差: 2.5e+00, 训练正确率: 88.76	 |	 测试误差: 2.3e+00, 测试正确率: 88.89
[  5/5, 2630 seconds]|	 训练误差: 1.9e+00, 训练正确率: 91.33	 |	 测试误差: 1.9e+00, 测试正确率: 90.78
