main文件是进行训练和验证的主体代码文件。主要内容有：  

1、定义了训练集和验证集，并加载数据。 

2、定义模型，以及优化器、梯度下降算法组合  

3、训练模型，并可视化

In [1]:
import numpy as np
import pandas as pd

import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
import os
import matplotlib
import matplotlib.pyplot as plt
from tensorboardX import SummaryWriter
import re
import time


Bad key "text.kerning_factor" on line 4 in
/home/hjh/ProgramFiles/anaconda3/envs/pytorch_HJH/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


打印检查有关库的版本

In [2]:
# Report the versions of the core libraries this notebook was run with.
for _lib in (torch, torchvision, matplotlib):
    print(_lib.__version__)


1.4.0
0.5.0
3.1.3


自定义数据集：继承data.Dataset，并且定义有用的函数。

In [3]:
class MyDataset(data.Dataset):
    """Image dataset backed by a directory of ``<number>.jpg`` files.

    When a label CSV is supplied the dataset runs in ``'train'`` mode and
    ``__getitem__`` yields ``(image, label)``; otherwise it runs in
    ``'test'`` mode and yields only the image.

    Args:
        imgroot: Directory containing the ``.jpg`` images. Every ``.jpg``
            file name must have an integer stem (e.g. ``12.jpg``), since the
            files are ordered by that integer.
        csvroot: Optional path to a CSV file with a ``Category`` column.
            Presumably row *i* labels the *i*-th image in numeric order --
            verify against the data.
        transform: Optional callable applied to the opened PIL image.
    """

    def __init__(self, imgroot, csvroot=None, transform=None):
        self.transforms = transform
        # A label file means training/validation data; no labels means test data.
        if csvroot:
            labels_df = pd.read_csv(csvroot)
            labels_df.info()
            self.split = 'train'
            self.Y = labels_df['Category'].tolist()
        else:
            self.split = 'test'

        # Collect the image file names and order them numerically, so that
        # '10.jpg' comes after '2.jpg' rather than before it.
        names = [k for k in os.listdir(imgroot) if k.endswith('.jpg')]
        names.sort(key=lambda k: int(k[:-4]))
        self.imgs = [os.path.join(imgroot, k) for k in names]

    def __getitem__(self, index):
        """Return the sample at ``index``.

        Returns:
            ``(img, label)`` in ``'train'`` mode, ``img`` alone otherwise.

        Raises:
            RuntimeError: If the image file cannot be opened.
        """
        img_path = self.imgs[index]
        try:
            pil_img = Image.open(img_path)
        except OSError as err:
            # Keep the original error attached so the root cause stays visible.
            raise RuntimeError("couldn't read imge " + img_path) from err

        if self.transforms:
            img = self.transforms(pil_img)
        else:
            # np.array copies, so the resulting tensor owns writable memory.
            img = torch.from_numpy(np.array(pil_img))

        if self.split == 'train':
            return img, self.Y[index]
        return img

    def __len__(self):
        """Return the number of images found in ``imgroot``."""
        return len(self.imgs)

以下定义了三种变换，其中后两者用于数据增广：  

transform1: 恒等变换  

transform2: 对图片先放大，然后随机进行水平和竖直翻转、旋转、亮度和对比度调节等变换，最后缩小到原始尺寸。其训练结果较差。  

transform3：图片只在旋转时进行截取放缩，并降低了竖直翻转和旋转缩放变换的概率。其训练结果很好。

In [4]:
def _tensor_and_normalize():
    """Return the tail shared by every pipeline: ToTensor, then Normalize.

    Normalize uses mean 0.5 and std 0.5 for each of the three channels.
    """
    return [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]


# transform1: no augmentation, only tensor conversion and normalization.
transform1 = transforms.Compose(_tensor_and_normalize())

# transform2: enlarge, apply aggressive random flips / rotation / colour
# jitter, then centre-crop back down to 224.
transform2 = transforms.Compose(
    [
        transforms.Resize(336),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(45),
        transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.5, contrast=0.5)],
            p=0.5,
        ),
        transforms.CenterCrop(224),
    ]
    + _tensor_and_normalize()
)

# transform3: milder augmentation. Rotation is always paired with a
# RandomResizedCrop(224) and the pair is applied only 10% of the time;
# vertical flips are also rare (10%).
transform3 = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.1),
        transforms.RandomApply(
            [
                transforms.RandomRotation(10, expand=False),
                transforms.RandomResizedCrop(224),
            ],
            p=0.1,
        ),
        transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.5, contrast=0.5)],
            p=0.5,
        ),
    ]
    + _tensor_and_normalize()
)


以下定义了几种网络模型：  

MyEasyNet：单纯地用于试水，一层卷积+relu+pooling+一层全连接层  

MyVggNet：在vgg16的基础上进行修改，主要是扩展了最后一个全连接层，并修改输出向量的尺寸

In [5]:
class MyEasyNet(nn.Module):
    """Minimal smoke-test network: one conv -> ReLU -> max-pool -> one linear layer.

    The linear layer is sized for 3x224x224 inputs: a 5x5 convolution without
    padding gives 220x220, and 4x4 pooling with stride 4 gives 55x55.

    Args:
        num_classes: Size of the output vector (default 180).
    """

    def __init__(self, num_classes=180):
        super(MyEasyNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 1, 5)
        self.pool = nn.MaxPool2d(4, 4)
        self.fc1 = nn.Linear(1 * 55 * 55, num_classes)

    def forward(self, x):
        """Map a batch of images to ``(batch, num_classes)`` activations.

        Raises:
            RuntimeError: If the input's spatial size does not yield the
                55x55 feature map the linear layer expects.
        """
        x = self.pool(F.relu(self.conv1(x)))
        # Flatten per sample. Keeping the batch dimension fixed means a
        # wrongly sized input fails in fc1 instead of being silently
        # regrouped into a different number of samples.
        x = x.view(x.size(0), -1)
        # NOTE(review): the ReLU clamps the outputs to be non-negative before
        # they reach the loss; confirm this is intended for a classifier head.
        x = F.relu(self.fc1(x))
        return x
    
class MyVggNet(nn.Module):
    """VGG16 whose last classifier layer is replaced by a two-layer head.

    The replacement head is Linear(4096, 1024) -> ReLU -> Dropout(0.4) ->
    Linear(1024, num_classes) -> LogSoftmax, so ``forward`` returns
    log-probabilities.

    Args:
        num_classes: Number of output classes.
        feature_extract: If true, every original VGG16 parameter is frozen;
            the replacement head is created afterwards and stays trainable.
        device: Optional device to move the model to after construction.
        path: Optional path to a ``state_dict`` saved from this class. When
            given, the weights are restored from it.
    """

    def __init__(self, num_classes = 180, feature_extract = True, device = None, path = None):
        super(MyVggNet, self).__init__()
        # The ImageNet weights are only needed when no checkpoint is given:
        # load_state_dict below is strict and overwrites every parameter.
        self.vgg = torchvision.models.vgg16(pretrained=not path)
        if feature_extract:
            for param in self.vgg.parameters():
                param.requires_grad = False
        self.vgg.classifier[6] = nn.Sequential(
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(1024, num_classes),
            nn.LogSoftmax(dim=1),
        )
        if path:
            # Map to CPU first so a checkpoint saved on a GPU also loads on a
            # CPU-only machine; the model is moved to `device` just below.
            self.load_state_dict(torch.load(path, map_location='cpu'))
        if device is not None:
            self.to(device)

    def forward(self, x):
        """Run the modified VGG16 on a batch and return its output."""
        return self.vgg(x)

    def param_num(self):
        """Print and describe the parameter counts.

        Prints the total and the trainable (``requires_grad``) parameter
        counts, one per line, then returns them in a formatted string.
        """
        total_params_count = sum(p.numel() for p in self.parameters())
        print(total_params_count)
        params_to_update_count = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(params_to_update_count)
        return 'total_params_count = {:d}, params_to_update_count = {:d}'.format(total_params_count, params_to_update_count)

定义了训练网络的相关函数：myvalid（验证）、mytrain（训练一个 epoch）、run（完整的训练流程）

In [6]:
def myvalid(model, loader=None, loss_fn=None, run_device=None):
    """Evaluate ``model`` over a validation loader without tracking gradients.

    Args:
        model: Module to evaluate; it is moved to the device and put in
            eval mode.
        loader: Iterable of ``(imgs, labels)`` batches. Defaults to the
            module-level ``validloader``.
        loss_fn: Loss callable. Defaults to the module-level ``criterion``.
            Assumed to return the batch *mean* (the default reduction).
        run_device: Device to run on. Defaults to the module-level ``device``.

    Returns:
        ``(accuracy_percent, mean_loss_per_sample)``.

    Raises:
        ValueError: If the loader yields no samples.
    """
    # Fall back to the notebook globals so existing `myvalid(model)` calls
    # keep working unchanged.
    loader = validloader if loader is None else loader
    loss_fn = criterion if loss_fn is None else loss_fn
    run_device = device if run_device is None else run_device

    loss_sum = 0.0
    correct = 0
    total = 0

    model.to(run_device)
    model.eval()

    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(run_device), labels.to(run_device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)
            batch_size = labels.size(0)
            total += batch_size
            correct += (predicted == labels).sum().item()
            # The loss is a per-batch mean; weight it by the batch size so
            # that dividing by the sample count gives a true per-sample mean
            # (also correct when the last batch is smaller).
            loss_sum += loss_fn(outputs, labels).item() * batch_size

    if total == 0:
        raise ValueError('validation loader yielded no samples')

    valid_accuracy = correct / total * 100.0
    valid_loss = loss_sum / total
    return valid_accuracy, valid_loss

def mytrain(model, optimizer, criterion, loader=None, run_device=None):
    """Train ``model`` for one pass over a training loader.

    Args:
        model: Module to train; it is put in train mode. It is not moved to
            the device here, so it must already live on it.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable. Assumed to return the batch *mean* (the
            default reduction).
        loader: Iterable of ``(imgs, labels)`` batches. Defaults to the
            module-level ``trainloader``.
        run_device: Device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        ``(accuracy_percent, mean_loss_per_sample)``, both measured on the
        forward passes made during training (before each update).

    Raises:
        ValueError: If the loader yields no samples.
    """
    # Fall back to the notebook globals so existing three-argument calls
    # keep working unchanged.
    loader = trainloader if loader is None else loader
    run_device = device if run_device is None else run_device

    loss_sum = 0.0
    correct = 0
    total = 0

    model.train()
    for imgs, labels in loader:
        imgs, labels = imgs.to(run_device), labels.to(run_device)
        outputs = model(imgs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update params

        batch_size = labels.size(0)
        # Statistics only: detach so no graph is kept alive for them.
        _, predicted = torch.max(outputs.detach(), 1)
        correct += (predicted == labels).sum().item()
        total += batch_size
        # The loss is a per-batch mean; weight it by the batch size so that
        # dividing by the sample count gives a true per-sample mean.
        loss_sum += loss.item() * batch_size

    if total == 0:
        raise ValueError('training loader yielded no samples')

    train_accuracy = correct / total * 100.0
    train_loss = loss_sum / total
    return train_accuracy, train_loss

def run(model, writer, path, optimizer, criterion, epochs = 20, best = 0.0):
    """Train and validate ``model`` for ``epochs`` epochs, then save it.

    Each epoch calls ``mytrain`` and ``myvalid``, logs the four resulting
    metrics through ``writer.add_scalar`` and prints a progress line with
    the epoch time and total time in minutes. After the last epoch the
    model's ``state_dict`` is saved to ``path``.

    Args:
        model: Module to train.
        writer: Object exposing ``add_scalar(tag, value, global_step=...)``.
        path: File the final ``state_dict`` is written to.
        optimizer: Optimizer passed to ``mytrain``.
        criterion: Loss passed to ``mytrain``.
        epochs: Number of epochs to run.
        best: Best validation accuracy known before this run; it is raised
            whenever an epoch beats it.

    Returns:
        The best validation accuracy (percent) seen, including ``best``.
    """
    # PATH = './vgg16_adam_transforms3_epoch20.pth'

    # Synchronising only makes sense (and only works) when CUDA is present;
    # the notebook falls back to the CPU otherwise.
    use_cuda = torch.cuda.is_available()

    if use_cuda:
        torch.cuda.synchronize()
    start = time.time()
    end = start

    for epoch in range(epochs):
        train_accuracy, train_loss = mytrain(model, optimizer, criterion)
        valid_accuracy, valid_loss = myvalid(model)

        # Track the best score so the final message reports a real value
        # instead of the untouched initial argument.
        best = max(best, valid_accuracy)

        writer.add_scalar('valid_accuracy', valid_accuracy, global_step=epoch)
        writer.add_scalar('train_accuracy', train_accuracy, global_step=epoch)
        writer.add_scalar('valid_loss', valid_loss, global_step=epoch)
        writer.add_scalar('train_loss', train_loss, global_step=epoch)

        if use_cuda:
            # Wait for pending GPU work so the timing below is accurate.
            torch.cuda.synchronize()
        last = end
        end = time.time()
        t = end - last
        total = end - start

        print(
            'train_acc={:.4f}, train_loss={:.4f}, valid_acc={:.4f}, valid_loss={:.4f}, epoch_t={:.2f}, total_t={:.2f}'
            .format(train_accuracy,train_loss, valid_accuracy, valid_loss, t/60, total/60))
    torch.save(model.state_dict(), path)
    print('Finished Training, best_valid_accuracy is {:.4f}'.format(best))
    return best



main的主体操作流：  

1、定义了训练集和验证集，并加载数据。 

其中，设置batch_size=64（经测试，128对于1块GPU来说会显存溢出）
由于训练集的数据是按标签有序地排列，所以要设置shuffle=True，以打乱顺序，保证训练的均衡性

2、定义模型，以及优化器、梯度下降算法组合  

3、训练模型，并可视化

In [8]:
# ---- Data -----------------------------------------------------------------
# Both loaders use the same settings: batches of 64, shuffled, 4 workers.
loader_kwargs = dict(batch_size=64, shuffle=True, num_workers=4)

# Training data goes through the augmenting pipeline (transform3).
trainset = MyDataset(
    imgroot='./input/train/train',
    csvroot='./input/train.csv',
    transform=transform3,
)
trainloader = data.DataLoader(trainset, **loader_kwargs)
trainiter = iter(trainloader)

# Validation data only gets tensor conversion + normalization (transform1).
validset = MyDataset(
    imgroot='./input/valid/valid',
    csvroot='./input/valid.csv',
    transform=transform1,
)
validloader = data.DataLoader(validset, **loader_kwargs)
validiter = iter(validloader)

# ---- Model ----------------------------------------------------------------
# Set LOAD_PATH to a saved state_dict to resume from it; None starts from
# the pretrained VGG16 weights.
LOAD_PATH = None
# LOAD_PATH =  './vgg16_adam_transforms3_epoch20.pth'

# Use the first GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# feature_extract=False leaves every VGG16 layer trainable.
model = MyVggNet(
    num_classes=180,
    feature_extract=False,
    device=device,
    path=LOAD_PATH,
)
print(model)
print(model.param_num())

# ---- Loss and optimizers --------------------------------------------------
# NOTE(review): MyVggNet already ends in LogSoftmax and CrossEntropyLoss
# applies log-softmax again; confirm whether NLLLoss was intended.
criterion = nn.CrossEntropyLoss()
adam_optimizer = optim.Adam(model.parameters(), lr=0.0001)
sgd_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = adam_optimizer  # Adam is the optimizer used for this run
epochs = 20

# ---- Training -------------------------------------------------------------
SAVE_PATH = './vgg16_1.pth'
with SummaryWriter(comment='vgg16_1') as w:
    # w.add_graph(model)
    run(model, w, SAVE_PATH, optimizer, criterion, epochs=epochs)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24497 entries, 0 to 24496
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   ID        24497 non-null  int64
 1   Category  24497 non-null  int64
dtypes: int64(2)
memory usage: 382.9 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   ID        900 non-null    int64
 1   Category  900 non-null    int64
dtypes: int64(2)
memory usage: 14.2 KB
cuda:0
MyVggNet(
  (vgg): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(