In [6]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.transforms.transforms as T

USE_GPU = True

TRAIN_SET_NUM = 49000
BATCH_SIZE = 64
EPOCH_NUM = 25

# 数据预处理
transform_normal = T.Compose([
    T.ToTensor(), 
    T.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010))
])

# 数据增强
transform_aug = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])


# 加载训练集
train_dataset = torchvision.datasets.CIFAR10(root='./', train=True, transform=transform_aug, download=True)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler.SubsetRandomSampler(range(TRAIN_SET_NUM)))

# 加载验证集
val_dataset = torchvision.datasets.CIFAR10(root='./', train=True, transform=transform_normal, download=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, sampler=sampler.SubsetRandomSampler(range(TRAIN_SET_NUM, 50000)))

# 加载测试集
test_dataset = torchvision.datasets.CIFAR10(root='./', train=False, transform=transform_normal, download=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [2]:
if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cuda


In [3]:
class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()

        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out

class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2, 
    # by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 
           512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)             # [batch_size, 32, 32, 32]
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

net = MobileNet()
print(net)

MobileNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Block(
      (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Block(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2):

In [7]:
import copy

train_loss_hist = []
test_loss_hist = []

# 验证模型在验证集或者测试集上的准确率
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')
    num_correct = 0
    num_samples = 0
    model.eval()   # set model to evaluation mode
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            _,preds = scores.max(1)
            num_correct += (preds==y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 *acc ))
        return acc

def train_model(model, optimizer, epochs=1, scheduler=None):
    '''
    Parameters:
    - model: A Pytorch Module giving the model to train.
    - optimizer: An optimizer object we will use to train the model
    - epochs: A Python integer giving the number of epochs to train
    Returns: best model
    '''
    best_model_wts = None
    best_acc = 0.0
    model = model.to(device=device) # move the model parameters to CPU/GPU
    for e in range(epochs):
        if scheduler:
            scheduler.step()
        for t,(x,y) in enumerate(train_dataloader):
            model.train()   # set model to training mode
            x = x.to(device)
            y = y.to(device)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch %d, loss=%.4f' % (e, loss.item()))
        acc = check_accuracy(val_dataloader, model)
        if acc > best_acc:
            best_model_wts = copy.deepcopy(model.state_dict())
            best_acc = acc
    print('best_acc:',best_acc)
    model.load_state_dict(best_model_wts)
    return model

In [8]:
from torch.optim import lr_scheduler

optimizer = optim.SGD(net.parameters(), lr=1e-2, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=15,gamma=0.1)

best_model = train_model(net, optimizer, epochs=EPOCH_NUM, scheduler=scheduler)

Epoch 0, loss=0.6895
Checking accuracy on validation set
Got 839 / 1000 correct (83.90)
Epoch 1, loss=0.3596
Checking accuracy on validation set
Got 855 / 1000 correct (85.50)
Epoch 2, loss=0.2887
Checking accuracy on validation set
Got 862 / 1000 correct (86.20)
Epoch 3, loss=0.7200
Checking accuracy on validation set
Got 852 / 1000 correct (85.20)
Epoch 4, loss=0.5093
Checking accuracy on validation set
Got 859 / 1000 correct (85.90)
Epoch 5, loss=0.3673
Checking accuracy on validation set
Got 866 / 1000 correct (86.60)
Epoch 6, loss=0.2021
Checking accuracy on validation set
Got 858 / 1000 correct (85.80)
Epoch 7, loss=0.5006
Checking accuracy on validation set
Got 862 / 1000 correct (86.20)
Epoch 8, loss=0.2052
Checking accuracy on validation set
Got 854 / 1000 correct (85.40)
Epoch 9, loss=0.3169
Checking accuracy on validation set
Got 858 / 1000 correct (85.80)
Epoch 10, loss=0.1808
Checking accuracy on validation set
Got 864 / 1000 correct (86.40)
Epoch 11, loss=0.2508
Checking 

In [9]:
check_accuracy(test_dataloader, best_model)

Checking accuracy on test set
Got 8767 / 10000 correct (87.67)


0.8767

In [None]:
# 87.67