# 建立MobileNet对花朵图片进行分类
通过定义函数的方式定义中间层，简化建立模型的代码

使用GPU进行训练

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torchvision import transforms
import matplotlib.pyplot as plt

# Flag read by the rest of the notebook to decide whether model and data
# are moved to the GPU.
cuda_gpu = torch.cuda.is_available()
if cuda_gpu:
    # Keep using the second GPU (index 1) when it exists, but fall back to
    # GPU 0 on single-GPU machines, where index 1 is an invalid device ordinal.
    torch.cuda.set_device(1 if torch.cuda.device_count() > 1 else 0)
    print(torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print('There is no available gpu.')

In [2]:
# Locations of the flower dataset, expressed relative to the notebook:
#   ../data/flower_photos/train  and  ../data/flower_photos/test
data_dir = os.path.join('..', 'data')
flower_dir = os.path.join(data_dir, 'flower_photos')
train_dir, test_dir = (
    os.path.join(flower_dir, split) for split in ('train', 'test')
)

## transforms, ImageFolder, DataLoader
* 使用``torchvision.transforms.Compose``定义一些Data augmentation操作
* 首先使用``torchvision.datasets.ImageFolder``读取出图片数据，之后传入transforms操作，对图片进行处理
* 使用``torch.utils.data.DataLoader``定义Dataloader

### transforms.ToTensor()
将PIL Image或者 ndarray 转换为tensor，并且归一化至$[0, 1]$
* 注意事项：归一化至$[0, 1]$是直接除以255，若自己的ndarray数据尺度有变化，则需要自行修改。

### transforms.RandomRotation(degrees, resample=False, expand=False, center=None)
依degrees随机旋转一定角度

参数：
* ``degrees``- (sequence or float or int)，若为单个数，如 30，则表示在（-30，+30）之间随机旋转；
若为sequence，如(30，60)，则表示在30-60度之间随机旋转
* ``resample``- 重采样方法选择，可选 PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC，默认为最近邻
* ``expand``- Optional expansion flag. If true, expands the output to make it large enough to hold the entire rotated image. If false or omitted, make the output image the same size as the input image. Note that the expand flag assumes rotation around the center and no translation.
* ``center``- 可选为中心旋转还是左上角旋转。Default is the center of the image.

**需要注意，``transforms.Resize(size)``如果只传入一个int，则会把图片的短边缩放到size并保持宽高比，例如尺寸为(height, width)且height > width的图片会变为(size * height / width, size)。**

In [3]:
# Side length of the square image fed to the network, and samples per batch.
input_size = 224
batch_size = 64

# Per-split preprocessing pipelines.
data_transforms = dict(
    # Training: random crop resized to input_size, random horizontal flip,
    # random rotation within +/-30 degrees, then conversion to a tensor.
    train=transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.ToTensor(),
    ]),
    # Validation: resize straight to (input_size, input_size) with no
    # cropping, then conversion to a tensor.
    val=transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
    ]),
)

# One ImageFolder dataset per split, read from <flower_dir>/<split>.
image_datasets = {
    split: ImageFolder(os.path.join(flower_dir, split), data_transforms[split])
    for split in data_transforms
}

# Both loaders shuffle and use four worker processes.
train_loader = torch.utils.data.DataLoader(
    image_datasets["train"], batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(
    image_datasets["val"], batch_size=batch_size, shuffle=True, num_workers=4)

## 定义MobileNetV1
如使用GPU，则需要将模型转为cuda

In [4]:
class MobileNet(nn.Module):
    """MobileNetV1-style image classifier.

    The feature extractor is a 3x3 stride-2 stem convolution followed by 13
    depthwise-separable blocks. Each block is a 3x3 depthwise convolution and
    a 1x1 pointwise convolution, and every convolution is followed by
    BatchNorm and ReLU. The resulting 1024-channel feature map is globally
    average-pooled and passed to one linear layer that produces the logits.

    Args:
        num_classes: Number of output logits. Defaults to 5, the head size
            this model previously hard-coded.
    """

    def __init__(self, num_classes: int = 5):
        super(MobileNet, self).__init__()

        def make_layers(channels_list, stride_list):
            """Build the stem plus one depthwise-separable block per stride.

            ``channels_list[0]`` is the stem's output width; block ``k`` maps
            ``channels_list[k]`` channels to ``channels_list[k + 1]`` and
            applies ``stride_list[k]`` in its depthwise convolution.
            """
            layers = [
                nn.Conv2d(3, channels_list[0], kernel_size=3, stride=2, groups=1, padding=1),
                nn.BatchNorm2d(channels_list[0]),
                nn.ReLU(inplace=True),
            ]

            for in_ch, out_ch, stride in zip(channels_list, channels_list[1:], stride_list):
                layers += [
                    # Depthwise 3x3: groups == channels, so each channel is
                    # filtered independently.
                    nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=stride,
                              groups=in_ch, padding=1),
                    nn.BatchNorm2d(in_ch),
                    nn.ReLU(inplace=True),
                    # Pointwise 1x1: mixes channels and changes the width.
                    nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=1, groups=1),
                    nn.BatchNorm2d(out_ch),
                    nn.ReLU(inplace=True),
                ]

            return nn.Sequential(*layers)

        channels_list = [32, 64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024]
        stride_list = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1]
        self.model = make_layers(channels_list, stride_list)

        # Not used in forward(); kept so the module's attribute set is unchanged.
        self.relu = nn.ReLU()
        # Global average pooling to 1x1 regardless of the feature-map size.
        # A fixed AvgPool2d(7) is only correct for 224x224 inputs: smaller
        # inputs fail and larger ones leave extra spatial positions that a
        # view(-1, 1024) would silently fold into the batch dimension.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(channels_list[-1], num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        h = self.model(x)

        h = self.pool(h)
        # Flatten everything except the batch dimension: (N, 1024, 1, 1) -> (N, 1024).
        h = torch.flatten(h, 1)
        h = self.linear(h)

        return h

# Instantiate the network; when CUDA is available, move it to the GPU.
net = MobileNet()
if cuda_gpu:
    net = net.cuda()

In [5]:
# nn.CrossEntropyLoss already includes the softmax activation, so the network
# feeds it raw logits.
criterion = nn.CrossEntropyLoss()
# Adam over all network parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

### 如使用GPU，则需要将数据也转为cuda，并且令标签数据和预测结果类型相同（同为cuda或同为cpu）

In [6]:
# Train for 50 epochs, evaluating on test_loader after every epoch.
# Losses are accumulated weighted by batch size so the printed value is the
# per-sample mean even when the last batch is smaller.
for epoch in range(50):  # loop over the dataset multiple times
    # Training mode: BatchNorm normalises with batch statistics and updates
    # its running averages.
    net.train()
    train_correct = 0
    train_total = 0
    train_loss = 0.
    for inputs, labels in train_loader:
        # Move inputs and labels to the GPU so outputs, labels and the loss
        # all live on the same device (no copy of the outputs back to the CPU).
        if cuda_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit in each row.
        value_pred, label_pred = torch.max(outputs, dim=1)
        train_correct += (labels == label_pred).sum().item()
        train_total += labels.shape[0]
        train_loss += loss.item() * labels.shape[0]

    # Turn the accumulated sums into per-sample averages.
    train_loss /= train_total
    train_correct /= train_total

    # print statistics
    print('Train Epoch: %d\nTrain: Loss: %.4f, accuracy: %.4f ' % (epoch, train_loss, train_correct), end='')

    # Evaluation mode: BatchNorm uses its running statistics and no longer
    # updates them, so test data cannot leak into the model's state.
    net.eval()
    test_correct = 0
    test_total = 0
    test_loss = 0.
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            if cuda_gpu:
                images = images.cuda()
                labels = labels.cuda()

            y_pred = net(images)
            value_pred, label_pred = torch.max(y_pred, dim=1)
            test_correct += (labels == label_pred).sum().item()
            test_total += labels.shape[0]
            loss_batch = criterion(y_pred, labels)
            test_loss += loss_batch.item() * labels.shape[0]

        test_loss /= test_total
        test_correct /= test_total
        print('Test: Loss: %.4f, accuracy: %.4f' % (test_loss, test_correct))

print('Finished Training')

Train Epoch: 0
Train: Loss: 1.4583, accuracy: 0.3738 

KeyboardInterrupt: 