# 阿里服装属性标签识别

<img src="https://work.alibaba-inc.com/aliwork_tfs/g01_alibaba-inc_com/tfscom/TB1Zja1Xb1YBuNjSszhXXcUsFXa.tfsprivate.jpg">

In [2]:
import mxnet as mx
import numpy as np

from mxnet import nd
from mxnet import image
from mxnet import gluon
from mxnet import autograd

%matplotlib inline
import matplotlib as mlt
mlt.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

## 数据整理

In [4]:
import os

def mkdir_if_not_exist(path):
    """Create the directory obtained by joining the components in ``path``.

    Args:
        path: Sequence of path components (e.g. ``['data', 'train']``) that
            are combined with ``os.path.join``.

    Missing intermediate directories are created as well; a directory that
    already exists is left untouched.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory in between.
    os.makedirs(os.path.join(*path), exist_ok=True)

In [5]:
# mkdir_if_not_exist(['data/train_valid'])

整理数据集为``gluon``的``ImageFolderDataset``支持的数据格式

In [None]:
task = 'skirt_length_labels'

warmup_label_dir = 'data/web/Annotations/skirt_length_labels.csv'
base_label_dir = 'data/base/Annotations/label.csv'

# Collected (image file path, label string) pairs from both annotation files.
image_path = []

# Every row is split on ',' into exactly three fields: the image path relative
# to the dataset root, a middle field that is ignored here, and the label
# string. The root prefix is prepended so the path is usable from the
# working directory.
for label_file, root in ((warmup_label_dir, 'data/web/'),
                         (base_label_dir, 'data/base/')):
    with open(label_file, 'r') as f:
        for row in f.readlines():
            path, _, label = row.rstrip().split(',')
            image_path.append((root + path, label))

画出图片，其中标签是若干个n和一个y组成的字符串，字母y出现的位置就是图片对应的类型

In [None]:
def plot_image(img_path):
    """Decode the image file at ``img_path``, display it, and return it.

    Args:
        img_path: Path of the image file to read (opened in binary mode).

    Returns:
        The decoded image as returned by ``image.imdecode``.
    """
    with open(img_path, 'rb') as img_file:
        raw_bytes = img_file.read()
    decoded = image.imdecode(raw_bytes)
    # matplotlib needs a NumPy array, so convert before plotting.
    plt.imshow(decoded.asnumpy())
    return decoded

# Show the first collected sample together with its raw label string.
# (The list is named image_path; img_path only exists as plot_image's parameter.)
plot_image(image_path[0][0])
print("Official Label String: %s" % (image_path[0][1]))

准备好训练集和测试集的目录，以及6个裙子类别对应的子目录

In [None]:
# Root directory for this task plus one directory per split.
mkdir_if_not_exist(['data/train_valid', task])
for split in ('train', 'val'):
    mkdir_if_not_exist(['data/train_valid', task, split])

# The number of classes equals the length of a label string (one character
# per class); create one numbered sub-directory per class in each split.
m = len(image_path[0][1])
for mm in range(m):
    for split in ('train', 'val'):
        mkdir_if_not_exist(['data/train_valid', task, split, str(mm)])

随机打乱全部样本，按9:1划分为训练集和验证集，并复制图片到各自对应的目录中

In [None]:
import random
import shutil

n = len(image_path)
# Fixed seed so the train/val split is reproducible across runs.
random.seed(1024)
random.shuffle(image_path)
train_count = 0

for path, label in image_path:
    # The class index is the position of 'y' in the label string
    # (raises ValueError when a label contains no 'y').
    label_index = label.index('y')
    # The first 90% of the shuffled samples go to train, the rest to val.
    subset = 'train' if train_count < n * .9 else 'val'
    shutil.copy(path, os.path.join('data/train_valid', task, subset, str(label_index)))
    train_count += 1

## 迁移学习
* 使用ImageNet训练好的模型进行训练

在ImageNet上训练的模型输出是1000维的，我们需要定义一个新的``resnet50_v2``网络，其中：
* 输出层之前的权重是预训练好的
* 输出是6维的，且输出层的权重随机初始化

之后，我们可以根据具体的机器环境选择将网络保存在CPU或GPU上

In [9]:
from mxnet.gluon.model_zoo import vision as models

In [11]:
# ResNet-50 v2 from the Gluon model zoo, loaded with its pretrained weights.
pretrained_net = models.resnet50_v2(pretrained=True)

In [25]:
# Number of GPUs to use; 0 selects the CPU.
num_gpu = 0

# List of device contexts: one per GPU, or a single CPU context when num_gpu is 0.
ctx = [mx.gpu(i) for i in range(num_gpu)] if num_gpu > 0 else [mx.cpu()]

In [26]:
# New ResNet-50 v2 whose output layer has 6 classes.
finetune_net = models.resnet50_v2(classes=6)
# Reuse the feature layers (and their weights) of the pretrained network.
finetune_net.features = pretrained_net.features
# Only the new output layer is initialized here, using Xavier initialization.
finetune_net.output.initialize(mx.init.Xavier(), ctx=ctx)
# Move all parameters, including the reused features, to the selected contexts.
finetune_net.collect_params().reset_ctx(ctx)
finetune_net.hybridize()

## 定义评估与辅助函数
* 计算Average Precision,官方的结果评价标准
* 训练集与验证集的图片增广函数
* 每轮训练结束后在测试集上的评估函数

In [16]:
def calculate_ap(labels, outputs):
    """Accumulate the reciprocal rank of the true class over all samples.

    Args:
        labels: Iterable of arrays exposing ``asnumpy()``; each holds the
            class indices of one shard of samples.
        outputs: Iterable of arrays exposing ``asnumpy()``, aligned with
            ``labels``; each row holds the per-class scores of one sample.

    Returns:
        Tuple ``(ap, cnt)``: ``ap`` is the sum over samples of
        ``1 / rank``, where ``rank`` is the 1-based position of the true
        class when scores are sorted in descending order, and ``cnt`` is the
        number of samples seen. Callers divide ``ap`` by ``cnt`` to get the
        mean.
    """
    cnt = 0
    ap = 0.
    for label, output in zip(labels, outputs):
        # The np.int alias was removed in NumPy 1.24; the builtin int is the
        # equivalent dtype and works on every NumPy version.
        for lb, op in zip(label.asnumpy().astype(int), output.asnumpy()):
            # Class indices ordered from highest to lowest score.
            ranking = np.argsort(op)[::-1]
            ap += 1.0 / (1 + list(ranking).index(int(lb)))
            cnt += 1
    return ap, cnt

In [18]:
def transform_train(data, label):
    """Augment and normalize one training sample.

    Args:
        data: Image array; it is scaled to [0, 1] floats and, after
            augmentation, transposed with ``(2, 0, 1)`` (presumably from
            height x width x channel to channel-first - confirm against the
            dataset).
        label: Class index of the sample.

    Returns:
        Tuple ``(im, label)`` with the augmented image and the label as a
        scalar.
    """
    im = data.astype('float32') / 255
    # Resize to 256, random 224x224 crop, random mirror, then mean/std
    # normalization.
    auglist = image.CreateAugmenter(data_shape=(3, 224, 224), resize=256, rand_crop=True, rand_mirror=True,
                                   mean=np.array([0.485, 0.456, 0.406]), std=np.array([0.229, 0.224, 0.225]))
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2,0,1))
    # asscalar() belongs to the NDArray, not to the Python list: the closing
    # parenthesis of nd.array(...) was misplaced in the original.
    return (im, nd.array([label]).asscalar())

def transform_val(data, label):
    """Normalize one validation sample without random augmentation.

    Args:
        data: Image array; it is scaled to [0, 1] floats and, after
            preprocessing, transposed with ``(2, 0, 1)`` (presumably from
            height x width x channel to channel-first - confirm against the
            dataset).
        label: Class index of the sample.

    Returns:
        Tuple ``(im, label)`` with the preprocessed image and the label as a
        scalar.
    """
    im = data.astype('float32') / 255
    # Same resize and mean/std normalization as training, but no random crop
    # or mirror is requested.
    auglist = image.CreateAugmenter(data_shape=(3, 224, 224), resize=256,
                                    mean=np.array([0.485, 0.456, 0.406]),
                                    std=np.array([0.229, 0.224, 0.225]))
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2,0,1))
    # asscalar() belongs to the NDArray, not to the Python list: the closing
    # parenthesis of nd.array(...) was misplaced in the original.
    return (im, nd.array([label]).asscalar())

In [19]:
def validate(net, val_data, ctx):
    """Run ``net`` over the validation set and report its metrics.

    Args:
        net: Network that is called on each data shard.
        val_data: Iterable of batches supporting ``len()``; ``batch[0]`` is
            the data and ``batch[1]`` the labels.
        ctx: List of device contexts the batches are split across.

    Returns:
        Tuple ``(accuracy, mean_ap, mean_loss)``: the accuracy metric value,
        the accumulated AP divided by the sample count, and the per-batch
        loss averaged over the number of batches.
    """
    acc_metric = mx.metric.Accuracy()
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    ap_total = 0.
    ap_samples = 0
    loss_total = 0

    def shard(array):
        # Split one batch along axis 0 across the contexts; uneven splits
        # are allowed.
        return gluon.utils.split_and_load(array,
                                          ctx_list=ctx,
                                          batch_axis=0,
                                          even_split=False)

    for batch in val_data:
        data_parts = shard(batch[0])
        label_parts = shard(batch[1])

        predictions = [net(part) for part in data_parts]
        acc_metric.update(label_parts, predictions)

        shard_losses = [loss_fn(pred, target) for pred, target in zip(predictions, label_parts)]
        shard_means = [shard_loss.mean().asscalar() for shard_loss in shard_losses]
        loss_total += sum(shard_means) / len(shard_losses)

        batch_ap, batch_cnt = calculate_ap(label_parts, predictions)
        ap_total += batch_ap
        ap_samples += batch_cnt

    val_acc = acc_metric.get()[1]
    return ((val_acc, ap_total / ap_samples, loss_total / len(val_data)))

迁移学习的一个特点是：我们一般认为整个网络的参数不需要进行很大的改动，因此学习率通常设为一个比较小的值，比如0.001。

In [20]:
# Optimizer settings passed to the SGD trainer.
lr = 1e-3  # learning rate, kept small for fine-tuning
momentum = 0.9
wd = 1e-4  # weight decay
# Number of passes over the training data.
epochs = 2
# Samples per mini-batch for both data loaders.
batch_size = 64

## 读入数据

In [21]:
train_path = os.path.join('data/train_valid', task, 'train')
val_path = os.path.join('data/train_valid', task, 'val')

# Training DataLoader: folder-per-class dataset with the training transform,
# reshuffled on every pass.
train_set = gluon.data.vision.ImageFolderDataset(train_path, transform=transform_train)
train_data = gluon.data.DataLoader(train_set, batch_size=batch_size,
                                   shuffle=True, num_workers=4)

# Validation DataLoader: same layout with the validation transform, read in
# a fixed order.
val_set = gluon.data.vision.ImageFolderDataset(val_path, transform=transform_val)
val_data = gluon.data.DataLoader(val_set, batch_size=batch_size,
                                 shuffle=False, num_workers=4)

## 训练

In [28]:
# SGD over all parameters of the fine-tuning network.
optimizer_params = {'learning_rate':lr, 'momentum':momentum, 'wd':wd}
trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', optimizer_params)

# Accuracy metric and softmax cross-entropy loss used during training.
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

In [31]:
from time import time

for epoch in range(epochs):
    tic = time()

    # Reset the per-epoch accumulators.
    train_loss = 0
    metric.reset()
    AP = 0.
    AP_cnt = 0

    # Number of mini-batches per epoch. The original referenced the
    # undefined name train_index here, which raised a NameError.
    num_batch = len(train_data)

    for i, batch in enumerate(train_data):
        pass