In [1]:
import math
import os
import shutil
from collections import Counter

def reorg_dog_data(data_dir, label_file, train_dir, test_dir, input_dir,
                   valid_ratio):
    """Copy the raw images into per-label train/valid/test folders.

    The following tree is created under ``data_dir/input_dir``:

    * ``train_valid/<label>/`` -- every labelled training image;
    * ``valid/<label>/``       -- the first ``k`` images of each label;
    * ``train/<label>/``       -- the remaining images of each label;
    * ``test/unknown/``        -- every file found in ``test_dir``.

    ``k`` is ``floor(n_min * valid_ratio)`` where ``n_min`` is the number of
    label-file rows of the rarest label, but never less than one.

    Files are visited in sorted name order so the split is reproducible
    across machines and file systems.

    Args:
        data_dir: Root directory holding the label file and image folders.
        label_file: CSV file name (relative to ``data_dir``) with a header
            row followed by ``id,label`` rows.
        train_dir: Folder (relative to ``data_dir``) with training images
            named ``<id>.<ext>``.
        test_dir: Folder (relative to ``data_dir``) with test images.
        input_dir: Output folder (relative to ``data_dir``).
        valid_ratio: Fraction of the rarest label's image count to hold out
            per label for validation.

    Raises:
        KeyError: If a file in ``train_dir`` has no row in the label file.
        ValueError: If a non-blank label row does not have exactly two
            comma-separated fields.
    """
    # Read the training labels, skipping the header row (column names).
    with open(os.path.join(data_dir, label_file), 'r') as f:
        lines = f.readlines()[1:]
    # Blank lines (e.g. a trailing newline at the end of the file) carry no
    # label; skipping them avoids a spurious unpacking error.
    tokens = [l.rstrip().split(',') for l in lines if l.strip()]
    idx_label = {idx: label for idx, label in tokens}

    # Number of images of the rarest label (0 when the label file is empty).
    min_num_train_per_label = min(Counter(idx_label.values()).values(),
                                  default=0)
    # Validation images per label. The first image of every label always
    # goes to validation, hence the lower bound of one.
    num_valid_per_label = max(
        1, math.floor(min_num_train_per_label * valid_ratio))
    label_count = Counter()

    def copy_into(src, *dst_parts):
        # Copy `src` into data_dir/input_dir/<dst_parts>, creating the
        # destination folder on demand.
        dst_dir = os.path.join(data_dir, input_dir, *dst_parts)
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(src, dst_dir)

    # Organize the training and validation sets.
    train_root = os.path.join(data_dir, train_dir)
    for train_file in sorted(os.listdir(train_root)):
        idx = train_file.split('.')[0]
        label = idx_label[idx]
        src = os.path.join(train_root, train_file)
        copy_into(src, 'train_valid', label)
        if label_count[label] < num_valid_per_label:
            copy_into(src, 'valid', label)
            label_count[label] += 1
        else:
            copy_into(src, 'train', label)

    # Organize the test set. The folder is created even if there are no
    # test files.
    os.makedirs(os.path.join(data_dir, input_dir, 'test', 'unknown'),
                exist_ok=True)
    test_root = os.path.join(data_dir, test_dir)
    for test_file in sorted(os.listdir(test_root)):
        copy_into(os.path.join(test_root, test_file), 'test', 'unknown')

In [20]:
# Locations of the raw data and of the reorganized output, plus the
# validation hold-out ratio passed to reorg_dog_data.
data_dir = 'kaggle_dog'
label_file = 'labels.csv'
train_dir = 'train'
test_dir = 'test'
input_dir = 'train_valid_test'
batch_size = 256
valid_ratio = 0.1

# One-off data preparation: copy the images into per-label folders.
reorg_dog_data(
    data_dir=data_dir,
    label_file=label_file,
    train_dir=train_dir,
    test_dir=test_dir,
    input_dir=input_dir,
    valid_ratio=valid_ratio,
)

NameError: name 'reorg_dog_data' is not defined

In [1]:
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np

def transform_train(data, label):
    """Apply the training-time augmentation pipeline to one sample.

    The image is cast to float32 and divided by 255, passed through every
    augmenter returned by ``image.CreateAugmenter`` (resize to 256, random
    crop / random resize / random mirror to a 3x224x224 data shape, PCA
    noise 0.01, mean/std normalization), and finally transposed from
    height*width*channel to channel*height*width.

    Args:
        data: Image array laid out as height*width*channel.
        label: Class index of the image.

    Returns:
        Tuple ``(image, label)`` with the label converted to float32.
    """
    scaled = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 224, 224), resize=256,
        rand_crop=True, rand_resize=True, rand_mirror=True,
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]),
        brightness=0, contrast=0, saturation=0, hue=0,
        pca_noise=0.01, rand_gray=0, inter_method=2)
    augmented = scaled
    for augment in augmenters:
        augmented = augment(augmented)
    # Change the layout from "height*width*channel" to
    # "channel*height*width".
    channels_first = nd.transpose(augmented, (2, 0, 1))
    float_label = nd.array([label]).asscalar().astype('float32')
    return (channels_first, float_label)

def transform_test(data, label):
    """Apply the evaluation-time preprocessing to one sample.

    The image is cast to float32 and divided by 255, passed through every
    augmenter returned by ``image.CreateAugmenter`` (resize to 256, a
    3x224x224 data shape, mean/std normalization; no random-augmentation
    flags are set), and transposed from height*width*channel to
    channel*height*width.

    Args:
        data: Image array laid out as height*width*channel.
        label: Class index of the image.

    Returns:
        Tuple ``(image, label)`` with the label converted to float32.
    """
    scaled = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 224, 224), resize=256,
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]))
    processed = scaled
    for augment in augmenters:
        processed = augment(processed)
    # Change the layout to "channel*height*width".
    channels_first = nd.transpose(processed, (2, 0, 1))
    float_label = nd.array([label]).asscalar().astype('float32')
    return (channels_first, float_label)

In [2]:
# Data locations and loader settings.
data_dir = 'kaggle_dog'
label_file = 'labels.csv'
train_dir = 'train'
test_dir = 'test'
input_dir = 'train_valid_test'
batch_size = 16
valid_ratio = 0.1

# Root of the reorganized data; it ends with a slash so that split names
# can simply be appended.
input_str = '%s/%s/' % (data_dir, input_dir)

# Read the raw image files. flag=1 means the input images have three
# (color) channels. The train and train_valid splits use transform_train;
# the valid and test splits use transform_test.
train_ds, valid_ds, train_valid_ds, test_ds = [
    vision.ImageFolderDataset(input_str + split, flag=1, transform=transform)
    for split, transform in (('train', transform_train),
                             ('valid', transform_test),
                             ('train_valid', transform_train),
                             ('test', transform_test))
]

# Shuffle every split except the test set, whose order is left unchanged.
loader = gluon.data.DataLoader
train_data, valid_data, train_valid_data = [
    loader(dataset, batch_size, shuffle=True, last_batch='keep')
    for dataset in (train_ds, valid_ds, train_valid_ds)
]
test_data = loader(test_ds, batch_size, shuffle=False, last_batch='keep')

# Cross-entropy loss function.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [3]:
import datetime
import sys
sys.path.append('..')
import utils

def get_loss(data, net, ctx):
    """Return the mean softmax cross-entropy of `net` over the samples in `data`.

    Each batch's mean loss is weighted by the number of samples in that
    batch, so a shorter final batch does not skew the result. `data` only
    has to be iterable; it does not need to support ``len()``.

    Args:
        data: Iterable yielding ``(features, label)`` batches whose first
            axis is the batch axis.
        net: Callable mapping a feature batch to class scores.
        ctx: Context the batches are copied to before evaluation.

    Returns:
        The per-sample mean loss, or ``nan`` when `data` yields no samples.
    """
    total_loss = 0.0
    num_samples = 0
    for feas, label in data:
        batch_samples = feas.shape[0]
        label = label.as_in_context(ctx)
        output = net(feas.as_in_context(ctx))
        cross_entropy = softmax_cross_entropy(output, label)
        # Undo the per-batch averaging so every sample counts equally.
        total_loss += nd.mean(cross_entropy).asscalar() * batch_samples
        num_samples += batch_samples
    if num_samples == 0:
        # Nothing to evaluate; report "no value" instead of dividing by zero.
        return float('nan')
    return total_loss / num_samples

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train `net` with SGD (momentum 0.9) and print one summary line per epoch.

    The learning rate is multiplied by `lr_decay` at epochs `lr_period`,
    ``int(lr_period * 1.5)`` and ``lr_period * 2``.

    Args:
        net: Network to train; its parameters are moved to `ctx` and the
            network is hybridized.
        train_data: Iterable of ``(data, label)`` training batches whose
            first axis is the batch axis.
        valid_data: Iterable of validation batches, or ``None`` to skip
            validation.
        num_epochs: Number of passes over `train_data`.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Context to train on.
        lr_period: Epoch of the first learning-rate decay.
        lr_decay: Factor applied to the learning rate at each decay epoch.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9,
                                      'wd': wd})
    # Make sure the parameters of net live on ctx.
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    decay_epochs = (lr_period, int(lr_period * 1.5), lr_period * 2)
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        num_samples = 0
        if epoch > 0 and epoch in decay_epochs:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalize the gradient by the size of THIS batch. The last
            # batch may be shorter, and a notebook-global batch_size may not
            # match the loader that produced train_data.
            cur_batch_size = data.shape[0]
            trainer.step(cur_batch_size)
            # Weight the batch mean by its sample count so the reported
            # loss is a true per-sample average.
            train_loss += nd.mean(loss).asscalar() * cur_batch_size
            num_samples += cur_batch_size
        cur_time = datetime.datetime.now()
        # total_seconds() keeps whole days, which timedelta.seconds drops.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        mean_train_loss = (train_loss / num_samples if num_samples
                           else float('nan'))
        if valid_data is not None:
            valid_loss = get_loss(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Train loss: %f, Valid loss %f, "
                         % (epoch, mean_train_loss, valid_loss))
        else:
            epoch_str = ("Epoch %d. Train loss: %f, "
                         % (epoch, mean_train_loss))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

In [9]:
# Use a pretrained model from the Gluon model zoo.
from mxnet.gluon.model_zoo import vision as models
import mxnet as mx
ctx = mx.gpu()
pretrained_net = models.densenet161(pretrained=True, ctx=ctx)
# Model-zoo names noted for reference:
#['DenseNet', 'densenet121', 'densenet161', 'densenet169', 'densenet201']
#['VGG','vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn', 'get_vgg']
#['ResNetV1', 'ResNetV2', 'BasicBlockV1', 'BasicBlockV2', 'BottleneckV1', 'BottleneckV2', 'resnet18_v1', 'resnet34_v1', 'resnet50_v1', 'resnet101_v1', 'resnet152_v1',
#           'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet101_v2', 'resnet152_v2', 'get_resnet']

# Display the last four layers of the pretrained feature extractor.
pretrained_net.features[-4:]

[BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, in_channels=2208),
 Activation(relu),
 AvgPool2D(size=(7, 7), stride=(7, 7), padding=(0, 0), ceil_mode=False),
 Flatten]

In [11]:
from mxnet.gluon import nn
# Rebuild the pretrained feature extractor without its last layer.
net = nn.HybridSequential()
for layer in pretrained_net.features[:-1]:
    net.add(layer)

input_shape = (224, 224)
# Create the probe input on the same context as the pretrained weights
# (`ctx`); an input on the default context cannot be fed to layers whose
# parameters live on another device.
x = nd.random.uniform(shape=(1, 3, *input_shape), ctx=ctx)
print('Input:', x.shape)
print(net)
print('Output:', net(x).shape)

Input: (1, 3, 224, 224)
HybridSequential(
  (0): Conv2D(3 -> 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, in_channels=96)
  (2): Activation(relu)
  (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)
  (4): HybridSequential(
    (0): HybridConcurrent(
      (0): Identity(
      
      )
      (1): HybridSequential(
        (0): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, in_channels=96)
        (1): Activation(relu)
        (2): Conv2D(96 -> 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False, in_channels=192)
        (4): Activation(relu)
        (5): Conv2D(192 -> 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
    )
    (1): HybridConcurrent(
      (0): Identity(
      
      )
      (1): HybridSequential(
        (0): BatchNorm(axis=1, eps=1e-05, momentum=0.

IndexError: list index out of range

In [None]:
# Append the classification head (dropout followed by a 120-unit dense
# layer) inside the network's own name scope.
with net.name_scope():
    net.add(nn.Dropout(0.5))
    net.add(nn.Dense(120))



In [6]:
# Build a fresh DenseNet-161 with a 120-class output layer on ctx.
net = models.densenet161(classes=120, ctx=ctx)
# Xavier-initialize every parameter of the new network. This must happen
# before the feature swap below.
net.collect_params().initialize(init.Xavier(), ctx=ctx)
# Replace the freshly initialized feature extractor with the pretrained one.
net.features = pretrained_net.features
# Disabled alternative: initialize only the output layer.
#net.output.initialize(init.Xavier(), ctx=ctx)


In [6]:
# Freeze the feature-extractor parameters by giving each of them a
# learning-rate multiplier of zero.
# Disabled alternative that turns gradients off for every parameter:
#for _, w in net.collect_params().items():
#    w.grad_req = 'null'
for param in net.features.collect_params().values():
    param.lr_mult = 0

AttributeError: 'HybridSequential' object has no attribute 'features'

In [7]:
# Training hyperparameters.
ctx = utils.try_gpu()
num_epochs = 100
learning_rate = 0.01
weight_decay = 1e-4
lr_period = 50
lr_decay = 0.1

# Train on the training split and report the validation loss every epoch.
train(
    net,
    train_data,
    valid_data,
    num_epochs=num_epochs,
    lr=learning_rate,
    wd=weight_decay,
    ctx=ctx,
    lr_period=lr_period,
    lr_decay=lr_decay,
)

MXNetError: Shape inconsistent, Provided=[16], inferred shape=[16,2208,1]

In [6]:
# Previously used file name, kept for reference:
#filename = "densenet_depth100.params"
# NOTE(review): the visible cells build `net` from densenet161, but the file
# name says resnet50_v2 -- confirm which model these weights belong to.
filename = 'resnet50_v2.params'
# Write the current parameters of net to `filename`.
net.save_params(filename)

In [8]:
import numpy as np
import os
# Disabled: retrain on the combined train+valid split before predicting.
#net = get_net(ctx)
#net.hybridize()
#train(net, train_valid_data, None, num_epochs, learning_rate, weight_decay,
#      ctx, lr_period, lr_decay)

# Collect one softmax probability vector per test image, in loader order.
outputs = []
for data, label in test_data:
    probabilities = nd.softmax(net(data.as_in_context(ctx)))
    outputs.extend(probabilities.asnumpy())

# Test file names in sorted order; each one is paired with the prediction
# at the same position.
test_folder = os.path.join(data_dir, input_dir, 'test/unknown')
ids = sorted(os.listdir(test_folder))

with open('submission.csv', 'w') as f:
    # Header: "id" followed by the class names of the train_valid dataset.
    f.write('id,' + ','.join(train_valid_ds.synsets) + '\n')
    for file_name, output in zip(ids, outputs):
        image_id = file_name.split('.')[0]
        row = ','.join(map(str, output))
        f.write(image_id + ',' + row + '\n')