In [1]:
import os
import math
import mxnet as mx
from mxnet import image
from mxnet import nd, gluon, autograd, init
from mxnet.gluon import nn
from tensorboardX import SummaryWriter
import numpy as np
import shutil

In [2]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides warnings that may matter (e.g. API
# deprecations) -- consider narrowing the filter.
warnings.filterwarnings('ignore')

  new_obj[k] = extract_dates(v)


### 进行数据集的重新分类

In [None]:
# Build the image-id -> class-name lookup from the label CSV.
with open('./data/trainLabels.csv', 'r') as f:
    # Drop the first line of the file; every remaining row is "<id>,<label>".
    lines = f.readlines()[1:]
    tokens = [row.rstrip().split(',') for row in lines]
    idx_label = {int(image_id): name for image_id, name in tokens}
# Distinct class names present in the label file.
labels = set(idx_label.values())

# Number of entries in the raw training folder.
num_train = len(os.listdir('./data/train/'))

# 90% of the images are used for tuning; the rest are held out for validation.
num_train_tuning = int(num_train * (1 - 0.1))

# Per-class quota of tuning images.
num_train_tuning_per_label = num_train_tuning // len(labels)

# Running count of tuning images already assigned to each class.
label_count = {}
def mkdir_if_not_exist(path):
    """Create the directory named by the joined ``path`` components if missing.

    Args:
        path: Sequence of path components; they are combined with
            ``os.path.join`` to form the directory to create.

    Missing intermediate directories are created as well. Calling this for a
    directory that already exists does nothing.
    """
    # exist_ok=True replaces the separate os.path.exists() test, which left a
    # window between the check and the creation.
    os.makedirs(os.path.join(*path), exist_ok=True)
# Copy every raw training image into the combined train+valid tree and into
# exactly one of the two split trees (tuning or validation).
for train_file in os.listdir('./data/train/'):
    # The part of the file name before the first '.' is the numeric image id.
    idx = int(train_file.split('.')[0])
    label = idx_label[idx]
    src = os.path.join('./data/train/', train_file)

    mkdir_if_not_exist(['./data', 'train_valid', label])
    shutil.copy(src, os.path.join('./data/train_valid', label))

    # Images of a class go to the tuning set until that class has reached its
    # quota; later images of the class go to the validation set.
    to_train = (label not in label_count
                or label_count[label] < num_train_tuning_per_label)
    split_root = './data/train_data' if to_train else './data/valid_data'
    mkdir_if_not_exist([split_root, label])
    shutil.copy(src, os.path.join(split_root, label))
    if to_train:
        label_count[label] = label_count.get(label, 0) + 1

In [3]:
def transform_train(data, label):
    """Augment and normalise one training sample.

    The image is zero-padded by 4 on both sides of its first two axes, scaled
    by 1/255, run through the augmenters returned by
    ``image.CreateAugmenter`` (configured for a random 32x32 crop, random
    mirroring and mean/std normalisation) and finally has its last axis moved
    to the front.

    Args:
        data: Image as an NDArray with three axes, channels last.
        label: Class index of the image.

    Returns:
        Tuple ``(image, label)``; ``label`` is a one-element float32 NDArray.
    """
    pad_width = ((4, 4), (4, 4), (0, 0))  # no padding on the channel axis
    padded = np.pad(data.asnumpy(), pad_width, mode='constant', constant_values=0)
    img = nd.array(padded, dtype='float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32),
        resize=0,
        rand_crop=True,
        rand_mirror=True,
        mean=np.array([0.4914, 0.4822, 0.4465]),
        std=np.array([0.2023, 0.1994, 0.2010]))
    for augment in augmenters:
        img = augment(img)
    img = nd.transpose(img, (2, 0, 1))  # channels-last -> channels-first
    target = nd.array([label]).astype('float32')
    return img, target

def transform_test(data, label):
    """Normalise one evaluation sample without random augmentation.

    The image is cast to float32, scaled by 1/255, passed through the
    augmenters returned by ``image.CreateAugmenter`` (configured with
    mean/std normalisation only) and has its last axis moved to the front.

    Args:
        data: Image as an NDArray with three axes, channels last.
        label: Class index of the image.

    Returns:
        Tuple ``(image, label)``; ``label`` is a one-element float32 NDArray.
    """
    img = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32),
        mean=np.array([0.4914, 0.4822, 0.4465]),
        std=np.array([0.2023, 0.1994, 0.2010]))
    for augment in augmenters:
        img = augment(img)
    img = nd.transpose(img, (2, 0, 1))  # channels-last -> channels-first
    target = nd.array([label]).astype('float32')
    return img, target

In [4]:
from mxnet.gluon.data.vision import ImageFolderDataset

In [5]:
# Tuning split: random augmentation applied per sample.
train_ds = ImageFolderDataset(root='./data/train_data/', transform=transform_train)
# Held-out split: deterministic preprocessing only.
valid_ds = ImageFolderDataset(root='./data/valid_data/', transform=transform_test)
# Tuning + held-out images together, augmented like the tuning split.
train_valid_ds = ImageFolderDataset(root='./data/train_valid/', transform=transform_train)
# Images to predict on; deterministic preprocessing only.
test_ds = ImageFolderDataset(root='./data/testSet/', transform=transform_test)

In [6]:
from mxnet.gluon.data import DataLoader

In [7]:
# Mini-batch iterators over the four datasets. last_batch='keep' is passed
# everywhere so a final, smaller batch is not dropped.
# NOTE(review): the tuning/validation loaders use batch_size=16 while the
# combined-train and test loaders use 128 -- confirm this is intentional.
train_data = DataLoader(train_ds, batch_size=16, shuffle=True, last_batch='keep')
# NOTE(review): shuffling validation data does not change the epoch metrics.
valid_data = DataLoader(valid_ds, batch_size=16, shuffle=True, last_batch='keep')
train_valid_data = DataLoader(train_valid_ds, batch_size=128, shuffle=True, last_batch='keep')
# Not shuffled: prediction order must line up with the ids written to the submission file.
test_data = DataLoader(test_ds, batch_size=128, shuffle=False, last_batch='keep')

In [8]:
# Loss shared by the training and validation passes of train(), which reads
# this module-level name directly.
criterion = gluon.loss.SoftmaxCrossEntropyLoss()

In [9]:
class Residual_v2_bottleneck(nn.HybridBlock):
    """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The two inner convolutions use ``channels // 4`` filters; the last one
    restores ``channels``. With ``same_shape=False`` the 3x3 convolution uses
    stride 2 and the shortcut is passed through a strided 1x1 convolution so
    both branches can be added.
    """
    def __init__(self, channels, same_shape=True):
        """
        Args:
            channels: Number of output channels of the unit.
            same_shape: If True the input is added to the output unchanged
                (stride 1); if False the unit uses stride 2 and projects
                the shortcut with ``conv4``.
        """
        super(Residual_v2_bottleneck, self).__init__()
        self.same_shape = same_shape
        # NOTE(review): Gluon appears to derive parameter names from the order
        # in which layers are created inside name_scope(); reordering the lines
        # below may break loading of previously saved parameter files -- confirm
        # before refactoring.
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.bn1 = nn.BatchNorm()
            self.conv1 = nn.Conv2D(channels // 4, 1, use_bias=False)
            self.bn2 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(
                channels // 4, 3, padding=1, strides=strides, use_bias=False)
            self.bn3 = nn.BatchNorm()
            self.conv3 = nn.Conv2D(channels, 1, use_bias=False)
            self.bn4 = nn.BatchNorm()

            if not same_shape:
                # Projection applied to the shortcut branch when the main
                # branch uses stride 2.
                self.conv4 = nn.Conv2D(
                    channels, 1, strides=strides, use_bias=False)

    def hybrid_forward(self, F, x):
        """Return ``branch(x) + shortcut(x)``."""
        # bn1 -> conv1 (no activation between them)
        out = self.conv1(self.bn1(x))
        # bn2 -> relu
        out = F.relu(self.bn2(out))
        # conv2 -> bn3 -> relu
        out = F.relu(self.bn3(self.conv2(out)))
        # conv3 -> bn4 (no activation before the addition)
        out = self.bn4(self.conv3(out))
        if not self.same_shape:
            x = self.conv4(x)
        return out + x


class ResNet164_v2(nn.HybridBlock):
    """Residual network built from three stages of ``Residual_v2_bottleneck`` units.

    Layout: a 3x3 stem convolution with 64 filters, 27 units at 64 channels,
    27 units at 128 channels (the first one downsampling), 27 units at 256
    channels (the first one downsampling), then BatchNorm, ReLU, average
    pooling and a dense classifier.
    """
    def __init__(self, num_classes, verbose=False):
        """
        Args:
            num_classes: Number of units in the final dense layer.
            verbose: If True, print the output shape after every child block
                during the forward pass.
        """
        super(ResNet164_v2, self).__init__()
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # block 1
            # Positional arguments: 64 channels, kernel 3, stride 1, padding 1.
            net.add(nn.Conv2D(64, 3, 1, 1, use_bias=False))
            # block 2
            for _ in range(27):
                net.add(Residual_v2_bottleneck(64))
            # block 3
            net.add(Residual_v2_bottleneck(128, same_shape=False))
            for _ in range(26):
                net.add(Residual_v2_bottleneck(128))
            # block 4
            net.add(Residual_v2_bottleneck(256, same_shape=False))
            for _ in range(26):
                net.add(Residual_v2_bottleneck(256))
            # block 5
            net.add(nn.BatchNorm())
            net.add(nn.Activation('relu'))
            # assumes 8x8 feature maps at this point (32x32 inputs halved
            # twice) -- TODO confirm for other input sizes
            net.add(nn.AvgPool2D(8))
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Apply the child blocks of ``self.net`` in order and return the result."""
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                # NOTE(review): relies on `out` exposing `.shape`; presumably
                # only usable before hybridize() -- verify.
                print('Block %d output: %s' % (i + 1, out.shape))
        return out

def get_net(ctx):
    """Build a 10-output ``ResNet164_v2`` and Xavier-initialise it on ``ctx``.

    Args:
        ctx: Device context on which the parameters are initialised.

    Returns:
        The initialised network.
    """
    model = ResNet164_v2(10)
    model.initialize(init=mx.init.Xavier(), ctx=ctx)
    return model

In [15]:
class Bottleneck(nn.HybridBlock):
    """DenseNet bottleneck layer: BN-ReLU-1x1 conv, then BN-ReLU-3x3 conv.

    The 1x1 convolution produces ``4 * growthRate`` channels and the 3x3
    convolution produces ``growthRate`` channels, which are concatenated to
    the layer input along axis 1.
    """
    def __init__(self, growthRate):
        """
        Args:
            growthRate: Number of channels this layer appends to its input.
        """
        super(Bottleneck, self).__init__()
        interChannels = 4 * growthRate
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            # Weights drawn from a normal distribution with sigma = sqrt(2 / out_channels).
            self.conv1 = nn.Conv2D(
                interChannels,
                kernel_size=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / interChannels)))
            self.bn2 = nn.BatchNorm()
            # sigma = sqrt(2 / (kernel_area * out_channels)) with kernel_area = 3 * 3.
            self.conv2 = nn.Conv2D(
                growthRate,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(
                    math.sqrt(2. / (9 * growthRate))))

    def hybrid_forward(self, F, x):
        """Return ``x`` concatenated with the newly computed features along axis 1."""
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = F.concat(* [x, out], dim=1)
        return out


class SingleLayer(nn.HybridBlock):
    """DenseNet layer without bottleneck: BN-ReLU-3x3 conv.

    The convolution produces ``growthRate`` channels, which are concatenated
    to the layer input along axis 1.
    """
    def __init__(self, growthRate):
        """
        Args:
            growthRate: Number of channels this layer appends to its input.
        """
        super(SingleLayer, self).__init__()
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            # sigma = sqrt(2 / (kernel_area * out_channels)) with kernel_area = 3 * 3.
            self.conv1 = nn.Conv2D(
                growthRate,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(
                    math.sqrt(2. / (9 * growthRate))))

    def hybrid_forward(self, F, x):
        """Return ``x`` concatenated with the newly computed features along axis 1."""
        out = self.conv1(F.relu(self.bn1(x)))
        # The axis must be passed as the keyword `dim`: concat treats every
        # positional argument as an input array, so a bare positional `1`
        # is rejected. This now matches Bottleneck.hybrid_forward.
        out = F.concat(x, out, dim=1)
        return out


class Transition(nn.HybridBlock):
    """DenseNet transition: BN-ReLU-1x1 conv followed by 2x2 average pooling."""
    def __init__(self, nOutChannels):
        """
        Args:
            nOutChannels: Number of channels produced by the 1x1 convolution.
        """
        super(Transition, self).__init__()
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            # Weights drawn from a normal distribution with sigma = sqrt(2 / out_channels).
            self.conv1 = nn.Conv2D(
                nOutChannels,
                kernel_size=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / nOutChannels)))

    def hybrid_forward(self, F, x):
        """Change the channel count to ``nOutChannels`` and pool with a 2x2 window, stride 2."""
        out = self.conv1(F.relu(self.bn1(x)))
        out = F.Pooling(out, kernel=(2, 2), stride=(2, 2), pool_type='avg')
        return out


class DenseNet(nn.HybridBlock):
    """DenseNet with three dense stages separated by two ``Transition`` blocks.

    Forward path: 3x3 stem conv -> dense1 -> trans1 -> dense2 -> trans2 ->
    dense3 -> BN -> ReLU -> global average pooling -> dense classifier.
    """
    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        """
        Args:
            growthRate: Channels added by each layer inside a dense stage;
                the stem convolution outputs ``2 * growthRate`` channels.
            depth: Nominal network depth; each stage gets ``(depth - 4) // 3``
                layers, halved again when ``bottleneck`` is True.
            reduction: Factor applied to the channel count at each transition
                (the result is floored to an int).
            nClasses: Number of units in the final dense layer.
            bottleneck: If True use ``Bottleneck`` layers, otherwise
                ``SingleLayer`` layers.
        """
        super(DenseNet, self).__init__()

        nDenseBlocks = (depth - 4) // 3
        if bottleneck:
            nDenseBlocks //= 2

        # nChannels tracks the channel count entering the next component; it
        # is only used to size the transition convolutions.
        nChannels = 2 * growthRate
        with self.name_scope():
            self.conv1 = nn.Conv2D(
                nChannels,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / nChannels)))
            self.dense1 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)

        # Every layer of the stage appended growthRate channels.
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        with self.name_scope():
            self.trans1 = Transition(nOutChannels)

        nChannels = nOutChannels
        with self.name_scope():
            self.dense2 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        with self.name_scope():
            self.trans2 = Transition(nOutChannels)

        nChannels = nOutChannels
        with self.name_scope():
            self.dense3 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)
        # Final channel count; not read again in this method.
        nChannels += nDenseBlocks * growthRate

        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            self.fc = nn.Dense(nClasses)

    def _make_dense(self, growthRate, nDenseBlocks, bottleneck):
        """Return a ``HybridSequential`` of ``nDenseBlocks`` dense layers.

        Each layer is a ``Bottleneck`` when ``bottleneck`` is true and a
        ``SingleLayer`` otherwise.
        """
        layers = nn.HybridSequential()
        for i in range(int(nDenseBlocks)):
            if bottleneck:
                layers.add(Bottleneck(growthRate))
            else:
                layers.add(SingleLayer(growthRate))
        return layers

    def hybrid_forward(self, F, x):
        """Compute the ``nClasses`` classifier outputs for the batch ``x``."""
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        # NOTE(review): with global_pool enabled the kernel argument is
        # presumably ignored by the operator -- confirm against the Pooling docs.
        out = F.Pooling(
            F.relu(self.bn1(out)),
            global_pool=1,
            pool_type='avg',
            kernel=(8, 8))
        out = self.fc(out)
        return out

In [None]:
# DenseNet with bottleneck layers: growth rate 12, depth 100, channel
# reduction 0.5 at each transition, 10 output classes.
net = DenseNet(
    growthRate=12,
    depth=100,
    reduction=0.5,
    nClasses=10,
    bottleneck=True)
net.hybridize()
# Parameters are created on the first GPU.
net.initialize(ctx=mx.gpu(0))

In [10]:
import datetime
# Module-level TensorBoard writer; train() logs its per-epoch loss and
# accuracy scalars through it.
writer = SummaryWriter()

def get_acc(output, label):
    """Count the samples whose highest-scoring class equals the label.

    Args:
        output: Per-sample class scores; the argmax is taken over axis 1
            with ``keepdims=True``.
        label: Labels compared element-wise against that argmax.

    Returns:
        The number of matches as a scalar (a count, not a ratio).
    """
    predicted = output.argmax(1, keepdims=True)
    hits = predicted == label
    return hits.sum().asscalar()

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_decay,
          lr_decay_epochs=(89, 139)):
    """Train ``net`` with SGD (momentum 0.9) and log metrics once per epoch.

    For every epoch the mean training loss and the training accuracy are
    written to the module-level ``writer`` and printed. When ``valid_data`` is
    given, the same metrics are also computed on it (outside
    ``autograd.record()``) and reported. The loss function is the
    module-level ``criterion``.

    Args:
        net: Gluon block to optimise.
        train_data: Iterable of ``(data, label)`` batches; must support ``len()``.
        valid_data: Iterable of ``(data, label)`` batches, or None to skip
            validation.
        num_epochs: Number of passes over ``train_data``.
        lr: Initial learning rate.
        wd: Weight decay passed to the optimizer.
        ctx: Device context that each batch is moved to.
        lr_decay: Factor the learning rate is multiplied by at each epoch
            listed in ``lr_decay_epochs``.
        lr_decay_epochs: Zero-based epoch indices at whose start the learning
            rate is decayed. Defaults to ``(89, 139)``, the schedule that was
            previously hard-coded.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0
        correct = 0
        total = 0
        if epoch in lr_decay_epochs:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            bs = data.shape[0]
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss = criterion(output, label)
            loss.backward()
            # step(bs) normalises the accumulated gradient by the batch size.
            trainer.step(bs)
            train_loss += nd.mean(loss).asscalar()
            correct += get_acc(output, label)
            total += bs
        # Mean of the per-batch mean losses, and the fraction of correct samples.
        train_loss_avg = train_loss / len(train_data)
        train_acc = correct / total
        writer.add_scalars('loss', {'train': train_loss_avg}, epoch)
        writer.add_scalars('acc', {'train': train_acc}, epoch)
        if valid_data is not None:
            valid_correct = 0
            valid_total = 0
            valid_loss = 0
            for data, label in valid_data:
                bs = data.shape[0]
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx)
                output = net(data)
                loss = criterion(output, label)
                valid_loss += nd.mean(loss).asscalar()
                valid_correct += get_acc(output, label)
                valid_total += bs
            valid_loss_avg = valid_loss / len(valid_data)
            valid_acc = valid_correct / valid_total
            writer.add_scalars('loss', {'valid': valid_loss_avg}, epoch)
            writer.add_scalars('acc', {'valid': valid_acc}, epoch)
            epoch_str = ("Epoch %d. Train Loss: %f, Train acc %f, Valid Loss: %f, Valid acc %f, "
                         % (epoch, train_loss_avg, train_acc, valid_loss_avg, valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss_avg, train_acc))
        # Take the timestamp after validation so the printed duration covers
        # this epoch's training and validation. total_seconds() is used because
        # timedelta.seconds wraps around after 24 hours.
        cur_time = datetime.datetime.now()
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

In [11]:
# Hyper-parameters for the final ResNet run.
ctx = mx.gpu(0)
num_epochs = 200
learning_rate = 0.1
weight_decay = 1e-4
lr_decay = 0.1

net = get_net(ctx)
net.hybridize()
# Train on the combined train+valid images; no validation pass is run.
train(net=net,
      train_data=train_valid_data,
      valid_data=None,
      num_epochs=num_epochs,
      lr=learning_rate,
      wd=weight_decay,
      ctx=ctx,
      lr_decay=lr_decay)

Epoch 0. Loss: 1.490191, Train acc 0.448560, Time 00:02:54, lr 0.1
Epoch 1. Loss: 0.970902, Train acc 0.653180, Time 00:02:53, lr 0.1
Epoch 2. Loss: 0.761507, Train acc 0.732220, Time 00:02:51, lr 0.1
Epoch 3. Loss: 0.637571, Train acc 0.777640, Time 00:02:51, lr 0.1
Epoch 4. Loss: 0.550763, Train acc 0.810260, Time 00:02:51, lr 0.1
Epoch 5. Loss: 0.496664, Train acc 0.827800, Time 00:02:54, lr 0.1
Epoch 6. Loss: 0.454828, Train acc 0.843060, Time 00:02:54, lr 0.1
Epoch 7. Loss: 0.420586, Train acc 0.853900, Time 00:02:53, lr 0.1
Epoch 8. Loss: 0.392722, Train acc 0.863820, Time 00:02:51, lr 0.1
Epoch 9. Loss: 0.370259, Train acc 0.871480, Time 00:02:50, lr 0.1
Epoch 10. Loss: 0.350839, Train acc 0.877480, Time 00:02:50, lr 0.1
Epoch 11. Loss: 0.331100, Train acc 0.885680, Time 00:02:50, lr 0.1
Epoch 12. Loss: 0.317203, Train acc 0.889840, Time 00:02:50, lr 0.1
Epoch 13. Loss: 0.306538, Train acc 0.893260, Time 00:02:51, lr 0.1
Epoch 14. Loss: 0.294185, Train acc 0.897560, Time 00:02:5

In [19]:
# Rebuild the DenseNet with the same hyper-parameters as the earlier DenseNet
# cell and restore its weights from disk.
# NOTE(review): no visible cell writes './densenet.params'; the file must
# already exist for this cell to run on a fresh kernel.
net1 = DenseNet(growthRate=12, depth=100, reduction=0.5,
                            bottleneck=True, nClasses=10)
net1.hybridize()
net1.load_params('./densenet.params', ctx=mx.gpu(0))

In [20]:
# Build a ResNet via get_net() and overwrite its freshly initialised weights
# with the ones stored on disk.
# NOTE(review): no visible cell writes './resnet.params'; the file must
# already exist for this cell to run on a fresh kernel.
net2 = get_net(ctx)
net2.hybridize()
net2.load_params('./resnet.params', ctx=ctx)

In [None]:
import pandas as pd

# Ensemble inference: for every test image take the single most confident
# class probability across both networks.
preds = []
for data, _ in test_data:
    # Move the batch to the device once and reuse it for both models.
    data = data.as_in_context(ctx)
    output1 = nd.softmax(net1(data))
    output2 = nd.softmax(net2(data))
    # Fixed: the original concatenated the undefined names `out1`/`out2`,
    # which raised NameError on the first batch.
    output = nd.concat(output1, output2, dim=1)
    # The two probability blocks sit side by side along axis 1, so the argmax
    # column modulo the per-model class count gives the class index.
    num_classes = output1.shape[1]
    pred_label = output.argmax(1) % num_classes
    preds.extend(pred_label.astype(int).asnumpy())

# Ids 1..N ordered as strings ("1", "10", "100", ...).
# NOTE(review): presumably this mirrors the file-name order in which
# ImageFolderDataset enumerates the test images -- verify against the dataset.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=str)

df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Translate class indices back to class names via the training dataset.
df['label'] = df['label'].apply(lambda x: train_ds.synsets[x])
df.to_csv('submission.csv', index=False)