In [30]:
import os
import math
import mxnet as mx
from mxnet import image
from mxnet import nd, gluon, autograd, init
from mxnet.gluon import nn
from tensorboardX import SummaryWriter
import numpy as np
import shutil

In [31]:
# Demo mode uses the validation dataset for training, which is smaller and faster to train.
demo = False
log_interval = 100
gpus = 1

# Options are imperative or hybrid. Use hybrid for better performance.
mode = 'hybrid'

# training hyperparameters
batch_size = 128
if demo:
    epochs = 5
    learning_rate = 0.02
    wd = 0.002
else:
    epochs = 40
    learning_rate = 0.05
    wd = 0.002


In [32]:
def transform_train(data, label):
    data = image.imresize(data, 256, 256)
    im = data.asnumpy()
    #im = np.pad(im, ((4, 4), (4, 4), (0, 0)), mode='constant', constant_values=0)
    #im = nd.array(im, dtype='float32') / 255
    auglist = image.CreateAugmenter(data_shape=(3, 224, 224), resize=0, rand_mirror=True,
                                    rand_crop=True,
                                   mean=np.array([0.4914, 0.4822, 0.4465]),
                                   std=np.array([0.2023, 0.1994, 0.2010]))
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2, 0, 1)) # channel x width x height
    return im, nd.array([label]).astype('float32')

def transform_test(data, label):
    data = image.imresize(data, 256, 256)
    im = data.astype('float32') / 255
    auglist = image.CreateAugmenter(data_shape=(3, 224, 224), mean=np.array([0.4914, 0.4822, 0.4465]),
                                   std=np.array([0.2023, 0.1994, 0.2010]))
    for aug in auglist:
        im = aug(im)
    im = nd.transpose(im, (2, 0, 1))
    return im, nd.array([label]).astype('float32')

In [33]:
from mxnet.gluon.data.vision import ImageFolderDataset

In [34]:
train_ds = ImageFolderDataset('CIFAR_10/train_data/', transform=transform_train)
valid_ds = ImageFolderDataset('CIFAR_10/valid_data/', transform=transform_test)
train_valid_ds = ImageFolderDataset('CIFAR_10/train_valid/', transform=transform_train)
test_ds = ImageFolderDataset('CIFAR_10/test/', transform=transform_test)

In [35]:
from mxnet.gluon.data import DataLoader
train_data = DataLoader(train_ds, batch_size=16, shuffle=True, last_batch='keep')
valid_data = DataLoader(valid_ds, batch_size=16, shuffle=True, last_batch='keep')
train_valid_data = DataLoader(train_valid_ds, batch_size=128, shuffle=True, last_batch='keep')
test_data = DataLoader(test_ds, batch_size=128, shuffle=False, last_batch='keep')

#train_data = DataLoader(train_ds, batch_size=64, shuffle=True, last_batch='keep')
#valid_data = DataLoader(valid_ds, batch_size=64, shuffle=True, last_batch='keep')
#train_valid_data = DataLoader(train_valid_ds, batch_size=64, shuffle=True, last_batch='keep')
#test_data = DataLoader(test_ds, batch_size=64, shuffle=False, last_batch='keep')

In [36]:
from mxnet.gluon.model_zoo import vision as models

pretrained_densenet161 = models.densenet161(pretrained=True, prefix='cifar_10_')
#pretrained_densenet121 = models.densenet121(pretrained=True)

In [37]:
print(pretrained_densenet161)

DenseNet(
  (features): HybridSequential(
    (0): Conv2D(96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False)
    (2): Activation(relu)
    (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(1, 1), ceil_mode=False)
    (4): HybridSequential(
      (0): HybridConcurrent(
        (0): Identity(
        
        )
        (1): HybridSequential(
          (0): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False)
          (1): Activation(relu)
          (2): Conv2D(192, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (3): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False)
          (4): Activation(relu)
          (5): Conv2D(48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
      )
      (1): HybridConcurrent(
        (0): Identity(
        
        )
        (1): HybridSequential(
          (0): BatchNorm(axis=1, eps=1e-05, momentum=0.9, fix_gamma=False)

In [38]:
from mxnet import init

#dense net
finetune_densenet161 = models.densenet161(prefix='cifar_10_', classes=10)
finetune_densenet161.collect_params().initialize()
finetune_densenet161.features = pretrained_densenet161.features
#固定参数
for _, w in finetune_densenet161.collect_params().items():
    w.grad_req = 'null'
#finetune_densenet161.classifier.initialize(init.Xavier())

#finetune_densenet121 = models.densenet121(classes=10)
#finetune_densenet121.features = pretrained_densenet121.features
#固定参数
#for _, w in finetune_densenet121.collect_params().items():
#    w.grad_req = 'null'
#finetune_densenet121.classifier.initialize(init.Xavier())

In [9]:
#resnet 
#pretrained_resnet50_v2 = models.resnet50_v2(pretrained=True)

#finetune_resnet50_v2 = models.resnet50_v2(classes=10)
#finetune_resnet50_v2.features = pretrained_resnet50_v2.features
#固定参数
#for _, w in finetune_resnet50_v2.collect_params().items():
#    w.grad_req = 'null'
#finetune_resnet50_v2.classifier.initialize(init.Xavier())

pretrained_resnet18_v2 = models.resnet18_v2(pretrained=True)

finetune_resnet18_v2 = models.resnet18_v2(classes=10)
finetune_resnet18_v2.features = pretrained_resnet18_v2.features
#固定参数
for _, w in finetune_resnet18_v2.collect_params().items():
    w.grad_req = 'null'
finetune_resnet18_v2.classifier.initialize(init.Xavier())

In [39]:
import utils

def train(net, ctx, train_data, valid_data, epochs=20, learning_rate=0.01, wd=0.001):

    # 确保net的初始化在ctx上
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    # 训练
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {'learning_rate': learning_rate, 'momentum': 0.9, 'wd': wd})
    utils.train(train_data, valid_data, net, loss, trainer, ctx, epochs)

In [40]:
#训练
if mode == 'hybrid':
    finetune_densenet161.hybridize()
if epochs > 0:
    contexts = mx.gpu()
    train(finetune_densenet161, contexts, train_data, valid_data, epochs, learning_rate, wd)

AssertionError: Argument data must have NDArray type, but got [[[146 165 180]
  [146 165 180]
  [146 165 180]
  ..., 
  [116 135 150]
  [116 135 150]
  [116 135 150]]

 [[146 165 180]
  [146 165 180]
  [146 165 180]
  ..., 
  [116 135 150]
  [116 135 150]
  [116 135 150]]

 [[146 165 180]
  [146 165 180]
  [146 165 180]
  ..., 
  [116 135 150]
  [116 135 150]
  [116 135 150]]

 ..., 
 [[141 163 181]
  [141 163 181]
  [141 163 181]
  ..., 
  [ 40  39  38]
  [ 40  39  38]
  [ 40  39  38]]

 [[141 163 181]
  [141 163 181]
  [141 163 181]
  ..., 
  [ 40  39  38]
  [ 40  39  38]
  [ 40  39  38]]

 [[141 163 181]
  [141 163 181]
  [141 163 181]
  ..., 
  [ 40  39  38]
  [ 40  39  38]
  [ 40  39  38]]]

In [None]:
ctx = utils.try_gpu()
train(finetune_resnet50_v2, ctx, train_data, valid_data)

In [11]:
ctx = utils.try_gpu()
train(finetune_resnet18_v2, ctx, train_data, valid_data)

MXNetError: [11:17:02] D:\Program Files (x86)\Jenkins\workspace\mxnet\mxnet\src\imperative\imperative.cc:371: Check failed: !AGInfo::IsNone(*i) Cannot differentiate node because it is not in a computational graph. You need to set is_recording to true or use autograd.record() to save computational graphs for backward. If you want to differentiate the same graph twice, you need to pass retain_graph=True to backward.

In [8]:
class Bottleneck(nn.HybridBlock):
    def __init__(self, growthRate):
        super(Bottleneck, self).__init__()
        interChannels = 4 * growthRate
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            self.conv1 = nn.Conv2D(
                interChannels,
                kernel_size=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / interChannels)))
            self.bn2 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(
                growthRate,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(
                    math.sqrt(2. / (9 * growthRate))))

    def hybrid_forward(self, F, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = F.concat(* [x, out], dim=1)
        return out


class SingleLayer(nn.HybridBlock):
    def __init__(self, growthRate):
        super(SingleLayer, self).__init__()
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            self.conv1 = nn.Conv2D(
                growthRate,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(
                    math.sqrt(2. / (9 * growthRate))))

    def hybrid_forward(self, F, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = F.concat(* [x, out], 1)
        return out


class Transition(nn.HybridBlock):
    def __init__(self, nOutChannels):
        super(Transition, self).__init__()
        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            self.conv1 = nn.Conv2D(
                nOutChannels,
                kernel_size=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / nOutChannels)))

    def hybrid_forward(self, F, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = F.Pooling(out, kernel=(2, 2), stride=(2, 2), pool_type='avg')
        return out


class DenseNet(nn.HybridBlock):
    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        super(DenseNet, self).__init__()

        nDenseBlocks = (depth - 4) // 3
        if bottleneck:
            nDenseBlocks //= 2

        nChannels = 2 * growthRate
        with self.name_scope():
            self.conv1 = nn.Conv2D(
                nChannels,
                kernel_size=3,
                padding=1,
                use_bias=False,
                weight_initializer=init.Normal(math.sqrt(2. / nChannels)))
            self.dense1 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)

        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        with self.name_scope():
            self.trans1 = Transition(nOutChannels)

        nChannels = nOutChannels
        with self.name_scope():
            self.dense2 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        with self.name_scope():
            self.trans2 = Transition(nOutChannels)

        nChannels = nOutChannels
        with self.name_scope():
            self.dense3 = self._make_dense(growthRate, nDenseBlocks,
                                           bottleneck)
        nChannels += nDenseBlocks * growthRate

        with self.name_scope():
            self.bn1 = nn.BatchNorm()
            self.fc = nn.Dense(nClasses)

    def _make_dense(self, growthRate, nDenseBlocks, bottleneck):
        layers = nn.HybridSequential()
        for i in range(int(nDenseBlocks)):
            if bottleneck:
                layers.add(Bottleneck(growthRate))
            else:
                layers.add(SingleLayer(growthRate))
        return layers

    def hybrid_forward(self, F, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = F.Pooling(
            F.relu(self.bn1(out)),
            global_pool=1,
            pool_type='avg',
            kernel=(8, 8))
        out = self.fc(out)
        return out

In [15]:
net1 = DenseNet(growthRate=12, depth=100, reduction=0.5,
                            bottleneck=True, nClasses=10)
net1.hybridize()
net1.load_params('densenet-201-ImageNet.params', ctx=mx.gpu(0))

AssertionError: Parameter conv0_weight is missing in file densenet-201-ImageNet.params

In [14]:
net1.params.get('weight').data()[0][0]

RuntimeError: Parameter densenet1_weight has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks