In [3]:
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np
import random
import mxnet as mx
from netlib import *
# Device context (GPU with id 0) on which the networks are initialised and trained.
ctx = mx.gpu(0)

In [4]:
"""
data loader
"""
# Root folder of the dataset; data_loader() appends 'train', 'valid',
# 'train_valid' and 'test' to this prefix, so the trailing slash matters.
data_dir = "CIFAR_10/train_valid_test/"

def _transform_test(data, label):
    """Preprocess one sample without enabling any random augmentation option.

    The image is scaled by 1/255, passed through the augmenters returned by
    ``image.CreateAugmenter`` (only ``data_shape``, ``mean`` and ``std`` are
    set) and finally has its last axis moved to the front.

    Parameters
    ----------
    data : NDArray
        Image handed over by the dataset; assumed to be laid out as
        height x width x channel -- TODO confirm against the dataset class.
    label : scalar
        Class index of the sample.

    Returns
    -------
    tuple
        ``(image, label)``; ``label`` is a one-element float32 NDArray.
    """
    channel_mean = np.array([0.4914, 0.4822, 0.4465])
    channel_std = np.array([0.2023, 0.1994, 0.2010])

    sample = data.astype('float32') / 255
    pipeline = image.CreateAugmenter(data_shape=(3, 32, 32),
                                     mean=channel_mean, std=channel_std)
    for step in pipeline:
        sample = step(sample)

    # Move the last axis (channels, under the layout assumption above) first.
    channel_first = nd.transpose(sample, (2, 0, 1))
    target = nd.array([label]).astype('float32')
    return channel_first, target


def data_loader(batch_size, transform_train, transform_test=None):
    """Build the datasets and batch iterators for every split under ``data_dir``.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch for all four loaders.
    transform_train : callable or None
        ``(data, label) -> (data, label)`` transform for the 'train' and
        'train_valid' folders. ``None`` falls back to ``_transform_train``.
        NOTE(review): ``_transform_train`` is not defined in the visible part
        of this notebook -- confirm it exists before passing ``None``.
    transform_test : callable or None
        Transform for the 'valid' and 'test' folders; ``None`` falls back to
        ``_transform_test``.

    Returns
    -------
    tuple
        ``(train_data, valid_data, train_valid_data, test_data, test_ds,
        train_valid_ds)`` -- four DataLoaders followed by the raw test and
        train_valid datasets.
    """
    transform_train = _transform_train if transform_train is None else transform_train
    transform_test = _transform_test if transform_test is None else transform_test

    def _folder(split, transform):
        # flag=1 asks for 3-channel (colour) images.
        return vision.ImageFolderDataset(data_dir + split, flag=1, transform=transform)

    def _batches(dataset, shuffle):
        # last_batch='keep' retains the final, possibly smaller, batch.
        return gluon.data.DataLoader(dataset, batch_size, shuffle=shuffle, last_batch='keep')

    # All datasets are created before any loader, in the same order as before.
    train_ds = _folder('train', transform_train)
    valid_ds = _folder('valid', transform_test)
    train_valid_ds = _folder('train_valid', transform_train)
    test_ds = _folder("test", transform_test)

    # Only the test loader keeps the on-disk order.
    train_data = _batches(train_ds, True)
    valid_data = _batches(valid_ds, True)
    train_valid_data = _batches(train_valid_ds, True)
    test_data = _batches(test_ds, False)

    return train_data, valid_data, train_valid_data, test_data, test_ds, train_valid_ds

In [5]:
"""
data augmentation
"""
def transform_train_DA1(data, label):
    """Training transform: zero-pad, random crop, random mirror, normalise.

    The image is padded with 4 zeros on each side of its first two axes,
    scaled by 1/255 and run through the augmenters that
    ``image.CreateAugmenter`` builds for ``data_shape=(3, 32, 32)`` with
    ``rand_crop`` and ``rand_mirror`` enabled plus mean/std normalisation.
    The last axis is then moved to the front.

    Parameters
    ----------
    data : NDArray
        Input image; assumed height x width x channel -- TODO confirm.
    label : scalar
        Class index of the sample.

    Returns
    -------
    tuple
        ``(image, label)``; ``label`` is a one-element float32 NDArray.
    """
    # Pad the first two axes by 4 on each side; leave the last axis untouched.
    border = ((4, 4), (4, 4), (0, 0))
    padded = np.pad(data.asnumpy(), border, mode='constant', constant_values=0)
    sample = nd.array(padded, dtype='float32') / 255

    pipeline = image.CreateAugmenter(
        data_shape=(3, 32, 32), resize=0, rand_mirror=True, rand_crop=True,
        mean=np.array([0.4914, 0.4822, 0.4465]),
        std=np.array([0.2023, 0.1994, 0.2010]))
    for step in pipeline:
        sample = step(sample)

    # Last axis first (channel-first under the layout assumption above).
    channel_first = nd.transpose(sample, (2, 0, 1))
    target = nd.array([label]).astype('float32')
    return channel_first, target


def transform_train_DA2(data, label):
    """Training transform: random-sized crop followed by a shuffled augmenter group.

    Pipeline: scale by 1/255, ``RandomSizedCropAug`` to 32x32
    (``min_area=0.49``, aspect ratio in ``(0.5, 2)``), then the augmenters from
    ``image.CreateAugmenter`` (mirror, brightness/contrast/saturation/hue
    jitter, PCA noise, mean/std) wrapped in ``image.RandomOrderAug``.

    NOTE(review): mean/std are passed to the group that ``RandomOrderAug``
    wraps, so normalisation presumably may run before the colour jitter --
    confirm this is intended (``transform_train_DA3`` normalises separately).

    Parameters
    ----------
    data : NDArray
        Input image; assumed height x width x channel -- TODO confirm.
    label : scalar
        Class index of the sample.

    Returns
    -------
    tuple
        ``(image, label)``; here ``label`` is a float32 scalar obtained via
        ``asscalar()``, not an NDArray.
    """
    sample = data.astype(np.float32) / 255

    sized_crop = image.RandomSizedCropAug(size=(32, 32), min_area=0.49, ratio=(0.5, 2))
    shuffled_group = image.RandomOrderAug(
        image.CreateAugmenter(
            data_shape=(3, 32, 32), resize=0,
            rand_crop=False, rand_resize=False, rand_mirror=True,
            mean=np.array([0.4914, 0.4822, 0.4465]),
            std=np.array([0.2023, 0.1994, 0.2010]),
            brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3,
            pca_noise=0.01, rand_gray=0, inter_method=2))

    for stage in (sized_crop, shuffled_group):
        sample = stage(sample)

    channel_first = nd.transpose(sample, (2, 0, 1))
    target = nd.array([label]).asscalar().astype('float32')
    return (channel_first, target)
    

# Threshold compared against random.random() in transform_train_DA3: the clip
# to [0, 1] is applied only when the draw is greater than this value.
random_clip_rate = 0.3
def transform_train_DA3(data, label):
    """Training transform: like DA2, but normalisation runs as a separate last step.

    Pipeline: scale by 1/255, ``RandomSizedCropAug`` to 32x32, a
    ``RandomOrderAug`` group built by ``image.CreateAugmenter`` without
    mean/std, an optional clip to ``[0, 1]`` (applied when
    ``random.random() > random_clip_rate``), then ``ColorNormalizeAug``.

    Parameters
    ----------
    data : NDArray
        Input image; assumed height x width x channel -- TODO confirm.
    label : scalar
        Class index of the sample.

    Returns
    -------
    tuple
        ``(image, label)``; ``label`` is a float32 scalar obtained via
        ``asscalar()``.
    """
    sample = data.astype(np.float32) / 255

    sized_crop = image.RandomSizedCropAug(size=(32, 32), min_area=0.49, ratio=(0.5, 2))
    # mean/std are not passed here; normalisation is applied on its own below.
    shuffled_group = image.RandomOrderAug(
        image.CreateAugmenter(
            data_shape=(3, 32, 32), resize=0,
            rand_crop=False, rand_resize=False, rand_mirror=True,
            brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3,
            pca_noise=0.01, rand_gray=0, inter_method=2))

    for stage in (sized_crop, shuffled_group):
        sample = stage(sample)

    # The draw happens after the augmenters have run, as in the original order.
    if random.random() > random_clip_rate:
        sample = sample.clip(0, 1)

    normalise = image.ColorNormalizeAug(mean=np.array([0.4914, 0.4822, 0.4465]),
                                        std=np.array([0.2023, 0.1994, 0.2010]))
    sample = normalise(sample)

    channel_first = nd.transpose(sample, (2, 0, 1))
    target = nd.array([label]).asscalar().astype('float32')
    return (channel_first, target)

In [8]:
"""
train
"""
import datetime
import utils
import sys

def abs_mean(W):
    """Return the mean of the absolute values of ``W`` as a Python/NumPy scalar."""
    magnitudes = nd.abs(W)
    return nd.mean(magnitudes).asscalar()

def in_list(e, l):
    """Return True if any item of ``l`` compares equal (``==``) to ``e``, else False."""
    return any(candidate == e for candidate in l)

def train(net, train_data, valid_data, num_epochs, lr, lr_period,
          lr_decay, wd, ctx, w_key, output_file=None, verbose=False, loss_f=gluon.loss.SoftmaxCrossEntropyLoss()):
    """Train ``net`` with SGD (momentum 0.9) and write one log line per epoch.

    Parameters
    ----------
    net : gluon block
        Network to optimise; its parameters must already be initialised.
    train_data : iterable of (data, label)
        Training batches; ``len(train_data)`` is used to average the
        per-batch loss and accuracy.
    valid_data : iterable of (data, label) or None
        Validation batches. ``None`` skips every validation evaluation.
    num_epochs : int
        Number of passes over ``train_data``.
    lr : float
        Initial learning rate.
    lr_period : sequence of int
        Epoch indices at whose start the learning rate is multiplied by
        ``lr_decay``.
    lr_decay : float
        Multiplicative learning-rate factor.
    wd : float
        Weight decay passed to the trainer.
    ctx : mxnet context
        Device the batches are copied to.
    w_key : sequence of str
        Parameter names whose mean absolute weight and gradient are printed
        for every iteration when ``verbose`` is true.
    output_file : str or None
        Path of a log file (opened with mode "w"). While training runs,
        ``sys.stdout`` is redirected to that file. ``None`` logs to
        ``sys.stdout``.
    verbose : bool
        Print per-iteration diagnostics (and the initial validation accuracy
        when ``valid_data`` is given).
    loss_f : callable
        Loss applied to ``(output, label)``. The default instance is created
        once, when the function is defined.

    Returns
    -------
    None

    Notes
    -----
    ``sys.stdout`` is restored and the log file closed even if training
    raises (for example on a GPU out-of-memory error).
    """
    # Remember the real stdout so it can always be put back.
    stdout = sys.stdout
    if output_file is None:
        output_file = stdout
    else:
        output_file = open(output_file, "w")
        sys.stdout = output_file

    try:
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
        prev_time = datetime.datetime.now()

        # The epoch loop accepts valid_data=None, so the initial evaluation
        # must be skipped in that case as well.
        if verbose and valid_data is not None:
            print(" #", utils.evaluate_accuracy(valid_data, net, ctx))

        i = 0  # global iteration counter, only advanced in verbose mode
        for epoch in range(num_epochs):
            train_loss = 0.
            train_acc = 0.
            if in_list(epoch, lr_period):
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
            for data, label in train_data:
                label = label.as_in_context(ctx)
                with autograd.record():
                    output = net(data.as_in_context(ctx))
                    loss = loss_f(output, label)
                loss.backward()
                # Step with the size of the batch's first dimension.
                trainer.step(data.shape[0])

                _loss = nd.mean(loss).asscalar()
                _acc = utils.accuracy(output, label)
                train_loss += _loss
                train_acc += _acc

                if verbose:
                    params = net.collect_params()
                    print(" # iter", i,)
                    print("loss %.5f" % _loss, "acc %.5f" % _acc,)
                    print("w (",)
                    for k in w_key:
                        print("%.5f, " % abs_mean(params[k].data()),)
                    print(") g (",)
                    for k in w_key:
                        print("%.5f, " % abs_mean(params[k].grad()),)
                    print(")")
                    i += 1

            cur_time = datetime.datetime.now()
            # total_seconds() keeps whole days, which .seconds would drop.
            elapsed = int((cur_time - prev_time).total_seconds())
            h, remainder = divmod(elapsed, 3600)
            m, s = divmod(remainder, 60)
            time_str = "Time %02d:%02d:%02d" % (h, m, s)

            # Average the per-batch statistics over the number of batches.
            train_loss /= len(train_data)
            train_acc /= len(train_data)

            if valid_data is not None:
                valid_acc = utils.evaluate_accuracy(valid_data, net, ctx)
                epoch_str = ("epoch %d, loss %.5f, train_acc %.4f, valid_acc %.4f"
                             % (epoch, train_loss, train_acc, valid_acc))
            else:
                epoch_str = ("epoch %d, loss %.5f, train_acc %.4f"
                             % (epoch, train_loss, train_acc))
            prev_time = cur_time
            output_file.write(epoch_str + ", " + time_str + ",lr " + str(trainer.learning_rate) + "\n")
            output_file.flush()  # to disk only when flush or close
    finally:
        # Undo the redirection and release the file on success and on error.
        if output_file is not stdout:
            sys.stdout = stdout
            output_file.close()

In [13]:
# Experiment: train ResNet164_v2 on the 'train' split with the DA1 augmentation.
batch_size = 16
transform_train = transform_train_DA1
train_data, valid_data, train_valid_data, test_data, test_ds, train_valid_ds = data_loader(batch_size, transform_train)
net = ResNet164_v2(10)  # presumably 10 = number of output classes; confirm against the model definition
loss_f = gluon.loss.SoftmaxCrossEntropyLoss()

# Optimisation settings handed to train().
num_epochs = 200
learning_rate = 0.1
weight_decay = 1e-4
lr_period = [90, 140]  # epochs at which train() multiplies the learning rate by lr_decay
lr_decay=0.1
log_file = None  # None makes train() write its per-epoch log to sys.stdout

net.collect_params().initialize(mx.init.Xavier(), ctx=ctx, force_reinit=True)
net.hybridize()
w_key = []  # parameter names train() reports on in verbose mode; none selected
# verbose is passed positionally as False.
train(net, train_data, valid_data, num_epochs, learning_rate, 
      lr_period, lr_decay, weight_decay, ctx, w_key, log_file, False, loss_f)

# NOTE(review): the saved output of this cell is an MXNetError (CUDA out of
# memory); confirm the run completed before relying on this checkpoint.
net.save_params("v1/models/shelock_resnet_orign")

MXNetError: [10:40:13] d:\program files (x86)\jenkins\workspace\mxnet\mxnet\mshadow\mshadow\./cuda/tensor_gpu-inl.cuh:110: Check failed: err == cudaSuccess (2 vs. 0) Name: MapPlanKernel ErrStr:out of memory

In [14]:
# Experiment: train a DenseNet (depth 100, growth rate 12, bottleneck,
# reduction 0.5, 10 classes) on the 'train' split with the DA1 augmentation.
batch_size = 128
transform_train = transform_train_DA1
train_data, valid_data, train_valid_data, test_data, test_ds, train_valid_ds = data_loader(batch_size, transform_train)
net = DenseNet(growthRate=12, depth=100, reduction=0.5, bottleneck=True, nClasses=10)
loss_f = gluon.loss.SoftmaxCrossEntropyLoss()

# Optimisation settings handed to train().
num_epochs = 200
learning_rate = 0.1
weight_decay = 1e-4
lr_period = [90, 140]  # epochs at which train() multiplies the learning rate by lr_decay
lr_decay=0.1
log_file = None  # None makes train() write its per-epoch log to sys.stdout

net.hybridize()
# NOTE(review): no initializer is passed here, whereas the ResNet164_v2 cell
# uses mx.init.Xavier() -- confirm the difference is intended.
net.initialize(ctx=ctx)
w_key = []  # parameter names train() reports on in verbose mode; none selected
# verbose is passed positionally as False.
train(net, train_data, valid_data, num_epochs, learning_rate, lr_period, lr_decay, weight_decay, ctx, w_key, log_file, False, loss_f)
# NOTE(review): the saved output of this cell is an MXNetError (CUDA out of
# memory); confirm the run completed before relying on this checkpoint.
net.save_params("models/shelock_densenet_orign")

MXNetError: [10:41:50] d:\program files (x86)\jenkins\workspace\mxnet\mxnet\mshadow\mshadow\./cuda/tensor_gpu-inl.cuh:110: Check failed: err == cudaSuccess (2 vs. 0) Name: MapPlanKernel ErrStr:out of memory