## Extract Dataset

In [1]:
# When demo is True, the three zip archives below are unpacked into
# ../data/kaggle_cifar10 before anything else runs.
demo = False
if demo:
    import zipfile

    archive_names = ['train_tiny.zip', 'test_tiny.zip', 'trainLabels.csv.zip']
    for fin in archive_names:
        archive_path = '../data/kaggle_cifar10/' + fin
        with zipfile.ZipFile(archive_path, 'r') as zin:
            # Every archive is extracted into the same dataset directory.
            zin.extractall('../data/kaggle_cifar10')

## Reorganize Dataset

In [2]:
import os
import shutil

def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir, valid_ratio):
    """Copy the raw images into per-split, per-label folders.

    Under ``data_dir/input_dir`` the following layout is created:

    * ``train_valid/<label>/`` - every training image;
    * ``train/<label>/`` - the first images of each label, up to the
      per-label quota derived from ``valid_ratio``;
    * ``valid/<label>/`` - the remaining training images;
    * ``test/unknown/`` - every test image.

    Training files are visited in sorted file-name order so that the
    train/valid split is the same on every run and every file system.

    Args:
        data_dir: Directory holding the label file and the raw image folders.
        label_file: Name of the CSV file inside ``data_dir``. The first row is
            a header; every other non-blank row is ``<integer id>,<label>``.
        train_dir: Name of the folder (inside ``data_dir``) with training
            images; each file name must start with its integer id followed
            by a dot.
        test_dir: Name of the folder (inside ``data_dir``) with test images.
        input_dir: Name of the output folder created inside ``data_dir``.
        valid_ratio: Fraction of the training images to hold out.

    Raises:
        AssertionError: If ``valid_ratio`` leaves no image for training or
            none for validation.
        KeyError: If a training file's id is missing from the label file.
    """
    # Build the id -> label lookup, skipping the header row and blank lines
    # (a trailing empty line would otherwise break the two-field unpack).
    idx_label = {}
    with open(os.path.join(data_dir, label_file), 'r') as f:
        next(f, None)
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            idx, label = line.split(',')
            idx_label[int(idx)] = label
    labels = set(idx_label.values())

    train_root = os.path.join(data_dir, train_dir)
    # os.listdir returns names in arbitrary order; sort for a stable split.
    train_files = sorted(os.listdir(train_root))
    num_train = len(train_files)
    num_train_tuning = int(num_train * (1 - valid_ratio))
    assert 0 < num_train_tuning < num_train, \
        'valid_ratio leaves no images for training or for validation'
    num_train_tuning_per_label = num_train_tuning // len(labels)
    label_count = {}

    def mkdir_if_not_exist(path):
        """Create the directory named by the path parts and return its path."""
        target = os.path.join(*path)
        if not os.path.exists(target):
            os.makedirs(target)
        return target

    # Reorganize the training images.
    for train_file in train_files:
        idx = int(train_file.split('.')[0])
        label = idx_label[idx]
        src = os.path.join(train_root, train_file)
        shutil.copy(src, mkdir_if_not_exist([data_dir, input_dir, 'train_valid', label]))
        # The first image of a label always goes to 'train', even when the
        # per-label quota rounds down to zero.
        if label not in label_count or label_count[label] < num_train_tuning_per_label:
            split = 'train'
            label_count[label] = label_count.get(label, 0) + 1
        else:
            split = 'valid'
        shutil.copy(src, mkdir_if_not_exist([data_dir, input_dir, split, label]))

    # Reorganize the test images; their labels are not known.
    test_root = os.path.join(data_dir, test_dir)
    test_out = mkdir_if_not_exist([data_dir, input_dir, 'test', 'unknown'])
    for test_file in os.listdir(test_root):
        shutil.copy(os.path.join(test_root, test_file), test_out)

In [3]:
# Raw image folders and mini-batch size for the selected mode.
train_dir, test_dir, batch_size = (
    ('train_tiny', 'test_tiny', 1) if demo else ('train', 'test', 128))

data_dir = '../data/kaggle_cifar10'
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1

# Build the train / valid / train_valid / test folder layout.
reorg_cifar10_data(data_dir=data_dir, label_file=label_file,
                   train_dir=train_dir, test_dir=test_dir,
                   input_dir=input_dir, valid_ratio=valid_ratio)

## Read the Reorganized Data with Gluon

In [4]:
from mxnet import autograd as ag
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np

def transform_train(data, label):
    """Augment and normalize one training image.

    The image is scaled by 1/255, run through every augmenter returned by
    ``image.CreateAugmenter`` (random crop, random resize and random mirror
    are switched on, together with mean/std normalization), and finally
    transposed with axes ``(2, 0, 1)``.

    Returns:
        A tuple ``(image, label)`` where the label is a float32 scalar.
    """
    norm_mean = np.array([0.4914, 0.4822, 0.4465])
    norm_std = np.array([0.2023, 0.1994, 0.2010])
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32), resize=0,
        rand_crop=True, rand_resize=True, rand_mirror=True,
        mean=norm_mean, std=norm_std,
        brightness=0, contrast=0, saturation=0, hue=0,
        pca_noise=0, rand_gray=0, inter_method=2)

    img = data.astype('float32') / 255
    for augment in augmenters:
        img = augment(img)
    # Reorder the axes to channel x height x width.
    img = nd.transpose(img, (2, 0, 1))
    label_scalar = nd.array([label]).asscalar().astype('float32')
    return (img, label_scalar)

# At test time, only normalize the image; no random augmentation is applied.
def transform_test(data, label):
    """Normalize one evaluation image without random augmentation.

    The image is scaled by 1/255, run through the augmenters returned by
    ``image.CreateAugmenter`` when only ``data_shape``, ``mean`` and ``std``
    are given, and then transposed with axes ``(2, 0, 1)``.

    Returns:
        A tuple ``(image, label)`` where the label is a float32 scalar.
    """
    norm_mean = np.array([0.4914, 0.4822, 0.4465])
    norm_std = np.array([0.2023, 0.1994, 0.2010])
    augmenters = image.CreateAugmenter(
        data_shape=(3, 32, 32), mean=norm_mean, std=norm_std)

    img = data.astype('float32') / 255
    for augment in augmenters:
        img = augment(img)
    # Reorder the axes to channel x height x width.
    img = nd.transpose(img, (2, 0, 1))
    label_scalar = nd.array([label]).asscalar().astype('float32')
    return (img, label_scalar)

  import OpenSSL.SSL


In [5]:
# Common prefix of the per-split image folders (ends with a slash so the
# split name can be appended directly).
input_str = '/'.join([data_dir, input_dir, ''])

# Load the original images; flag=1 requests 3-channel images.
train_ds = vision.ImageFolderDataset(
    input_str + 'train', flag=1, transform=transform_train)
valid_ds = vision.ImageFolderDataset(
    input_str + 'valid', flag=1, transform=transform_test)
train_valid_ds = vision.ImageFolderDataset(
    input_str + 'train_valid', flag=1, transform=transform_train)
test_ds = vision.ImageFolderDataset(
    input_str + 'test', flag=1, transform=transform_test)

loader = gluon.data.DataLoader
# Tuning share of the training images: (1 - valid_ratio).
train_data = loader(train_ds, batch_size=batch_size,
                    shuffle=True, last_batch='keep')
# Held-out share of the training images: valid_ratio.
valid_data = loader(valid_ds, batch_size=batch_size,
                    shuffle=True, last_batch='keep')
# All training images.
train_valid_data = loader(train_valid_ds, batch_size=batch_size,
                          shuffle=True, last_batch='keep')
# Test images are read in dataset order (no shuffling).
test_data = loader(test_ds, batch_size=batch_size,
                   shuffle=False, last_batch='keep')

# Cross-entropy loss used by the training loop.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## Model

In [6]:
from mxnet.gluon import nn
from mxnet import nd

class Residual(nn.HybridBlock):
    """Two 3x3 convolution + batch-norm layers with a skip connection.

    When ``same_shape`` is False the first convolution uses stride 2 and the
    input goes through an extra 1x1, stride-2 convolution before it is added
    to the main branch.
    """

    def __init__(self, channels, same_shape = True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        if same_shape:
            stride = 1
        else:
            stride = 2
        with self.name_scope():
            self.conv1 = nn.Conv2D(channels, kernel_size=3,
                                   strides=stride, padding=1)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                # Projection applied to the input on the skip path.
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=stride)

    def hybrid_forward(self, F, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = F.relu(hidden)
        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)
        shortcut = x if self.same_shape else self.conv3(x)
        return F.relu(hidden + shortcut)

class ResNet(nn.HybridBlock):
    """Stack of a convolutional stem, three groups of Residual blocks and a classifier.

    A Dropout(0.2) layer follows the stem and every Residual block. With
    ``verbose`` set, the forward pass prints the output shape after each
    layer of the stack.
    """

    def __init__(self, num_classes, verbose = False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()

            # Module 1: 3x3 convolution, batch norm, ReLU.
            net.add(nn.Conv2D(channels=32, kernel_size=3,
                              strides=1, padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            net.add(nn.Dropout(0.2))

            # Module 2: three shape-preserving residual blocks with 32 channels.
            for _ in range(3):
                net.add(Residual(channels=32))
                net.add(nn.Dropout(0.2))

            # Modules 3 and 4: one stride-2 residual block followed by two
            # shape-preserving ones, first with 64 and then with 128 channels.
            for width in (64, 128):
                net.add(Residual(channels=width, same_shape=False))
                net.add(nn.Dropout(0.2))
                for _ in range(2):
                    net.add(Residual(channels=width))
                    net.add(nn.Dropout(0.2))

            # Module 5: average pooling, flatten, dense output layer.
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        """Apply every layer of the stack in order and return the last output."""
        out = x
        for position, layer in enumerate(self.net, 1):
            out = layer(out)
            if self.verbose:
                print('Block %d output: %s' % (position, out.shape))
        return out

def get_net(ctx):
    """Build a 10-output ResNet and Xavier-initialize its parameters on ``ctx``."""
    model = ResNet(num_classes=10)
    model.initialize(init=init.Xavier(), ctx=ctx)
    return model

## Train the Model and Tune Hyperparameters

In [7]:
import datetime
import sys
sys.path.append('..')
import utils

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period, lr_decay):
    """Train ``net`` with SGD (momentum 0.9) and print one summary line per epoch.

    The loss function is the module-level ``softmax_cross_entropy``.

    Args:
        net: Network to train; called on each batch moved to ``ctx``.
        train_data: Iterable of ``(data, label)`` batches that supports ``len``.
        valid_data: Batches passed to ``utils.evaluate_accuracy`` after each
            epoch, or None to skip validation.
        num_epochs: Number of passes over ``train_data``.
        lr: Initial learning rate.
        wd: Weight decay passed to the optimizer.
        ctx: Device context the batches are copied to.
        lr_period: The learning rate is multiplied by ``lr_decay`` at every
            epoch that is a positive multiple of this value.
        lr_decay: Multiplicative learning-rate decay factor.
    """
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    # Alternative optimizer (disabled):
    #     trainer = gluon.Trainer(net.collect_params(), 'adam',
    #                             {'learning_rate': lr, 'wd': wd})
    num_batches = len(train_data)
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with ag.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalize the gradient by the number of samples actually in this
            # batch: a loader that keeps the last partial batch yields fewer
            # samples there, and this also avoids relying on a global
            # ``batch_size`` that is not an argument of this function.
            trainer.step(data.shape[0])
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils.accuracy(output, label)

        # Elapsed time is measured before validation, so validation time is
        # attributed to the following epoch.
        cur_time = datetime.datetime.now()
        # total_seconds() keeps whole days, which ``.seconds`` would drop.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time %02d:%02d:%02d' % (h, m, s)

        epoch_str = ('Epoch %d. Loss: %f, Train acc %f, ' %
                     (epoch, train_loss / num_batches, train_acc / num_batches))
        if valid_data is not None:
            valid_acc = utils.evaluate_accuracy(valid_data, net, ctx)
            epoch_str += 'Valid acc %f, ' % valid_acc
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

In [8]:
# Device returned by utils.try_gpu().
ctx = utils.try_gpu()

# Hyperparameters for the training run.
num_epochs = 200
learning_rate = 0.03
weight_decay = 5e-4
lr_period = 50
lr_decay = 0.1

# Train on the tuning split and report validation accuracy after each epoch.
net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data,
      num_epochs=num_epochs, lr=learning_rate, wd=weight_decay,
      ctx=ctx, lr_period=lr_period, lr_decay=lr_decay)

Epoch 0. Loss: 1.911086, Train acc 0.289514, Valid acc 0.383200, Time 00:00:37, lr 0.03
Epoch 1. Loss: 1.641513, Train acc 0.398627, Valid acc 0.490800, Time 00:00:38, lr 0.03
Epoch 2. Loss: 1.516769, Train acc 0.448824, Valid acc 0.499600, Time 00:00:39, lr 0.03
Epoch 3. Loss: 1.425897, Train acc 0.486284, Valid acc 0.534200, Time 00:00:39, lr 0.03
Epoch 4. Loss: 1.340124, Train acc 0.520296, Valid acc 0.587800, Time 00:00:39, lr 0.03
Epoch 5. Loss: 1.277597, Train acc 0.542182, Valid acc 0.627400, Time 00:00:39, lr 0.03
Epoch 6. Loss: 1.231233, Train acc 0.561654, Valid acc 0.668800, Time 00:00:39, lr 0.03
Epoch 7. Loss: 1.182072, Train acc 0.578253, Valid acc 0.628600, Time 00:00:39, lr 0.03
Epoch 8. Loss: 1.144418, Train acc 0.595180, Valid acc 0.676000, Time 00:00:39, lr 0.03
Epoch 9. Loss: 1.105105, Train acc 0.607898, Valid acc 0.739000, Time 00:00:40, lr 0.03
Epoch 10. Loss: 1.069911, Train acc 0.622581, Valid acc 0.716800, Time 00:00:40, lr 0.03
Epoch 11. Loss: 1.038210, Train

Epoch 92. Loss: 0.547936, Train acc 0.807901, Valid acc 0.900800, Time 00:00:39, lr 0.003
Epoch 93. Loss: 0.535330, Train acc 0.812727, Valid acc 0.901000, Time 00:00:39, lr 0.003
Epoch 94. Loss: 0.543829, Train acc 0.810722, Valid acc 0.901200, Time 00:00:39, lr 0.003
Epoch 95. Loss: 0.539927, Train acc 0.809970, Valid acc 0.898200, Time 00:00:39, lr 0.003
Epoch 96. Loss: 0.539026, Train acc 0.812919, Valid acc 0.898800, Time 00:00:39, lr 0.003
Epoch 97. Loss: 0.545226, Train acc 0.808539, Valid acc 0.903200, Time 00:00:39, lr 0.003
Epoch 98. Loss: 0.530014, Train acc 0.814618, Valid acc 0.898800, Time 00:00:39, lr 0.003
Epoch 99. Loss: 0.545471, Train acc 0.809758, Valid acc 0.903000, Time 00:00:40, lr 0.003
Epoch 100. Loss: 0.532210, Train acc 0.814068, Valid acc 0.903000, Time 00:00:40, lr 0.00030000000000000003
Epoch 101. Loss: 0.527715, Train acc 0.814986, Valid acc 0.903600, Time 00:00:39, lr 0.00030000000000000003
Epoch 102. Loss: 0.526650, Train acc 0.815538, Valid acc 0.90180

Epoch 170. Loss: 0.511403, Train acc 0.821010, Valid acc 0.905800, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 171. Loss: 0.510962, Train acc 0.822359, Valid acc 0.908000, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 172. Loss: 0.507126, Train acc 0.821425, Valid acc 0.906600, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 173. Loss: 0.515505, Train acc 0.819602, Valid acc 0.906400, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 174. Loss: 0.512699, Train acc 0.821050, Valid acc 0.906600, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 175. Loss: 0.515726, Train acc 0.821038, Valid acc 0.905400, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 176. Loss: 0.507600, Train acc 0.823368, Valid acc 0.906400, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 177. Loss: 0.514398, Train acc 0.820882, Valid acc 0.907200, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 178. Loss: 0.507405, Train acc 0.822660, Valid acc 0.906200, Time 00:00:39, lr 3.0000000000000004e-05
Epoch 179. Loss: 0.510455, T

In [9]:
import numpy as np
import pandas as pd

# Train a fresh network on all training images; no validation set is passed.
net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None,
      num_epochs=num_epochs, lr=learning_rate, wd=weight_decay,
      ctx=ctx, lr_period=lr_period, lr_decay=lr_decay)

# Collect the arg-max class index of every test image, batch by batch.
preds = []
for data, label in test_data:
    output = net(data.as_in_context(ctx))
    preds.extend(output.argmax(axis=1).astype(int).asnumpy())

# Ids 1..N ordered as strings ('1', '10', '100', ...).
# NOTE(review): presumably this mirrors the file-name order in which the test
# dataset yields images - confirm.
sorted_ids = sorted(range(1, len(test_ds) + 1), key=str)

df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Replace class indices by the class names recorded in the dataset's synsets.
df['label'] = df['label'].apply(lambda x: train_valid_ds.synsets[x])
df.to_csv('submission.csv', index=False)

Epoch 0. Loss: 1.889567, Train acc 0.297135, Time 00:00:41, lr 0.03
Epoch 1. Loss: 1.625313, Train acc 0.407025, Time 00:00:40, lr 0.03
Epoch 2. Loss: 1.480362, Train acc 0.464254, Time 00:00:40, lr 0.03
Epoch 3. Loss: 1.374243, Train acc 0.505423, Time 00:00:40, lr 0.03
Epoch 4. Loss: 1.298898, Train acc 0.534887, Time 00:00:39, lr 0.03
Epoch 5. Loss: 1.239405, Train acc 0.557900, Time 00:00:39, lr 0.03
Epoch 6. Loss: 1.183960, Train acc 0.578337, Time 00:00:39, lr 0.03
Epoch 7. Loss: 1.139475, Train acc 0.596068, Time 00:00:39, lr 0.03
Epoch 8. Loss: 1.099199, Train acc 0.609807, Time 00:00:39, lr 0.03
Epoch 9. Loss: 1.069499, Train acc 0.623449, Time 00:00:39, lr 0.03
Epoch 10. Loss: 1.027833, Train acc 0.639098, Time 00:00:39, lr 0.03
Epoch 11. Loss: 1.010752, Train acc 0.644082, Time 00:00:39, lr 0.03
Epoch 12. Loss: 0.986053, Train acc 0.652901, Time 00:00:39, lr 0.03
Epoch 13. Loss: 0.970961, Train acc 0.658720, Time 00:00:39, lr 0.03
Epoch 14. Loss: 0.948872, Train acc 0.665985

Epoch 115. Loss: 0.509144, Train acc 0.821064, Time 00:00:40, lr 0.00030000000000000003
Epoch 116. Loss: 0.510351, Train acc 0.821703, Time 00:00:40, lr 0.00030000000000000003
Epoch 117. Loss: 0.509402, Train acc 0.822922, Time 00:00:39, lr 0.00030000000000000003
Epoch 118. Loss: 0.511446, Train acc 0.820780, Time 00:00:39, lr 0.00030000000000000003
Epoch 119. Loss: 0.506774, Train acc 0.821979, Time 00:00:39, lr 0.00030000000000000003
Epoch 120. Loss: 0.511704, Train acc 0.821316, Time 00:00:39, lr 0.00030000000000000003
Epoch 121. Loss: 0.507934, Train acc 0.820920, Time 00:00:39, lr 0.00030000000000000003
Epoch 122. Loss: 0.506807, Train acc 0.823010, Time 00:00:40, lr 0.00030000000000000003
Epoch 123. Loss: 0.509118, Train acc 0.820201, Time 00:00:39, lr 0.00030000000000000003
Epoch 124. Loss: 0.508504, Train acc 0.821787, Time 00:00:39, lr 0.00030000000000000003
Epoch 125. Loss: 0.505882, Train acc 0.823190, Time 00:00:39, lr 0.00030000000000000003
Epoch 126. Loss: 0.509912, Train