## Environment Setting and Data Preprocessing

To reproduce the results, place the dataset in a Google Drive folder and set *data_path* to the path of that folder. In our case, data_path = '/content/gdrive/My\ Drive/Datasets/dogcat'.

In [12]:
# Show the GPU, driver and CUDA version available to this runtime.
! nvidia-smi
# Install the MXNet build published as `mxnet-cu100` (the CUDA 10.0 package).
! pip install mxnet-cu100

Mon Feb 11 19:24:06 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.79       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    68W / 149W |   1044MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
import mxnet as mx
from mxnet import autograd, nd, init, gluon
from mxnet.gluon import nn, loss as gloss, data as gdata

import os
import shutil
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import zipfile

We mount Google Drive and copy the dataset onto the local disk of the Colab virtual machine to speed up image loading.

In [14]:
from google.colab import drive
drive.mount('/content/gdrive')

data_path = '/content/gdrive/My\ Drive/Datasets/dogcat'
base_path = '/content/dogcat'

! rm -rf '/content/dogcat'
! cp -r $data_path /content

for f in ['trainset.zip', 'testset.zip']:
    with zipfile.ZipFile(os.path.join(base_path, f)) as z:
        z.extractall(base_path)
        
! rm -rf /content/dogcat/__MACOSX
! ls /content/dogcat

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
sample_submission.csv  testset	    trainset
submission.csv	       testset.zip  trainset.zip


We divided the dataset into four folders:

- trainset: the original training dataset which will be used to re-train the final model after parameter tuning.
- testset: the original test dataset used to generate predictions.
- train, valid: the training and validation datasets used to train the model and select hyperparameters. They are split from trainset according to a split ratio.

In [15]:
# Folder names; each one is joined onto base_path wherever it is used.
train_valid_path = 'trainset'  # full labelled training set
test_path = 'testset'          # test set used to generate predictions
train_path = 'train'           # training split written by devide_train_valid
valid_path = 'valid'           # validation split written by devide_train_valid


def delete_dir(path):
    """Remove a directory tree if it exists.

    Args:
        path: Sequence of path components; they are joined with
            ``os.path.join`` to form the directory to remove.
    """
    target = os.path.join(*path)
    if not os.path.exists(target):
        return
    shutil.rmtree(target)
        
def create_dir(path):
    """Create a directory, including missing parents, unless the path exists.

    Args:
        path: Sequence of path components; they are joined with
            ``os.path.join`` to form the directory to create.
    """
    target = os.path.join(*path)
    if os.path.exists(target):
        return
    os.makedirs(target)

def devide_train_valid(base_path, train_valid_path, train_path, valid_path,
                       valid_ratio=0.2, labels=['Dog', 'Cat']):
    """Copy the full training set into separate train and valid folders.

    For every label, the files in ``base_path/train_valid_path/<label>`` are
    taken in sorted filename order. Files are copied to
    ``base_path/train_path/<label>`` until ``len(files) * (1 - valid_ratio)``
    files have been placed there; the rest go to
    ``base_path/valid_path/<label>``. Any existing train/valid folders are
    deleted first, and the per-label counts are printed.

    Args:
        base_path: Root folder that contains the dataset folders.
        train_valid_path: Name of the folder holding the full training set.
        train_path: Name of the training split folder to (re)create.
        valid_path: Name of the validation split folder to (re)create.
        valid_ratio: Share of each label's files sent to the validation split.
        labels: Names of the label sub-folders to split (never mutated).
    """
    delete_dir([base_path, train_path])
    delete_dir([base_path, valid_path])

    # os.listdir returns entries in arbitrary order; sorting makes the split
    # identical on every run and every machine.
    all_files_by_label = [
        sorted(os.listdir(os.path.join(base_path, train_valid_path, label)))
        for label in labels]

    for label, files in zip(labels, all_files_by_label):
        print('Train&Valid set, %s: %s' % (label, len(files)))
        create_dir([base_path, train_path, label])
        create_dir([base_path, valid_path, label])

        src_dir = os.path.join(base_path, train_valid_path, label)
        # Files go to the train split until this (possibly fractional) quota
        # is reached; everything after that goes to the valid split.
        train_quota = len(files) * (1 - valid_ratio)
        n_train, n_valid = 0, 0
        for f in files:
            if n_train < train_quota:
                shutil.copy(os.path.join(src_dir, f),
                            os.path.join(base_path, train_path, label, f))
                n_train += 1
            else:
                shutil.copy(os.path.join(src_dir, f),
                            os.path.join(base_path, valid_path, label, f))
                n_valid += 1
        print('Train set, %s: %s' % (label, n_train))
        print('Valid set, %s: %s' % (label, n_valid))
                
# Build the train/valid folders using the default valid_ratio (0.2) and labels.
devide_train_valid(base_path, train_valid_path, train_path, valid_path)

Train&Valid set, Dog: 9999
Train set, Dog: 8000
Valid set, Dog: 1999
Train&Valid set, Cat: 9999
Train set, Cat: 8000
Valid set, Cat: 1999


We define four data iterators below for a given batch size. The training and test data use different augmentation pipelines, since the test data should remain unchanged.

In [16]:
# One ImageFolderDataset per folder under base_path. flag=1 asks MXNet to load
# every image as a 3-channel colour image.
train_data = gdata.vision.ImageFolderDataset(
    os.path.join(base_path, train_path), flag=1)
valid_data = gdata.vision.ImageFolderDataset(
    os.path.join(base_path, valid_path), flag=1)
train_valid_data = gdata.vision.ImageFolderDataset(
    os.path.join(base_path, train_valid_path), flag=1)
test_data = gdata.vision.ImageFolderDataset(
    os.path.join(base_path, test_path), flag=1)

# Training-time augmentation: random 64x64 crop, horizontal flip, colour
# jitter and lighting noise, then conversion to a tensor.
aug_train = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomResizedCrop(64, scale=(0.75, 1),
                                               ratio=(3.0/4.0, 4.0/3.0)),
        gdata.vision.transforms.RandomFlipLeftRight(),
    gdata.vision.transforms.RandomColorJitter(brightness=0.4, 
                                              contrast=0.4, saturation=0.4),
    gdata.vision.transforms.RandomLighting(0.1),
    gdata.vision.transforms.ToTensor(),
    # Normalisation is currently disabled:
#     gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], 
#                                       [0.229, 0.224, 0.225])
])

# Evaluation-time pipeline: tensor conversion only, no random augmentation.
# NOTE(review): there is no resize here, so this assumes the stored images
# already share one size and can be batched - TODO confirm.
aug_test = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor(),
    # Normalisation is currently disabled:
#     gdata.vision.transforms.Normalize([0.485, 0.456, 0.406], 
#                                       [0.229, 0.224, 0.225])
])
    
def load_data_iter(batch_size):
    """Build the four data loaders used throughout the notebook.

    Args:
        batch_size: Number of samples per batch for every loader.

    Returns:
        Tuple ``(train_iter, valid_iter, train_valid_iter, test_iter)``. The
        train and train+valid loaders apply ``aug_train``; the valid and test
        loaders apply ``aug_test``. Every loader except the test loader is
        shuffled, and all of them keep the final, possibly smaller, batch.
    """
    def make_loader(dataset, augment, shuffle):
        # The transform is applied to the image only, not to the label.
        return gdata.DataLoader(dataset.transform_first(augment), batch_size,
                                shuffle=shuffle, last_batch='keep')

    return (make_loader(train_data, aug_train, True),
            make_loader(valid_data, aug_test, True),
            make_loader(train_valid_data, aug_train, True),
            make_loader(test_data, aug_test, False))

  


## Building VGG Model

In [0]:
# VGG modified
class VGGBlock(nn.Block):
    """One VGG stage: ``num_conv`` 3x3 ReLU convolutions, then 2x2 max pooling.

    Args:
        num_conv: Number of convolution layers in the stage.
        num_channel: Output channels of every convolution in the stage.
        **kwargs: Forwarded to ``nn.Block``.
    """

    def __init__(self, num_conv, num_channel, **kwargs):
        super(VGGBlock, self).__init__(**kwargs)

        stage = nn.Sequential()
        for _ in range(num_conv):
            stage.add(nn.Conv2D(num_channel, kernel_size=3,
                                padding=1, activation='relu'))
        # Pooling with size 2 and stride 2 closes the stage.
        stage.add(nn.MaxPool2D(pool_size=2, strides=2))
        self.net = stage

    def forward(self, X):
        """Run ``X`` through the convolutions and the pooling layer."""
        return self.net(X)
    
def VGG(blocks):
    """Assemble the modified VGG classifier.

    Args:
        blocks: Iterable of ``(num_conv, num_channel)`` pairs, one per
            ``VGGBlock`` stage, in order.

    Returns:
        An ``nn.Sequential`` holding the convolutional stages followed by
        ReLU dense layers of 1024 and 512 units (each followed by a
        ``Dropout`` with rate 0.0) and a final 2-unit dense layer.
    """
    model = nn.Sequential()
    # Convolutional stages
    for num_conv, num_channel in blocks:
        model.add(VGGBlock(num_conv, num_channel))
    # Dense head; the dropout rate of 0.0 is what the original code specifies.
    for units in (1024, 512):
        model.add(nn.Dense(units, activation='relu'), nn.Dropout(0.0))
    model.add(nn.Dense(2))
    return model

## Training and Evaluation

In [0]:
def evaluate(net, test_iter, ctx):
    """Return the classification accuracy of ``net`` over ``test_iter``.

    Args:
        net: Callable mapping a batch of inputs to per-class scores.
        test_iter: Iterable of ``(X, y)`` batches.
        ctx: Device context the batches are moved to before the forward pass.

    Returns:
        Number of samples whose arg-max prediction equals the label, divided
        by the total number of labels seen.
    """
    correct, total = 0.0, 0
    for features, labels in test_iter:
        features = features.as_in_context(ctx)
        # Labels are cast to float32 before being compared with the arg-max.
        labels = labels.astype('float32').as_in_context(ctx)

        predicted = net(features).argmax(axis=1)
        correct += (predicted == labels).sum().asscalar()
        total += labels.size
    return correct / total

def train(net, train_iter, test_iter, num_epochs, batch_size, lr, ctx):
    """Train ``net`` with SGD and softmax cross-entropy, printing epoch metrics.

    Args:
        net: Gluon network whose parameters are already initialised.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of evaluation batches; when falsy, the per-epoch
            evaluation is skipped.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Value handed to ``trainer.step`` to normalise gradients.
        lr: SGD learning rate.
        ctx: Device context the batches are moved to.
    """
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        correct, loss_sum, seen = 0.0, 0.0, 0
        tic = time.time()

        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.astype('float32').as_in_context(ctx)

            with autograd.record():
                y_hat = net(X)
                batch_loss = loss(y_hat, y).sum()

            batch_loss.backward()
            # The loss is summed over the batch; step() divides the gradient
            # by batch_size.
            trainer.step(batch_size)
            loss_sum += batch_loss.asscalar()
            correct += (y_hat.argmax(axis=1) == y).sum().asscalar()
            seen += y.size

        elapsed = time.time() - tic
        print('epoch %s, train loss %.4f, train acc %.4f, time %.2f' %
              (epoch, loss_sum / seen, correct / seen, elapsed))
        if test_iter:
            test_acc = evaluate(net, test_iter, ctx)
            print('epoch %s, test acc %.4f.' % (epoch, test_acc))

## Experiments

In [0]:
# Hyperparameters and device shared by the experiments below.
num_epochs, batch_size = 35, 128
lr = 5e-2
ctx = mx.gpu()

train_iter, valid_iter, train_valid_iter, test_iter = load_data_iter(batch_size)
# Each pair is (number of conv layers, output channels) for one VGGBlock.
# Alternative five-stage configuration (disabled):
# conv_arch = ((1, 32), (1, 64), (2, 128), (2, 256), (2, 256))
conv_arch = ((1, 32), (2, 64), (2, 128), (2, 256))

In [10]:
# Train the VGG model on the train split; the "test acc" printed after each
# epoch is measured on the valid split.
vgg11 = VGG(conv_arch)
vgg11.initialize(init=init.Xavier(), ctx=ctx)
train(vgg11, train_iter, valid_iter, num_epochs, batch_size, lr, ctx)

epoch 1, train loss 0.6924, train acc 0.5152, time 19.20
epoch 1, test acc 0.5015.
epoch 2, train loss 0.6910, train acc 0.5321, time 16.99
epoch 2, test acc 0.5468.
epoch 3, train loss 0.6873, train acc 0.5513, time 17.88
epoch 3, test acc 0.5253.
epoch 4, train loss 0.6840, train acc 0.5618, time 18.41
epoch 4, test acc 0.5733.
epoch 5, train loss 0.6808, train acc 0.5706, time 18.26
epoch 5, test acc 0.5628.
epoch 6, train loss 0.6767, train acc 0.5806, time 18.39
epoch 6, test acc 0.6178.
epoch 7, train loss 0.6689, train acc 0.5980, time 18.37
epoch 7, test acc 0.6313.
epoch 8, train loss 0.6603, train acc 0.6118, time 18.24
epoch 8, test acc 0.6406.
epoch 9, train loss 0.6546, train acc 0.6236, time 18.23
epoch 9, test acc 0.6311.
epoch 10, train loss 0.6442, train acc 0.6358, time 17.62
epoch 10, test acc 0.6716.
epoch 11, train loss 0.6301, train acc 0.6508, time 17.55
epoch 11, test acc 0.6928.
epoch 12, train loss 0.6228, train acc 0.6562, time 17.49
epoch 12, test acc 0.6578

In [0]:
def pred_result(net, test_iter, ctx, write_path):
    """Predict a label for every test batch and write ``submission.csv``.

    The ids are the integers ``1..len(test_data)`` ordered by their string
    form (1, 10, 100, ..., 2, ...); this presumably mirrors the filename order
    in which the test dataset yields its images - verify against the data.
    Predicted class indices are mapped to names via ``train_valid_data.synsets``.

    Args:
        net: Trained network producing per-class scores.
        test_iter: Iterable of ``(X, y)`` batches; the labels are ignored.
        ctx: Device context the inputs are moved to.
        write_path: Folder in which ``submission.csv`` is written.
    """
    class_indices = []
    for X, _ in test_iter:
        scores = net(X.as_in_context(ctx))
        class_indices.extend(scores.argmax(axis=1).astype('int').asnumpy())

    ids = sorted(range(1, 1 + len(test_data)), key=str)
    labels = [train_valid_data.synsets[idx] for idx in class_indices]

    out_file = os.path.join(write_path, 'submission.csv')
    pd.DataFrame({'id': ids, 'label': labels}).to_csv(out_file, index=False)
    print('submission file has been saved in:', out_file)

In [0]:
# Retrain a fresh VGG on the full training set (train + valid) and write the
# test-set predictions to the Drive folder.
# NOTE(review): the valid split was copied out of trainset, so the "test acc"
# printed during this run is measured on data the model is trained on.
vgg11 = VGG(conv_arch)
vgg11.initialize(init=init.Xavier(), ctx=ctx)
train(vgg11, train_valid_iter, valid_iter, num_epochs, batch_size, lr, ctx)
pred_result(vgg11, test_iter, ctx, '/content/gdrive/My Drive/Datasets/dogcat')

## Define ResNet

In [0]:
class ResBlock(nn.Block):
    """Residual block: two 3x3 convolutions with batch norm plus a shortcut.

    Args:
        n_channels: Output channels of both 3x3 convolutions and of the
            optional 1x1 shortcut convolution.
        strides: Stride of the first convolution and of the shortcut
            convolution.
        nin: When true, the shortcut goes through a 1x1 convolution; otherwise
            the block input is added back unchanged.
        **kwargs: Forwarded to ``nn.Block``.
    """

    def __init__(self, n_channels, strides=1, nin=False, **kwargs):
        super(ResBlock, self).__init__(**kwargs)

        self.conv1 = nn.Conv2D(n_channels, kernel_size=3, padding=1, strides=strides)
        self.conv2 = nn.Conv2D(n_channels, kernel_size=3, padding=1)

        # Optional 1x1 convolution applied to the shortcut branch.
        self.nin = (nn.Conv2D(n_channels, kernel_size=1, strides=strides)
                    if nin else None)

        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        """Return ``relu(residual_branch(X) + shortcut(X))``."""
        out = nd.relu(self.bn1(self.conv1(X)))
        out = self.bn2(self.conv2(out))

        shortcut = X if self.nin is None else self.nin(X)
        return nd.relu(out + shortcut)

In [0]:
class ResNet18(nn.Block):
    """ResNet-18-style classifier built from ``ResBlock`` stages.

    The network is a 3x3 convolution stem (64 channels, batch norm, ReLU),
    four stages of two residual blocks with 64, 128, 256 and 512 channels,
    global average pooling and a dense output layer.

    Args:
        n_outputs: Number of units in the final dense layer.
        **kwargs: Forwarded to ``nn.Block``.
    """

    def __init__(self, n_outputs, **kwargs):
        super(ResNet18, self).__init__(**kwargs)

        self.net = nn.Sequential()
        # Stem
        self.net.add(
            nn.Conv2D(64, kernel_size=3, strides=1, padding=1),
            nn.BatchNorm(),
            nn.Activation('relu')
        )

        # Residual stages; only the first one is built with nin=True.
        for n_channels, first_stage in ((64, True), (128, False),
                                        (256, False), (512, False)):
            self.net.add(self.add_block(n_channels, 2, nin=first_stage))

        # Classification head
        self.net.add(nn.GlobalAvgPool2D(), nn.Dense(n_outputs))

    def forward(self, X):
        """Run ``X`` through the whole network."""
        return self.net(X)

    def add_block(self, n_channels, n_blocks, nin=False):
        """Build one stage containing ``n_blocks`` residual blocks.

        Args:
            n_channels: Channels of every block in the stage.
            n_blocks: Total number of residual blocks in the stage.
            nin: When false, the first block uses stride 2 together with a
                1x1 shortcut convolution and the remaining blocks are plain;
                when true, every block is a plain stride-1 block.

        Returns:
            An ``nn.Sequential`` holding the stage.
        """
        stage = nn.Sequential()

        plain_blocks = n_blocks
        if not nin:
            stage.add(ResBlock(n_channels, 2, True))
            plain_blocks -= 1

        for _ in range(plain_blocks):
            stage.add(ResBlock(n_channels))

        return stage

In [43]:
# Hyperparameters and device for the ResNet run. They are defined before their
# first use so the cell does not depend on a `ctx` left over from an earlier
# cell.
num_epochs, batch_size = 35, 128
lr = 5e-2
ctx = mx.gpu()

# Two-class ResNet; force_reinit=True re-initialises the parameters even if
# they have been initialised before.
resnet = ResNet18(2)
resnet.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
train(resnet, train_iter, valid_iter, num_epochs, batch_size, lr, ctx)

epoch 1, train loss 0.9074, train acc 0.5543, time 97.63
epoch 1, test acc 0.5458.
epoch 2, train loss 0.6675, train acc 0.6156, time 97.46
epoch 2, test acc 0.6496.
epoch 3, train loss 0.6459, train acc 0.6331, time 97.36
epoch 3, test acc 0.5128.
epoch 4, train loss 0.6149, train acc 0.6719, time 97.23
epoch 4, test acc 0.6738.
epoch 5, train loss 0.5932, train acc 0.6843, time 97.23
epoch 5, test acc 0.7404.
epoch 6, train loss 0.5673, train acc 0.7096, time 97.54
epoch 6, test acc 0.7071.
epoch 7, train loss 0.5462, train acc 0.7221, time 97.28
epoch 7, test acc 0.5630.
epoch 8, train loss 0.5244, train acc 0.7425, time 97.36
epoch 8, test acc 0.7449.
epoch 9, train loss 0.4928, train acc 0.7650, time 97.67
epoch 9, test acc 0.7224.
epoch 10, train loss 0.4694, train acc 0.7756, time 97.37
epoch 10, test acc 0.6308.
epoch 11, train loss 0.4428, train acc 0.7924, time 97.41
epoch 11, test acc 0.7759.
epoch 12, train loss 0.4149, train acc 0.8088, time 97.21
epoch 12, test acc 0.6983

In [35]:
# Sanity check: push one training batch through the network and print the
# input and output shapes.
for X, y in train_iter:
    X = X.as_in_context(mx.gpu())
    print(X.shape)        
    X = resnet(X)
    print(X.shape)
        
    # Only the first batch is needed.
    break

(128, 3, 64, 64)
(128, 64, 64, 64)
(128, 64, 64, 64)
(128, 128, 32, 32)
(128, 128, 32, 32)
(128, 256, 16, 16)
(128, 256, 16, 16)
(128, 512, 8, 8)
(128, 512, 8, 8)
(128, 2)
