# High-level Chainer Example

In [1]:
# Parameters
import os  # imported here because this cell runs before the main import cell

# Training hyper-parameters.
EPOCHS = 10
N_CLASSES = 10
BATCHSIZE = 64
LR = 0.01
MOMENTUM = 0.9
GPU = True  # when True the model is copied to GPU 0

# Connection settings for the InfluxDB metrics logger. Each value can be
# overridden through an environment variable of the same name so that real
# credentials do not have to be committed with the notebook; the literals are
# only the historical defaults.
LOGGER_URL = os.environ.get('LOGGER_URL', 'msdlvm.southcentralus.cloudapp.azure.com')
LOGGER_USERNAME = os.environ.get('LOGGER_USERNAME', 'admin')
# Misspelled alias kept on purpose: the rest of the notebook refers to it.
LOGGER_USRENAME = LOGGER_USERNAME
LOGGER_PASSWORD = os.environ.get('LOGGER_PASSWORD', 'password')
LOGGER_DB = os.environ.get('LOGGER_DB', 'gpudata')
LOGGER_SERIES = os.environ.get('LOGGER_SERIES', 'gpu')

In [2]:
import os
from os import path
import sys
import numpy as np
import math
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import cuda
from utils import cifar_for_library, yield_mb, create_logger, Timer
from gpumon.influxdb import log_context

from influxdb import InfluxDBClient

In [None]:
# InfluxDB client handle (port 8086) that is handed to create_logger.
client = InfluxDBClient(
    host=LOGGER_URL,
    port=8086,
    username=LOGGER_USRENAME,
    password=LOGGER_PASSWORD,
    database=LOGGER_DB,
)

In [None]:
# Azure Batch identifiers for this run; the placeholder defaults apply when
# the corresponding environment variables are not set.
node_id = os.environ.get('AZ_BATCH_NODE_ID', 'node')
task_id = os.environ.get('AZ_BATCH_TASK_ID', 'chainer')
job_id = os.environ.get('AZ_BATCH_JOB_ID', 'chainer')

In [None]:
# Metrics logger bound to the InfluxDB client and this run's node/task/job ids.
logger = create_logger(
    client,
    node_id=node_id,
    task_id=task_id,
    job_id=job_id,
)

In [None]:
# Report the runtime environment (platform and library versions).
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Chainer: ", chainer.__version__),
    ("Numpy: ", np.__version__),
):
    print(label, value)

In [7]:
# Directory holding the CIFAR-10 python batches. On Batch AI the dataset root
# comes from AZ_BATCHAI_INPUT_DATASET; when that variable is not set we fall
# back to the current directory instead of failing inside path.join with a
# TypeError on None.
data_path = path.join(os.getenv('AZ_BATCHAI_INPUT_DATASET', '.'), 'cifar-10-batches-py')

In [8]:
class SymbolModule(chainer.Chain):
    """Small CNN: two conv-conv-pool-dropout stages followed by two linear layers.

    The final layer emits ``N_CLASSES`` raw scores (no softmax is applied here).
    """

    def __init__(self):
        super(SymbolModule, self).__init__()
        # Register child links inside init_scope(); passing them as keyword
        # arguments to Chain.__init__ is deprecated since Chainer v2.
        with self.init_scope():
            self.conv1 = L.Convolution2D(3, 50, ksize=(3,3), pad=(1,1))
            self.conv2 = L.Convolution2D(50, 50, ksize=(3,3), pad=(1,1))
            self.conv3 = L.Convolution2D(50, 100, ksize=(3,3), pad=(1,1))
            self.conv4 = L.Convolution2D(100, 100, ksize=(3,3), pad=(1,1))
            # The 3x3/pad-1 convolutions keep the spatial size and each of the
            # two 2x2/stride-2 poolings halves it, so 32x32 inputs reach fc1
            # as 100 feature maps of 8x8.
            self.fc1 = L.Linear(100*8*8, 512)
            self.fc2 = L.Linear(512, N_CLASSES)

    def __call__(self, x):
        """Run the forward pass on a batch ``x`` and return the class scores."""
        # Stage 1: conv -> relu -> conv -> relu -> max-pool -> dropout(0.25)
        h = F.relu(self.conv2(F.relu(self.conv1(x))))
        h = F.max_pooling_2d(h, ksize=(2,2), stride=(2,2))
        h = F.dropout(h, 0.25)

        # Stage 2: same pattern with the wider convolutions
        h = F.relu(self.conv4(F.relu(self.conv3(h))))
        h = F.max_pooling_2d(h, ksize=(2,2), stride=(2,2))
        h = F.dropout(h, 0.25)

        # Classifier head: hidden layer with dropout(0.5), then output scores
        h = F.dropout(F.relu(self.fc1(h)), 0.5)
        return self.fc2(h)

In [9]:
def init_model(m):
    """Create a momentum-SGD optimizer using LR and MOMENTUM, set it up on ``m`` and return it."""
    sgd = optimizers.MomentumSGD(momentum=MOMENTUM, lr=LR)
    sgd.setup(m)
    return sgd

In [10]:
def to_chainer(array, **kwargs):
    """Wrap ``array`` in a ``chainer.Variable``.

    The array is copied to the GPU first only when the ``GPU`` flag is set, so
    the data placement follows the same switch as the model placement. Extra
    keyword arguments are forwarded to ``chainer.Variable``.
    """
    if GPU:
        array = cuda.to_gpu(array)
    return chainer.Variable(array, **kwargs)

In [11]:
%%time
# Load the train/test splits from data_path in channel-first layout.
x_train, x_test, y_train, y_test = cifar_for_library(data_path, channel_first=True)
# Show shape and dtype of every split as a quick sanity check.
splits = (x_train, x_test, y_train, y_test)
print(*[split.shape for split in splits])
print(*[split.dtype for split in splits])

Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 848 ms, sys: 560 ms, total: 1.41 s
Wall time: 1.4 s


In [12]:
%%time
# Create symbol
sym = SymbolModule()
if GPU:
    # get_device_from_id replaces the deprecated chainer.cuda.get_device
    chainer.cuda.get_device_from_id(0).use()  # Make GPU 0 the current device
    sym.to_gpu()  # Copy the model to the GPU

CPU times: user 1min 15s, sys: 2.66 s, total: 1min 18s
Wall time: 1min 17s


In [13]:
%%time
# Optimizer set up on the network created above.
optimizer = init_model(m=sym)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 119 µs


In [14]:
# Build the loss wrapper once instead of once per minibatch: it always wraps
# the same `sym`, so re-creating it inside the loop was loop-invariant work.
classifier = L.Classifier(sym)

with Timer() as t:
    # log_context is active for the whole training run.
    with log_context(LOGGER_URL, LOGGER_USRENAME, LOGGER_PASSWORD, LOGGER_DB, LOGGER_SERIES, 
                     node_id=node_id, task_id=task_id, job_id=job_id):
        for j in range(EPOCHS):
            for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
                # One optimisation step: forward, backward and parameter update
                optimizer.update(classifier, to_chainer(data), to_chainer(target))
            # Log the finished epoch index
            print(j)
print('Training took %.03f sec.' % t.interval)
logger('training duration', value=t.interval)

0
1
2
3
4
5
6
7
8
9
CPU times: user 2min 3s, sys: 2.14 s, total: 2min 6s
Wall time: 2min 4s


In [15]:
%%time
# Only whole minibatches are scored, so the buffers are sized to the largest
# multiple of BATCHSIZE that fits in the test set.
# NOTE(review): this assumes yield_mb yields full batches only - confirm.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int replaces np.int, which was only an alias for it and has been
# removed from recent NumPy releases.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]

# Evaluate with chainer.config.train switched off.
with chainer.using_config('train', False):
    for c, (data, target) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
        # Forwards: predicted class is the arg-max over the last axis
        pred = chainer.cuda.to_cpu(sym(to_chainer(data)).data.argmax(-1))
        # Collect results into the slot of batch number c
        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred

CPU times: user 600 ms, sys: 128 ms, total: 728 ms
Wall time: 725 ms


In [16]:
# Fraction of predictions equal to the ground truth. np.mean reduces the
# boolean array in NumPy instead of iterating over it element by element with
# the builtin sum.
acc = np.mean(y_guess == y_truth)
print("Accuracy: ", acc)
logger('accuracy', value=acc)

Accuracy:  0.787159455128
