# High-level Chainer Example

In [1]:
import os
import sys
import numpy as np
import math
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import cuda
from common.params import *
from common.utils import *

In [2]:
# Performance Improvement
# 1. Auto-tune
# Sets the `autotune` flag on Chainer's global configuration object.
# NOTE(review): the original author saw very little speed-up from this and was
# unsure whether the flag is already True by default -- TODO confirm against
# the documentation of the installed Chainer version.
chainer.global_config.autotune = True

In [3]:
# Force one-gpu: expose only device "0" to CUDA for this process.
# NOTE(review): this assignment runs after `chainer` has been imported;
# presumably the variable has to be in place before CUDA is first initialised
# for it to take effect -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
# Record the software / hardware stack this run was produced with, so the
# timings further down can be interpreted later.
# get_gpu_name, get_cuda_version and get_cudnn_version are not defined in this
# notebook -- presumably they come from `common.utils` (verify).
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Chainer: ", chainer.__version__)
print("CuPy: ", chainer.cuda.cupy.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Chainer:  3.4.0
CuPy:  2.4.0
Numpy:  1.14.1
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [5]:
class SymbolModule(chainer.Chain):
    """Small CNN classifier: two conv-conv-pool stages followed by two dense layers.

    The first convolution takes 3-channel input. The first dense layer is sized
    for 100 feature maps of 8x8, which is what two 2x2 poolings leave of a
    32x32 image (the shape of the CIFAR arrays loaded in this notebook).

    Args:
        n_classes: Number of output units of the final dense layer.
    """

    def __init__(self, n_classes=N_CLASSES):
        super().__init__()
        with self.init_scope():
            # Stage 1: 3 -> 50 -> 50 channels; 3x3 kernels with pad=1 keep H and W.
            self.conv1 = L.Convolution2D(3, 50, ksize=3, pad=1)
            self.conv2 = L.Convolution2D(50, 50, ksize=3, pad=1)
            # Stage 2: 50 -> 100 -> 100 channels, same kernel geometry.
            self.conv3 = L.Convolution2D(50, 100, ksize=3, pad=1)
            self.conv4 = L.Convolution2D(100, 100, ksize=3, pad=1)
            # Classifier head; after both poolings each feature map is 8x8.
            self.fc1 = L.Linear(100*8*8, 512)
            self.fc2 = L.Linear(512, n_classes)

    def __call__(self, x):
        """Run the forward pass and return the raw output of the last dense layer.

        No softmax is applied here; the training cell feeds the result to
        ``F.softmax_cross_entropy`` and the evaluation cell takes its argmax.
        """
        # Stage 1: conv -> relu -> conv -> pool -> relu -> dropout.
        # ReLU is applied after max-pooling; because ReLU is monotonic this
        # yields the same values as relu-then-pool on a quarter of the elements.
        stage1 = F.relu(self.conv1(x))
        stage1 = self.conv2(stage1)
        stage1 = F.max_pooling_2d(stage1, ksize=2, stride=2)
        stage1 = F.relu(stage1)
        stage1 = F.dropout(stage1, ratio=0.25)

        # Stage 2: identical layout with the wider convolutions.
        stage2 = F.relu(self.conv3(stage1))
        stage2 = self.conv4(stage2)
        stage2 = F.max_pooling_2d(stage2, ksize=2, stride=2)
        stage2 = F.relu(stage2)
        stage2 = F.dropout(stage2, ratio=0.25)

        # Dense head: hidden layer with relu + dropout, then the output layer.
        hidden = F.relu(self.fc1(stage2))
        hidden = F.dropout(hidden, ratio=0.5)
        return self.fc2(hidden)

In [6]:
def init_model(m, lr=LR, momentum=MOMENTUM):
    """Create a momentum-SGD optimizer and attach it to the given model.

    Args:
        m: The model (link) whose parameters the optimizer will update.
        lr: Learning rate passed to ``MomentumSGD``.
        momentum: Momentum coefficient passed to ``MomentumSGD``.

    Returns:
        The ``MomentumSGD`` instance, already set up on ``m``.
    """
    sgd = optimizers.MomentumSGD(lr=lr, momentum=momentum)
    sgd.setup(m)
    return sgd

In [7]:
%%time
# Load the CIFAR splits in channel-first layout and report their shapes and
# dtypes (one line each, in train/test order for x then y).
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
splits = (x_train, x_test, y_train, y_test)
print(*(arr.shape for arr in splits))
print(*(arr.dtype for arr in splits))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 605 ms, sys: 612 ms, total: 1.22 s
Wall time: 1.22 s


In [8]:
%%time
# Create symbol
sym = SymbolModule()
# `chainer.cuda.get_device` is deprecated since Chainer v2; for an integer
# device id the supported replacement is `get_device_from_id`.
chainer.cuda.get_device_from_id(0).use()  # Make a specified GPU current
sym.to_gpu()  # Copy the model to the GPU

CPU times: user 216 ms, sys: 132 ms, total: 349 ms
Wall time: 348 ms


In [9]:
%%time
# Build the optimizer for `sym` using init_model's default lr and momentum.
optimizer = init_model(sym)

CPU times: user 115 µs, sys: 0 ns, total: 115 µs
Wall time: 119 µs


In [10]:
%%time
# Main training loop: 69s
for epoch in range(EPOCHS):
    minibatches = yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)
    for data, target in minibatches:
        # Move the current mini-batch onto the GPU
        data, target = cuda.to_gpu(data), cuda.to_gpu(target)
        # Forward pass and loss
        loss = F.softmax_cross_entropy(sym(data), target)
        # Clear old gradients, back-propagate, then take one optimizer step
        sym.cleargrads()
        loss.backward()
        optimizer.update()
    # Log the index of the epoch that just finished
    print(epoch)

0
1
2
3
4
5
6
7
8
9
CPU times: user 1min 7s, sys: 1.61 s, total: 1min 8s
Wall time: 1min 9s


In [13]:
%%time
# Main evaluation loop: 800ms
# Only whole mini-batches are scored: n_samples is rounded down to a multiple
# of BATCHSIZE and the labels are truncated to match.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# `np.int` was only an alias of the builtin `int` and has been removed from
# recent NumPy releases; `int` gives the same dtype on every NumPy version.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
# Evaluate with the `train` flag off and without building a backward graph.
with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
    # assumes yield_mb yields only full batches of BATCHSIZE here, consistent
    # with the truncation above -- TODO confirm against its definition
    for batch_idx, (data, target) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
        # Forwards
        scores = sym(cuda.to_gpu(data))
        pred = cuda.to_cpu(scores.data.argmax(-1))
        # Collect results
        start = batch_idx * BATCHSIZE
        y_guess[start:start + BATCHSIZE] = pred

CPU times: user 466 ms, sys: 0 ns, total: 466 ms
Wall time: 466 ms


In [14]:
# Fraction of evaluated test samples whose predicted class matches the label.
# np.mean over the boolean match array is vectorised; the builtin sum() walked
# the NumPy array one element at a time in Python.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.7901642628205128
