# High-level CNTK Example

In [1]:
import numpy as np
import os
import sys
import cntk
from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout
from common.params import *
from common.utils import *

In [2]:
# Restrict this process to a single GPU (device index 0).
# NOTE(review): this runs after `import cntk` in the first cell; confirm that
# the variable is still honoured when it is set after the import.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Report the software/hardware environment so results can be compared
# across machines. Labels are printed exactly as before.
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("CNTK: ", cntk.__version__),
):
    print(label, value)

# Helpers from common.utils; called in the same order as the original cell.
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
CNTK:  2.4
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
def create_symbol(n_classes=N_CLASSES, input_var=None):
    """Build the CNN used for the CIFAR benchmark and return its output node.

    Architecture: two blocks of (3x3 conv, 3x3 conv, 2x2 max-pool, dropout 0.25)
    with 50 and 100 filters respectively, followed by Dense(512), dropout 0.5
    and a final linear Dense(n_classes) layer.

    Args:
        n_classes: Number of output units of the final dense layer.
        input_var: CNTK input variable the network is applied to. When omitted,
            the module-level ``features`` variable is used, which preserves the
            original behaviour (and still raises NameError if ``features`` has
            not been defined yet).

    Returns:
        The CNTK function for the last layer. No activation is applied to it
        (``activation=None``), so it yields raw scores rather than
        probabilities.
    """
    if input_var is None:
        # Backward-compatible fallback: the original implementation always read
        # the notebook-global `features`, which is created in a later cell.
        input_var = features

    # Within this scope every layer defaults to Glorot-uniform initialisation
    # and ReLU activation unless a layer overrides it explicitly.
    with cntk.layers.default_options(init = cntk.glorot_uniform(), activation = cntk.relu):
        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(input_var)
        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)

        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)

        x = Dense(512)(x)
        x = Dropout(0.5)(x)
        # Final layer overrides the scope default: no activation here.
        x = Dense(n_classes, activation=None)(x)
        return x

In [5]:
def init_model(m, labels, lr=LR, momentum=MOMENTUM):
    """Create the training criterion and the optimiser for model ``m``.

    Args:
        m: CNTK function producing the network output.
        labels: CNTK variable holding the (dense) target labels.
        lr: Learning rate, applied per minibatch.
        momentum: Momentum value passed to ``cntk.momentum_schedule``.

    Returns:
        Tuple ``(loss, learner)``: the softmax cross-entropy criterion and a
        momentum-SGD learner over ``m.parameters``.
    """
    # Dense labels are used here; sparse-label support is still to be checked.
    criterion = cntk.cross_entropy_with_softmax(m, labels)

    # See https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    lr_per_minibatch = cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch)
    momentum_sched = cntk.momentum_schedule(momentum)

    # unit_gain=False: direction = momentum * previous_direction + gradient.
    # With unit_gain=True the gradient term would be scaled by (1 - momentum).
    optimiser = cntk.momentum_sgd(
        m.parameters,
        lr=lr_per_minibatch,
        momentum=momentum_sched,
        unit_gain=False,
    )
    return criterion, optimiser

In [6]:
%%time
# Load CIFAR as channel-first images with one-hot encoded labels.
x_train, x_test, y_train, y_test = cifar_for_library(one_hot=True, channel_first=True)

# The labels are cast to float32 for CNTK.
y_train, y_test = (y.astype(np.float32) for y in (y_train, y_test))

# Sanity check: shapes first, then dtypes, in train/test order.
print(*(arr.shape for arr in (x_train, x_test, y_train, y_test)))
print(*(arr.dtype for arr in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000, 10) (10000, 10)
float32 float32 float32 float32
CPU times: user 738 ms, sys: 575 ms, total: 1.31 s
Wall time: 1.31 s


In [7]:
%%time
# Symbolic inputs: one 3x32x32 image and one N_CLASSES-long label vector.
features = cntk.input_variable(shape=(3, 32, 32), dtype=np.float32)
labels = cntk.input_variable(shape=N_CLASSES, dtype=np.float32)

# Build the network; create_symbol is applied to the `features` variable
# defined just above.
sym = create_symbol(n_classes=N_CLASSES)

CPU times: user 16.7 ms, sys: 40.4 ms, total: 57.1 ms
Wall time: 67.4 ms


In [8]:
%%time
# Training criterion and momentum-SGD learner (default LR / MOMENTUM).
loss, learner = init_model(m=sym, labels=labels)

CPU times: user 122 ms, sys: 116 ms, total: 238 ms
Wall time: 239 ms


In [9]:
%%time
# Main training loop (about 49s wall time in the recorded run).
# Left as a bare expression so the returned training summary is displayed.
loss.train(
    (x_train, y_train),
    parameter_learners=[learner],
    max_epochs=EPOCHS,
    minibatch_size=BATCHSIZE,
)

CPU times: user 37.9 s, sys: 10.8 s, total: 48.7 s
Wall time: 48.8 s


{'epoch_summaries': [{'loss': 1.8144259375, 'metric': 0.0, 'samples': 50000},
  {'loss': 1.36322234375, 'metric': 0.0, 'samples': 50000},
  {'loss': 1.122504140625, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.974794296875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.8672890625, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.7853078125, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.716815546875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.65541078125, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.606273671875, 'metric': 0.0, 'samples': 50000},
  {'loss': 0.560514921875, 'metric': 0.0, 'samples': 50000}],
 'updates': [{'loss': 1.8144589081005922, 'metric': 0.0, 'samples': 49984},
  {'loss': 1.363123699583867, 'metric': 0.0, 'samples': 49984},
  {'loss': 1.1224501996889005, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.9746546238546335, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.8671638205475752, 'metric': 0.0, 'samples': 49984},
  {'loss': 0.7853081736155569, 'metric': 0.

In [10]:
%%time
# Main evaluation loop: 409ms
# Only whole minibatches are scored, so the test set is truncated to a
# multiple of BATCHSIZE.
# NOTE(review): this presumes yield_mb yields only full batches of BATCHSIZE
# rows — confirm against common.utils.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin `int` replaces the `np.int` alias (deprecated in NumPy 1.20 and
# removed in 1.24); the resulting dtype is the same.
y_guess = np.zeros(n_samples, dtype=int)
# Ground-truth class indices recovered from the one-hot labels.
y_truth = np.argmax(y_test[:n_samples], axis=-1)
c = 0
for data, label in yield_mb(x_test, y_test, BATCHSIZE):
    # Forward pass on one batch; the outputs are raw scores (no softmax is
    # applied), which is sufficient for taking the argmax.
    predicted_label_probs = sym.eval({features : data})
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)
    c += 1

CPU times: user 284 ms, sys: 95.9 ms, total: 380 ms
Wall time: 409 ms


In [11]:
# Fraction of evaluated test samples whose predicted class matches the truth.
print("Accuracy: ", 1. * np.sum(y_guess == y_truth) / len(y_guess))

Accuracy:  0.7591145833333334
