# High-level Caffe2 Example

In [1]:
import os
import sys
import caffe2
import numpy as np
from caffe2.python import core, model_helper, workspace, visualize, brew, optimizer, utils
from caffe2.proto import caffe2_pb2
from common.params import *
from common.utils import *

In [2]:
# Expose only the first GPU to CUDA for this process.
# NOTE(review): this assignment runs after the caffe2 imports in the previous cell;
# CUDA_VISIBLE_DEVICES is normally only honoured if it is set before the CUDA
# runtime is initialised -- confirm it is early enough to take effect.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Report the software/hardware environment so the timings below can be put in context.
for heading, value in (("OS: ", sys.platform),
                       ("Python: ", sys.version),
                       ("Numpy: ", np.__version__)):
    print(heading, value)
# GPU / CUDA / cuDNN details come from the project helpers in common.utils.
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
# Caffe2 device option for CUDA device index 0; used below as the default device
# for building the networks and for feeding blobs.
DEVICE_OPTS = core.DeviceOption(caffe2_pb2.CUDA, 0)  # Run on GPU

In [5]:
def create_model(m, device_opts=DEVICE_OPTS, n_classes=N_CLASSES):
    """Add the convolutional classifier to model helper ``m``.

    The network reads the blob named 'data' and stacks two conv blocks
    (conv -> relu -> conv -> max-pool -> relu -> dropout), followed by a
    512-unit fully connected layer with dropout and a final ``n_classes``-way
    fully connected layer with softmax.

    Args:
        m: model helper that receives the operators.
        device_opts: device option under which all operators are created.
        n_classes: output width of the last fully connected layer.

    Returns:
        The blob produced by the final softmax operator (named 'softmax').
    """
    with core.DeviceScope(device_opts):
        # Block 1: 3 -> 50 -> 50 channels, 3x3 kernels with padding 1.
        # No gradient is requested w.r.t. the raw input of the first conv.
        net = brew.conv(m, 'data', 'conv1',
                        dim_in=3, dim_out=50, kernel=3, pad=1,
                        no_gradient_to_input=1)
        net = brew.relu(m, net, 'relu1')
        net = brew.conv(m, net, 'conv2',
                        dim_in=50, dim_out=50, kernel=3, pad=1)
        net = brew.max_pool(m, net, 'pool1', kernel=2, stride=2)
        net = brew.relu(m, net, 'relu2')
        net = brew.dropout(m, net, 'drop1', ratio=0.25)

        # Block 2: 50 -> 100 -> 100 channels, same layout as block 1.
        net = brew.conv(m, net, 'conv3',
                        dim_in=50, dim_out=100, kernel=3, pad=1)
        net = brew.relu(m, net, 'relu3')
        net = brew.conv(m, net, 'conv4',
                        dim_in=100, dim_out=100, kernel=3, pad=1)
        net = brew.max_pool(m, net, 'pool2', kernel=2, stride=2)
        net = brew.relu(m, net, 'relu4')
        net = brew.dropout(m, net, 'drop2', ratio=0.25)

        # Classifier head. dim_in is 100 channels * 8 * 8 positions --
        # presumably 32x32 inputs halved by each of the two pools; confirm
        # before feeding images of a different size.
        net = brew.fc(m, net, 'fc1', dim_in=100 * 8 * 8, dim_out=512)
        net = brew.relu(m, net, 'relu5')
        net = brew.dropout(m, net, 'drop3', ratio=0.5)

        net = brew.fc(m, net, 'fc2', dim_in=512, dim_out=n_classes)
        return brew.softmax(m, net, 'softmax')

In [6]:
def add_training_operators(softmax, m, device_opts=DEVICE_OPTS, lr=LR, momentum=MOMENTUM):
    """Attach loss, gradient and SGD update operators to model helper ``m``.

    Args:
        softmax: blob holding the network's softmax output.
        m: model helper to extend.
        device_opts: device option under which the operators are created.
        lr: base learning rate passed to the SGD optimizer (policy 'fixed').
        momentum: momentum passed to the SGD optimizer.

    Returns:
        None. The model helper is modified in place.
    """
    with core.DeviceScope(device_opts):
        # Cross entropy of the softmax output against the "label" blob,
        # averaged into a blob named "loss".
        loss = m.AveragedLoss(
            m.LabelCrossEntropy([softmax, "label"], 'xent'),
            "loss",
        )
        m.AddGradientOperators([loss])
        # Parameter updates: SGD with the given learning rate and momentum.
        optimizer.build_sgd(
            m,
            base_learning_rate=lr,
            policy='fixed',
            momentum=momentum,
        )

In [7]:
def init_model(x_train, y_train, batchsize=BATCHSIZE, device_opts=DEVICE_OPTS):
    """Build the training network and initialise it in the Caffe2 workspace.

    Args:
        x_train: training inputs; the first ``batchsize`` rows are fed as "data".
        y_train: training labels; the first ``batchsize`` entries are fed as "label".
        batchsize: number of leading samples used for the initial feed.
        device_opts: device option used for the blobs and the network.

    Returns:
        The model helper named "train_net", with its parameters initialised
        and its net created in the workspace.
    """
    # Feed one batch so the "data" and "label" blobs exist before the net is created.
    for blob_name, samples in (("data", x_train), ("label", y_train)):
        workspace.FeedBlob(blob_name, samples[:batchsize], device_option=device_opts)

    # Default operator arguments applied to every layer helper of this model.
    train_arg_scope = dict(
        order='NCHW',
        use_cudnn=True,
        cudnn_exhaustive_search=True,
    )
    train_model = model_helper.ModelHelper(name="train_net",
                                           arg_scope=train_arg_scope)

    # Forward pass first, then loss / gradients / optimizer on top of its output.
    add_training_operators(
        create_model(train_model, device_opts=device_opts),
        train_model,
        device_opts=device_opts,
    )

    # Run the parameter initialisers once, then register the training net.
    workspace.RunNetOnce(train_model.param_init_net)
    workspace.CreateNet(train_model.net)
    return train_model

In [8]:
%%time
# Load the CIFAR train/test splits in channel-first layout for the library.
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
# Sanity check: print the shape, then the dtype, of every split.
splits = (x_train, x_test, y_train, y_test)
print(*[split.shape for split in splits])
print(*[split.dtype for split in splits])

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 717 ms, sys: 573 ms, total: 1.29 s
Wall time: 1.29 s


In [9]:
%%time
# Build and initialise the training network from the training arrays
# (batch size and device fall back to init_model's defaults).
model = init_model(x_train=x_train, y_train=y_train)

CPU times: user 455 ms, sys: 472 ms, total: 928 ms
Wall time: 1.08 s


In [10]:
%%time
# Main training loop (about 54 s wall time in the recorded run)
for epoch in range(EPOCHS):
    # One pass over the shuffled training set, one mini-batch per net run.
    for batch_x, batch_y in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        workspace.FeedBlob("data", batch_x, device_option=DEVICE_OPTS)
        workspace.FeedBlob("label", batch_y, device_option=DEVICE_OPTS)
        workspace.RunNet(model.net)
    print("Finished epoch: ", epoch)
    # The "loss" blob holds the value from the last mini-batch that was run.
    print(epoch, workspace.FetchBlob("loss"), sep=': ')

Finished epoch:  0
0: 1.58287
Finished epoch:  1
1: 1.0789291
Finished epoch:  2
2: 1.2310464
Finished epoch:  3
3: 0.86105233
Finished epoch:  4
4: 0.65233827
Finished epoch:  5
5: 0.7383535
Finished epoch:  6
6: 0.4522251
Finished epoch:  7
7: 0.5257952
Finished epoch:  8
8: 0.5940001
Finished epoch:  9
9: 0.38488936
CPU times: user 40 s, sys: 14 s, total: 54 s
Wall time: 54.4 s


In [11]:
%%time
# Build the evaluation network: same architecture, with is_test=True in the
# argument scope and init_params=False on the model helper.
test_arg_scope = dict(
    order='NCHW',
    use_cudnn=True,
    cudnn_exhaustive_search=True,
    is_test=True,
)
test_model = model_helper.ModelHelper(
    name="test_net",
    init_params=False,
    arg_scope=test_arg_scope,
)
create_model(test_model, device_opts=DEVICE_OPTS)
# Run the test model's init net once, then (re)create its net in the workspace.
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)

CPU times: user 6.1 ms, sys: 0 ns, total: 6.1 ms
Wall time: 5.99 ms


In [12]:
%%time
# Main evaluation loop (about 0.5 s wall time in the recorded run)
# Only whole mini-batches are scored: n_samples is the test-set size rounded
# down to a multiple of BATCHSIZE, so trailing samples are left out.
n_samples = (y_test.shape[0] // BATCHSIZE) * BATCHSIZE
# Builtin int replaces the np.int alias (deprecated in NumPy 1.20, removed in
# 1.24); both denote the same dtype, so the resulting array is unchanged.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
# NOTE(review): assumes yield_mb only yields full batches of BATCHSIZE samples,
# otherwise the slice assignment below would not line up -- confirm in common.utils.
for batch_idx, (data, _) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    workspace.FeedBlob("data", data, device_option=DEVICE_OPTS)
    workspace.RunNet(test_model.net)
    # Predicted class = index of the largest softmax output for each sample.
    start = batch_idx * BATCHSIZE
    y_guess[start:start + BATCHSIZE] = np.argmax(workspace.FetchBlob("softmax"), axis=-1)

CPU times: user 312 ms, sys: 124 ms, total: 435 ms
Wall time: 466 ms


In [13]:
# Accuracy = share of scored test samples whose predicted class matches the label.
n_correct = sum(y_guess == y_truth)
print("Accuracy: ", n_correct / float(len(y_guess)))

Accuracy:  0.7848557692307693
