This notebook will pre-process all the images in `image_dir` using the model in `model_path`.

In [1]:
from os import path

# This is the model we are using, straight out of DIGITS. The three files
# below are later passed to get_net() and get_transformer().
# NOTE(review): these are absolute, machine-specific paths; adjust them
# before running the notebook elsewhere.
model_path = '/home/studio/Documents/level19'
model_fn = path.join(model_path, 'snapshot_iter_205950.caffemodel')  # model snapshot handed to caffe.Net
deploy_fn = path.join(model_path, 'deploy.prototxt')  # deploy network definition
mean_fn = path.join(model_path, 'mean.binaryproto')  # serialized mean blob

# Root directory of the files we want to process; it is walked recursively
# by list_all_files() further down.
image_dir = '/home/studio/Desktop/results/download_county/results/'

First we import the libraries we'll be using and define a couple of helper functions.

In [5]:
%matplotlib inline
from matplotlib import pyplot as plt
import time
import sys
import PIL.Image
import numpy as np
import scipy.misc
from google.protobuf import text_format

In [6]:
def chunks(l, n):
    """Yield successive slices of `l`, each holding at most `n` items.

    Args:
        l: A sequence that supports `len()` and slicing.
        n: Maximum number of items per slice; must be a positive integer.

    Yields:
        `l[i:i+n]` for i = 0, n, 2n, ...; the last slice may be shorter.

    Raises:
        ValueError: If `n` is less than 1. Because this is a generator, the
            error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        # A negative step would produce an empty range and silently drop
        # every item, so reject it explicitly.
        raise ValueError('chunk size must be a positive integer, got %r' % (n,))
    # `range` (not `xrange`) keeps this helper usable on Python 2 and 3.
    for i in range(0, len(l), n):
        yield l[i:i+n]

In [7]:
import os
import fnmatch
def list_all_files(directory, extensions=None):
    """Recursively yield the path of every file found under `directory`.

    Args:
        directory: Root directory to walk.
        extensions: Optional collection of file extensions, each including
            the leading dot (e.g. ['.jpeg', '.png']). Matching is
            case-insensitive on both sides. A single string is treated as
            one extension. None (the default) yields every file.

    Yields:
        `os.path.join(root, filename)` for each matching file, in the order
        produced by `os.walk`.
    """
    if extensions is not None:
        if hasattr(extensions, 'lower'):
            # A bare string such as '.png' would otherwise be matched by
            # substring (so even extension-less files would pass).
            extensions = (extensions,)
        # Normalize once up front: the file's extension is lowercased before
        # comparison, so an entry like '.PNG' could never match otherwise.
        extensions = frozenset(ext.lower() for ext in extensions)

    for root, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            ext = os.path.splitext(filename)[1].lower()
            if extensions is None or ext in extensions:
                yield os.path.join(root, filename)

Then we load pycaffe, the Python wrapper for Caffe. Adding the pycaffe root to the system path is a little bit of a hack, but we can't always add it correctly. All the warnings can be ignored according to [this post](https://groups.google.com/forum/#!msg/caffe-users/LZjsJFRzfcU/TVm24uIQCQAJ).

In [8]:
# Directory containing the pycaffe package. It is placed at the front of
# sys.path so it is searched first when `caffe` is imported below.
pycaffe_root = '/home/studio/Documents/caffe/python'
sys.path.insert(0, pycaffe_root)
# These imports must stay after the sys.path change above.
import caffe
from caffe.proto import caffe_pb2

  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \
  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \
  from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \


A big chunk of this code is based on the [classification example](https://github.com/NVIDIA/DIGITS/blob/master/examples/classification/example.py) that comes with DIGITS, with only minor modifications:

In [9]:
def get_net(caffemodel, deploy_file, use_gpu=True):
    """Load a trained Caffe network in TEST phase.

    Args:
        caffemodel: Path to the trained weights file.
        deploy_file: Path to the deploy network definition.
        use_gpu: If True run Caffe in GPU mode, otherwise in CPU mode.

    Returns:
        The `caffe.Net` built from `deploy_file` and `caffemodel`.
    """
    if use_gpu:
        caffe.set_mode_gpu()
    else:
        # The mode is global state: without this, a GPU mode selected by an
        # earlier call in the same kernel would stay active even though the
        # caller asked for CPU.
        caffe.set_mode_cpu()
    return caffe.Net(deploy_file, caffemodel, caffe.TEST)

def get_transformer(deploy_file, mean_file=None):
    """Build a `caffe.io.Transformer` for the network's 'data' input.

    Args:
        deploy_file: Path to the deploy prototxt, parsed as a text-format
            `NetParameter` to obtain the input dimensions.
        mean_file: Optional path to a serialized `BlobProto`. When given,
            its data is averaged over the two trailing axes and the result
            is registered as the mean for 'data'.

    Returns:
        A transformer that transposes inputs with (2, 0, 1), swaps channels
        with (2, 1, 0) when the input has 3 channels, and subtracts the mean
        pixel when `mean_file` is given.

    Raises:
        ValueError: If the mean blob's shape does not have 4 dimensions, or
            the blob carries neither a shape nor the four legacy dimension
            fields.
    """
    network = caffe_pb2.NetParameter()
    with open(deploy_file) as infile:
        text_format.Merge(infile.read(), network)

    # Prefer the `input_shape` field; fall back to `input_dim`.
    if network.input_shape:
        dims = network.input_shape[0].dim
    else:
        dims = network.input_dim[:4]

    t = caffe.io.Transformer(inputs={'data': dims})
    t.set_transpose('data', (2, 0, 1))  # transpose to (channels, height, width)

    # color images
    if dims[1] == 3:
        # channel swap
        t.set_channel_swap('data', (2, 1, 0))

    if mean_file:
        # set mean pixel
        with open(mean_file, 'rb') as infile:
            blob = caffe_pb2.BlobProto()
            blob.MergeFromString(infile.read())

        if blob.HasField('shape'):
            # `blob.shape` is a message; the dimensions live in its repeated
            # `dim` field. Taking len() of / slicing the message itself fails.
            blob_dims = tuple(blob.shape.dim)
            if len(blob_dims) != 4:
                raise ValueError(
                    'Shape should have 4 dimensions - shape is "%s"' % (blob_dims,))
        elif blob.HasField('num') and blob.HasField('channels') and \
                blob.HasField('height') and blob.HasField('width'):
            blob_dims = (blob.num, blob.channels, blob.height, blob.width)
        else:
            raise ValueError('blob does not provide shape or 4d dimensions')

        # Drop the leading dimension, then average over the two trailing
        # axes to leave one mean value per channel.
        pixel = np.reshape(blob.data, blob_dims[1:]).mean(1).mean(1)
        t.set_mean('data', pixel)

    return t

def load_image(path, height, width):
    """Load an image file as RGB and squash it to the requested size.

    Args:
        path: Path of the image file to open. (Inside this function the
            name shadows the module-level `os.path` import.)
        height: Target height in pixels.
        width: Target width in pixels.

    Returns:
        A numpy array built from the resized RGB image. The aspect ratio is
        not preserved.
    """
    image = PIL.Image.open(path).convert('RGB')
    # squash -- done with PIL directly because scipy.misc.imresize is
    # deprecated and absent from newer SciPy releases. PIL takes the size as
    # (width, height), the reverse of imresize's (height, width).
    image = image.resize((width, height), PIL.Image.BILINEAR)
    return np.array(image)

def encode(images, net, transformer):
    """Run one batch of images through `net` and return two blobs' contents.

    Args:
        images: Sequence of image arrays. 2-D (grayscale) arrays are given a
            trailing channel axis before preprocessing.
        net: Network exposing `blobs` (an ordered mapping that includes
            'data') and `forward()`.
        transformer: Object exposing `inputs['data']` (the input shape) and
            `preprocess('data', image)`.

    Returns:
        A tuple `(class_data, code_data)`: a copy of the last blob in
        `net.blobs`, and a copy of the third-from-last blob flattened to one
        row per image.
    """
    # Add an explicit channel axis to grayscale images. These expanded
    # arrays are what must be preprocessed below; feeding the untouched
    # inputs would leave this step without effect.
    caffe_images = [
        image[:, :, np.newaxis] if image.ndim == 2 else image
        for image in images
    ]
    batch_len = len(caffe_images)

    # Resize the input blob only when the batch size differs from last time.
    dims = transformer.inputs['data'][1:]
    new_shape = (batch_len,) + tuple(dims)
    if net.blobs['data'].data.shape != new_shape:
        net.blobs['data'].reshape(*new_shape)

    for index, image in enumerate(caffe_images):
        net.blobs['data'].data[index] = transformer.preprocess('data', image)
    net.forward()

    # list(...) so that indexing works whether keys() returns a list or a view.
    blob_names = list(net.blobs.keys())
    class_key = blob_names[-1]  # final blob
    code_key = blob_names[-3]   # third-from-last blob
    # Copy the results: the blobs are overwritten by the next forward pass.
    class_data = np.copy(net.blobs[class_key].data)
    code_data = np.copy(net.blobs[code_key].data).reshape(batch_len, -1)
    return class_data, code_data

Then we load the network from disk, which can take up to 10 seconds the first time.

In [11]:
# Load the network (timed with %time) and build the input transformer from
# the same deploy file plus the mean file.
%time net = get_net(model_fn, deploy_fn)
transformer = get_transformer(deploy_fn, mean_fn)
# Unpack the 4-element 'data' input shape; height and width are the target
# size handed to load_image() during batch processing.
_, channels, height, width = transformer.inputs['data']

CPU times: user 152 ms, sys: 1.36 s, total: 1.52 s
Wall time: 1.53 s


And we recursively list all 500k files in our target directory, which can also take up to 10 seconds the first time.

In [12]:
# Walk image_dir recursively, keeping only .jpeg and .png files.
%time filenames = list(list_all_files(image_dir, ['.jpeg', '.png']))
# Persist the list, one path per line, so the row order of the saved arrays
# can be matched back to files later.
np.savetxt('filenames.txt', filenames, fmt='%s')
len(filenames)

CPU times: user 3.04 s, sys: 3.4 s, total: 6.44 s
Wall time: 9.32 s


571771

After saving the filenames to disk we double check that we didn't miss any.

In [8]:
# Line count of the saved list; it should equal the number of filenames
# collected by the listing cell.
!wc -l filenames.txt

571771 filenames.txt


We check that we can load the filenames from disk, too.

In [9]:
# Read the saved list back, one path per line, trimming surrounding
# whitespace (including the newline) from every entry.
with open('filenames.txt', 'r') as f:
    filenames = [entry.strip() for entry in f]
len(filenames)

571771

We do the classification in batches, printing a progress note and saving a checkpoint file every `checkpoint_iter` batches. In general, a larger batch size processes each image faster, but at some point we will run out of memory. After setting these variables we run the batch process.

In [23]:
# Number of images sent through the network per forward pass.
batch_size = 64
# A progress line is printed and both result arrays are saved whenever the
# batch index is a multiple of this value.
checkpoint_iter = 100

In [None]:
# Classify every file in `filenames` in batches of `batch_size`, saving the
# accumulated results every `checkpoint_iter` batches and once more at the end.
classify_start_time = time.time()

class_fn = path.join(model_path, 'all_class_data.npy')
code_fn = path.join(model_path, 'all_code_data.npy')

all_code_data = None
all_class_data = None
# Per-batch results are collected in lists and stacked only at checkpoints;
# stacking onto the full array after every batch re-copies everything
# accumulated so far and makes the loop quadratic in the number of batches.
class_parts = []
code_parts = []

for i, filename_chunk in enumerate(chunks(filenames, batch_size)):
    images = [load_image(fn, height, width) for fn in filename_chunk]
    # The timer starts after the images are loaded, so the per-batch rate
    # below covers only the encode() call.
    chunk_start_time = time.time()
    class_data, code_data = encode(images, net, transformer)
    class_parts.append(class_data)
    code_parts.append(code_data)
    ips = len(filename_chunk) / (time.time() - chunk_start_time)
    if i % checkpoint_iter == 0:
        print('Batch %i: %.2f images/second, saving.' % (i, ips))
        all_class_data = np.vstack(class_parts)
        all_code_data = np.vstack(code_parts)
        # Collapse the lists to the stacked arrays so the individual pieces
        # can be freed and are not kept alongside the stacked copy.
        class_parts = [all_class_data]
        code_parts = [all_code_data]
        np.save(class_fn, all_class_data)
        np.save(code_fn, all_code_data)

if class_parts:
    if len(class_parts) > 1:
        # Batches were processed since the last checkpoint.
        all_class_data = np.vstack(class_parts)
        all_code_data = np.vstack(code_parts)
        class_parts = [all_class_data]
        code_parts = [all_code_data]
    print('Saving on final iteration.')
    np.save(class_fn, all_class_data)
    np.save(code_fn, all_code_data)
else:
    # Nothing was processed; do not overwrite the result files with None.
    print('No images to classify; nothing saved.')

# Total time is measured from the start of the whole run, not from the start
# of the last batch.
classify_duration = (time.time() - classify_start_time)
ips = len(filenames) / classify_duration
print('Classified %i images in %.2f seconds at %.2f images/second' % (len(filenames), classify_duration, ips))

Batch 0: 78.22 images/second, saving.
Batch 100: 85.77 images/second, saving.


The batch processing saved two files: `all_class_data.npy` and `all_code_data.npy`, which capture, for each image, the network's last blob and its third-from-last blob (the `[-1]` and `[-3]` entries of `net.blobs` read in `encode`). After everything is done, we can load the data to check that it didn't miss any images.

In [15]:
# Reload the saved class data from disk and confirm there is one row per
# input file.
all_class_data = np.load(class_fn)
# Fail loudly on a mismatch instead of relying on reading the shape by eye.
assert all_class_data.shape[0] == len(filenames), (
    'expected %i rows, found %i' % (len(filenames), all_class_data.shape[0]))
all_class_data.shape

(571771, 188)