Python/net spec coordinate map and crop computation #3613
Merged
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
7a8b19f
[pycaffe] add coord_map.py for computing induced coordinate transform
longjon 6149e73
[pycaffe] document, style, and complete coord_map
shelhamer 25b9ef9
[pycaffe] align coord_map and #3570 Crop layer
shelhamer 880e147
[pycaffe] test coord_map
shelhamer
Jump to file or symbol
Failed to load files and symbols.
| @@ -0,0 +1,185 @@ | ||
| +""" | ||
| +Determine spatial relationships between layers to relate their coordinates. | ||
| +Coordinates are mapped from input-to-output (forward), but can | ||
| +be mapped output-to-input (backward) by the inverse mapping too. | ||
| +This helps crop and align feature maps among other uses. | ||
| +""" | ||
| + | ||
| +from __future__ import division | ||
| +import numpy as np | ||
| +from caffe import layers as L | ||
| + | ||
# Layer types whose coordinate mapping is the identity: coord_map() returns
# (None, 1, 0) for any layer whose type name appears in this list.
PASS_THROUGH_LAYERS = [
    'AbsVal', 'BatchNorm', 'Bias', 'BNLL', 'Dropout', 'Eltwise', 'ELU',
    'Log', 'LRN', 'Exp', 'MVN', 'Power', 'ReLU', 'PReLU', 'Scale',
    'Sigmoid', 'Split', 'TanH', 'Threshold',
]
| + | ||
| + | ||
def conv_params(fn):
    """
    Extract the spatial parameters that determine the coordinate mapping:
    kernel size, stride, padding, and dilation.

    Implementation detail: Convolution, Deconvolution, and Im2col layers
    define these in the convolution_param message, while Pooling has its
    own fields in pooling_param. The parameters may also be given flat,
    directly in fn.params. All three spellings are accepted here.

    Returns the tuple (axis, stride, kernel extent, pad) where the kernel
    extent already accounts for dilation, (kernel_size - 1) * dilation + 1,
    and stride, kernel extent, and pad are numpy arrays of at least 1-d.

    Raises AssertionError if legacy _h/_w parameters are present and
    KeyError if no kernel_size is given.
    """
    params = fn.params
    for message in ('convolution_param', 'pooling_param'):
        if message in fn.params:
            params = fn.params[message]
            break
    # Reject legacy fields wherever they appear (flat or nested), and do so
    # before kernel_size is read so that a legacy-only spec reports the real
    # problem rather than a KeyError.
    legacy = {'pad_h', 'pad_w', 'kernel_h', 'kernel_w', 'stride_h',
              'stride_w'}
    if legacy & (set(fn.params) | set(params)):
        # raised explicitly (not `assert`) so the check survives python -O
        # while keeping the exception type callers may already catch
        raise AssertionError('cropping does not support legacy _h/_w params')
    axis = params.get('axis', 1)
    ks = np.array(params['kernel_size'], ndmin=1)
    dilation = np.array(params.get('dilation', 1), ndmin=1)
    stride = np.array(params.get('stride', 1), ndmin=1)
    pad = np.array(params.get('pad', 0), ndmin=1)
    return axis, stride, (ks - 1) * dilation + 1, pad
| + | ||
| + | ||
def crop_params(fn):
    """
    Read the Crop layer settings, filling in defaults for missing fields.

    The settings are taken from the nested crop_param message when present
    and from fn.params itself otherwise. Returns the pair (axis, offset)
    with offset as a numpy array of at least 1-d.
    """
    settings = fn.params.get('crop_param', fn.params)
    # without an explicit axis, crop spatially: axis 2 is H in N, C, H, W
    first_axis = settings.get('axis', 2)
    offsets = np.array(settings.get('offset', 0), ndmin=1)
    return first_axis, offsets
| + | ||
| + | ||
class UndefinedMapException(Exception):
    """
    Signals a layer type for which no coordinate mapping is defined.
    """
| + | ||
| + | ||
def coord_map(fn):
    """
    Give the coordinate mapping of a layer as the triple
    (axis, scale, shift):
    - Convolution, Pooling, Im2col: scale 1 / stride and
      shift (pad - (kernel extent - 1) / 2) / stride
    - Deconvolution: scale stride and shift (kernel extent - 1) / 2 - pad
    - pass-through layers (PASS_THROUGH_LAYERS), like ReLU: the identity
      (None, 1, 0), which is independent of axis and leaves coords as-is
    - Crop: scale 1 and shift -offset, on the crop axis minus one

    Raises UndefinedMapException for any other layer type.
    """
    kind = fn.type_name
    if kind in ('Convolution', 'Pooling', 'Im2col'):
        axis, stride, extent, pad = conv_params(fn)
        half_extent = (extent - 1) / 2
        return axis, 1 / stride, (pad - half_extent) / stride
    if kind == 'Deconvolution':
        axis, stride, extent, pad = conv_params(fn)
        return axis, stride, (extent - 1) / 2 - pad
    if kind in PASS_THROUGH_LAYERS:
        return None, 1, 0
    if kind == 'Crop':
        crop_axis, offset = crop_params(fn)
        # -1 for last non-coordinate dim.
        return crop_axis - 1, 1, -offset
    raise UndefinedMapException
| + | ||
| + | ||
class AxisMismatchException(Exception):
    """
    Signals an attempt to combine coordinate maps whose axes disagree.
    """
| + | ||
| + | ||
def compose(base_map, next_map):
    """
    Chain two coord maps, each an (axis, scale, shift) triple.

    The resulting scale is the product of both scales; the resulting shift
    is the next map's shift scaled by the base scale, plus the base shift.
    An axis of None is compatible with any axis; two different concrete
    axes raise AxisMismatchException.
    """
    base_axis, base_scale, base_shift = base_map
    next_axis, next_scale, next_shift = next_map
    both_fixed = base_axis is not None and next_axis is not None
    if both_fixed and base_axis != next_axis:
        raise AxisMismatchException
    axis = next_axis if base_axis is None else base_axis
    scale = base_scale * next_scale
    shift = base_scale * next_shift + base_shift
    return axis, scale, shift
| + | ||
| + | ||
def inverse(coord_map):
    """
    Return the inverse of an (axis, scale, shift) coord map: the axis is
    kept, the scale becomes its reciprocal, and the shift is negated and
    divided by the scale. This is the backward mapping for the gradient.
    """
    axis, scale, shift = coord_map
    inv_scale = 1 / scale
    inv_shift = -shift / scale
    return axis, inv_scale, inv_shift
| + | ||
| + | ||
def coord_map_from_to(top_from, top_to):
    """
    Determine the coordinate mapping between a top (from) and a top (to).

    Both tops are walked back through the graph, composing coord maps along
    the way, until the walk from top_to reaches a blob that the walk from
    top_from has also visited. The result is the to-map at that blob
    composed with the inverse of the from-map at that blob.

    Raises RuntimeError if the two walks never share a blob.
    """
    # A common ancestor of top_from and top_to is needed. All common
    # ancestors are assumed to be equivalent (otherwise the graph is in an
    # inconsistent state, which is not checked for here).
    # The search is brute force.
    identity = (None, 1, 0)

    def collect_bottoms(top):
        """
        Pick the bottoms to walk for the coordinate mapping.
        Every bottom of a layer is walked, with the exception of Crop:
        only its first/cropped bottom is mappable, so the second/dimensions
        bottom is left out of the walk.
        """
        layer = top.fn
        inputs = layer.inputs
        return inputs[:1] if layer.type_name == 'Crop' else inputs

    def expand(top, maps, pending):
        """
        Record the map of each bottom of top and queue it for walking.
        A layer with no defined coord map ends the walk along that path.
        """
        try:
            for bottom in collect_bottoms(top):
                maps[bottom] = compose(maps[top], coord_map(top.fn))
                pending.add(bottom)
        except UndefinedMapException:
            pass

    # walk back from top_from, recording the coord map of every blob reached
    from_maps = {top_from: identity}
    pending = {top_from}
    while pending:
        expand(pending.pop(), from_maps, pending)

    # walk back from top_to and stop at the first blob seen by both walks
    to_maps = {top_to: identity}
    pending = {top_to}
    while pending:
        top = pending.pop()
        if top in from_maps:
            return compose(to_maps[top], inverse(from_maps[top]))
        expand(top, to_maps, pending)

    # both walks are exhausted without a blob in common
    raise RuntimeError('Could not compute map between tops; are they '
                       'connected by spatial layers?')
| + | ||
| + | ||
def crop(top_from, top_to):
    """
    Define a Crop layer to crop a top (from) to another top (to) by
    determining the coordinate mapping between the two and net spec'ing
    the axis and shift parameters of the crop.

    Raises AssertionError if the mapping has a scale other than 1, a
    positive shift, or a noninteger shift, since none of these can be
    expressed as a crop.
    """
    ax, a, b = coord_map_from_to(top_from, top_to)
    # An identity mapping (only pass-through layers between the tops) comes
    # back as the plain Python scalars 1 and 0, which have no .all() and
    # cannot be turned into a list; promote everything to 1-d arrays.
    a = np.atleast_1d(a)
    b = np.atleast_1d(b)
    # Raised explicitly (not `assert`) so the checks survive python -O while
    # keeping the exception type that callers already catch.
    if not (a == 1).all():
        raise AssertionError('scale mismatch on crop (a = {})'.format(a))
    if not (b <= 0).all():
        raise AssertionError(
            'cannot crop negative offset (b = {})'.format(b))
    if not (np.round(b) == b).all():
        raise AssertionError('cannot crop noninteger offset '
                             '(b = {})'.format(b))
    # tolist() yields plain Python ints rather than numpy integer scalars
    crop_param = dict(offset=(-np.round(b).astype(int)).tolist())
    if ax is not None:
        crop_param['axis'] = ax + 1  # +1 for first cropping dim.
    # When ax is None the mapping is axis-independent, so axis is left unset
    # and the Crop layer's own default applies (crop_params in this module
    # reads that default as 2).
    return L.Crop(top_from, top_to, crop_param=crop_param)
| @@ -0,0 +1,192 @@ | ||
| +import unittest | ||
| + | ||
| +import numpy as np | ||
| +import random | ||
| + | ||
| +import caffe | ||
| +from caffe import layers as L | ||
| +from caffe import params as P | ||
| +from caffe.coord_map import coord_map_from_to, crop | ||
| + | ||
| + | ||
def coord_net_spec(ks=3, stride=1, pad=0, pool=2, dstride=2, dpad=0):
    """
    Define net spec for simple conv-pool-deconv pattern common to all
    coordinate mapping tests.

    ks, stride, and pad configure the convolution; pool is used as both the
    kernel size and the stride of the max pooling; dstride and dpad are the
    stride and pad of the deconvolution. dstride may be a single number or a
    sequence with one stride per spatial dimension.

    Returns the NetSpec with tops data, aux, conv, pool, and deconv.
    """
    n = caffe.NetSpec()
    n.data = L.Input(shape=dict(dim=[2, 1, 100, 100]))
    n.aux = L.Input(shape=dict(dim=[2, 1, 20, 20]))
    n.conv = L.Convolution(
        n.data, num_output=10, kernel_size=ks, stride=stride, pad=pad)
    n.pool = L.Pooling(
        n.conv, pool=P.Pooling.MAX, kernel_size=pool, stride=pool, pad=0)
    # for upsampling kernel size is 2x stride
    try:
        deconv_ks = [s * 2 for s in dstride]
    except TypeError:
        # a scalar stride is not iterable; catch only that case so that
        # unrelated errors (and KeyboardInterrupt) are not swallowed
        deconv_ks = dstride * 2
    n.deconv = L.Deconvolution(
        n.pool, num_output=10, kernel_size=deconv_ks, stride=dstride, pad=dpad)
    return n
| + | ||
| + | ||
class TestCoordMap(unittest.TestCase):
    """
    Tests for the coordinate mapping and crop helpers of caffe.coord_map,
    built on the conv-pool-deconv net from coord_net_spec().

    Uses assertEqual throughout: the assertEquals alias is deprecated and
    no longer exists in Python 3.12.
    """

    def test_conv_pool_deconv(self):
        """
        Map through conv, pool, and deconv.
        """
        n = coord_net_spec()
        # identity for 2x pool, 2x deconv
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertEqual(a, 1)
        self.assertEqual(b, 0)
        # shift-by-one for 4x pool, 4x deconv
        n = coord_net_spec(pool=4, dstride=4)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertEqual(a, 1)
        self.assertEqual(b, -1)

    def test_pass(self):
        """
        A pass-through layer (ReLU) and conv (1x1, stride 1, pad 0)
        both do identity mapping.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.relu = L.ReLU(n.deconv)
        n.conv1x1 = L.Convolution(
            n.relu, num_output=10, kernel_size=1, stride=1, pad=0)
        for top in [n.relu, n.conv1x1]:
            ax_pass, a_pass, b_pass = coord_map_from_to(top, n.data)
            self.assertEqual(ax, ax_pass)
            self.assertEqual(a, a_pass)
            self.assertEqual(b, b_pass)

    def test_padding(self):
        """
        Padding conv adds offset while padding deconv subtracts offset.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        pad = random.randint(0, 10)
        # conv padding
        n = coord_net_spec(pad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b - pad, b_pad)
        # deconv padding
        n = coord_net_spec(dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b + pad, b_pad)
        # pad both to cancel out
        n = coord_net_spec(pad=pad, dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b, b_pad)

    def test_multi_conv(self):
        """
        Multiple bottoms/tops of a layer are identically mapped.
        """
        n = coord_net_spec()
        # multi bottom/top
        n.conv_data, n.conv_aux = L.Convolution(
            n.data, n.aux, ntop=2, num_output=10, kernel_size=5, stride=2,
            pad=0)
        ax1, a1, b1 = coord_map_from_to(n.conv_data, n.data)
        ax2, a2, b2 = coord_map_from_to(n.conv_aux, n.aux)
        self.assertEqual(ax1, ax2)
        self.assertEqual(a1, a2)
        self.assertEqual(b1, b2)

    def test_rect(self):
        """
        Anisotropic mapping is equivalent to its isotropic parts.
        """
        n3x3 = coord_net_spec(ks=3, stride=1, pad=0)
        n5x5 = coord_net_spec(ks=5, stride=2, pad=10)
        n3x5 = coord_net_spec(ks=[3, 5], stride=[1, 2], pad=[0, 10])
        ax_3x3, a_3x3, b_3x3 = coord_map_from_to(n3x3.deconv, n3x3.data)
        ax_5x5, a_5x5, b_5x5 = coord_map_from_to(n5x5.deconv, n5x5.data)
        ax_3x5, a_3x5, b_3x5 = coord_map_from_to(n3x5.deconv, n3x5.data)
        self.assertTrue(ax_3x3 == ax_5x5 == ax_3x5)
        self.assertEqual(a_3x3, a_3x5[0])
        self.assertEqual(b_3x3, b_3x5[0])
        self.assertEqual(a_5x5, a_3x5[1])
        self.assertEqual(b_5x5, b_3x5[1])

    def test_nd_conv(self):
        """
        ND conv maps the same way in more dimensions.
        """
        n = caffe.NetSpec()
        # define data with 3 spatial dimensions, otherwise the same net
        n.data = L.Input(shape=dict(dim=[2, 3, 100, 100, 100]))
        n.conv = L.Convolution(
            n.data, num_output=10, kernel_size=[3, 3, 3], stride=[1, 1, 1],
            pad=[0, 1, 2])
        n.pool = L.Pooling(
            n.conv, pool=P.Pooling.MAX, kernel_size=2, stride=2, pad=0)
        n.deconv = L.Deconvolution(
            n.pool, num_output=10, kernel_size=4, stride=2, pad=0)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertEqual(len(a), len(b))
        self.assertTrue(np.all(a == 1))
        self.assertEqual(b[0] - 1, b[1])
        self.assertEqual(b[1] - 1, b[2])

    def test_crop_of_crop(self):
        """
        Map coordinates through Crop layer:
        crop an already-cropped output to the input and check change in offset.
        """
        n = coord_net_spec()
        offset = random.randint(0, 10)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.crop = L.Crop(n.deconv, n.data, axis=2, offset=offset)
        ax_crop, a_crop, b_crop = coord_map_from_to(n.crop, n.data)
        self.assertEqual(ax, ax_crop)
        self.assertEqual(a, a_crop)
        self.assertEqual(b + offset, b_crop)

    def test_crop_helper(self):
        """
        Define Crop layer by crop().
        """
        n = coord_net_spec()
        crop(n.deconv, n.data)

    def test_catch_unconnected(self):
        """
        Catch mapping spatially unconnected tops.
        """
        n = coord_net_spec()
        n.ip = L.InnerProduct(n.deconv, num_output=10)
        with self.assertRaises(RuntimeError):
            coord_map_from_to(n.ip, n.data)

    def test_catch_scale_mismatch(self):
        """
        Catch incompatible scales, such as when the top to be cropped
        is mapped to a differently strided reference top.
        """
        n = coord_net_spec(pool=3, dstride=2)  # pool 3x but deconv 2x
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)

    def test_catch_negative_crop(self):
        """
        Catch impossible offsets, such as when the top to be cropped
        is mapped to a larger reference top.
        """
        n = coord_net_spec(dpad=10)  # make output smaller than input
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)