Updating DepthToSpace and SpaceToDepth ops to match ONNX spec.

microsoft · Jun 26, 2018 · da60e4f · da60e4f
1 parent dc132a4
commit da60e4f
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 38 deletions.
diff --git a/Documentation/current_iteration.md b/Documentation/current_iteration.md
@@ -19,6 +19,10 @@ The comparison numbers for this single node are as follows:
 | Speedup/savings	Approx.  | 30%	Approx.  | 65-75%	Approx.  | 87% |
 
 ## Operators
+### depth_to_space and space_to_depth
+There is a breaking change in the **depth_to_space** and **space_to_depth** operators. They have been updated to match the ONNX specification: specifically,
+the permutation that determines how the depth dimension is placed as blocks in the spatial dimensions, and vice versa, has changed. Please refer to the updated doc
+examples for these two ops to see the change.
 
 
 ## Bug fixes

diff --git a/Source/CNTKv2LibraryDll/Function.cpp b/Source/CNTKv2LibraryDll/Function.cpp
@@ -2037,8 +2037,8 @@ namespace CNTK
                 LogicError("DepthToSpace: Number of channels in the operand (%zu) must be divisible by (blocksize x blocksize), i.e., (%zu x %zu).", inputShape[2], blockSize, blockSize);
         }
 
-        FunctionPtr inputView = Reshape(inputPlaceholder, { blockSize, blockSize, NDShape::InferredDimension }, Axis(2), Axis(3));
-        std::vector<Axis> axisShufflePermutation({ Axis(2), Axis(0), Axis(3), Axis(1), Axis(4) });
+        FunctionPtr inputView = Reshape(inputPlaceholder, { NDShape::InferredDimension, blockSize, blockSize }, Axis(2), Axis(3));
+        std::vector<Axis> axisShufflePermutation({ Axis(3), Axis(0), Axis(4), Axis(1), Axis(2) });
         auto shuffleOut = Transpose(inputView, axisShufflePermutation);
         auto merge23Out = Reshape(shuffleOut, { NDShape::InferredDimension }, Axis(2), Axis(4));
         auto merge01Out = Reshape(merge23Out, { NDShape::InferredDimension }, Axis(0), Axis(2));
@@ -2064,7 +2064,7 @@ namespace CNTK
 
         FunctionPtr reshape01out = Reshape(inputPlaceholder, { blockSize, NDShape::InferredDimension }, Axis(0), Axis(1));
         FunctionPtr reshape23out = Reshape(reshape01out, { blockSize, NDShape::InferredDimension }, Axis(2), Axis(3));
-        std::vector<Axis> axisShufflePermutation({ Axis(1), Axis(3), Axis(0), Axis(2), Axis(4) });
+        std::vector<Axis> axisShufflePermutation({ Axis(1), Axis(3), Axis(4), Axis(0), Axis(2) });
         auto shuffleOut = Transpose(reshape23out, axisShufflePermutation);
         auto merge234Out = Reshape(shuffleOut, { NDShape::InferredDimension }, Axis(2), Axis::EndStaticAxis());
 

diff --git a/bindings/python/cntk/ops/__init__.py b/bindings/python/cntk/ops/__init__.py
@@ -3853,15 +3853,15 @@ def depth_to_space(operand, block_size, name=''):
         >>> a = C.input_variable((8, 2, 3))
         >>> d2s_op = C.depth_to_space(a, block_size=2)
         >>> d2s_op.eval({a:x})
-        array([[[[ 0.,  1.,  0.,  1.,  0.,  1.],
-                 [ 2.,  3.,  2.,  3.,  2.,  3.],
-                 [ 0.,  1.,  0.,  1.,  0.,  1.],
-                 [ 2.,  3.,  2.,  3.,  2.,  3.]],
+        array([[[[ 0.,  2.,  0.,  2.,  0.,  2.],
+                 [ 4.,  6.,  4.,  6.,  4.,  6.],
+                 [ 0.,  2.,  0.,  2.,  0.,  2.],
+                 [ 4.,  6.,  4.,  6.,  4.,  6.]],
         <BLANKLINE>
-                [[ 4.,  5.,  4.,  5.,  4.,  5.],
-                 [ 6.,  7.,  6.,  7.,  6.,  7.],
-                 [ 4.,  5.,  4.,  5.,  4.,  5.],
-                 [ 6.,  7.,  6.,  7.,  6.,  7.]]]], dtype=float32)
+                [[ 1.,  3.,  1.,  3.,  1.,  3.],
+                 [ 5.,  7.,  5.,  7.,  5.,  7.],
+                 [ 1.,  3.,  1.,  3.,  1.,  3.],
+                 [ 5.,  7.,  5.,  7.,  5.,  7.]]]], dtype=float32)
 
     Args:
         operand: Input tensor, with dimensions :math:`[C \\times H \\times W]`.

diff --git a/bindings/python/cntk/ops/tests/reshaping_test.py b/bindings/python/cntk/ops/tests/reshaping_test.py
@@ -810,50 +810,42 @@ def check_grad_last_axis(input, root, indices, output):
 
 
 DEPTH_TO_SPACE_TEST_CASES = [
-    ((2, 3),    8,    2,              #(image_shape, num_channels, block_size)
-    [[[[ 0.,  1.,  0.,  1.,  0.,  1.],# output
-    [ 2.,  3.,  2.,  3.,  2.,  3.],
-    [ 0.,  1.,  0.,  1.,  0.,  1.],
-    [ 2.,  3.,  2.,  3.,  2.,  3.]],
-    [[ 4.,  5.,  4.,  5.,  4.,  5.],
-    [ 6.,  7.,  6.,  7.,  6.,  7.],
-    [ 4.,  5.,  4.,  5.,  4.,  5.],
-    [ 6.,  7.,  6., 7.,  6.,  7.]]]]),
-
-    ((4, 5),    9,    3,  #(image_shape, num_channels, block_size)
-    [[[[ 0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.], # output
-    [ 3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.],
-    [ 6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.],
-    [ 0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.],
-    [ 3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.],
-    [ 6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.],
-    [ 0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.],
-    [ 3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.],
-    [ 6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.],
-    [ 0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.,  0.,  1.,  2.],
-    [ 3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.,  3.,  4.,  5.],
-    [ 6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.,  6.,  7.,  8.]]]]),
+    ((2, 3), 8, 2),  #(image_shape, num_channels, block_size),
+    ((4, 5), 9, 3),
+    ((5, 7), 12, 2),
+    ((10, 10), 4, 2)
 ]
-@pytest.mark.parametrize("image_shape, num_channels, block_size, output_ref", DEPTH_TO_SPACE_TEST_CASES)
-def test_depth_to_space(image_shape, num_channels, block_size, output_ref, device_id, precision):
+@pytest.mark.parametrize("image_shape, num_channels, block_size", DEPTH_TO_SPACE_TEST_CASES)
+def test_depth_to_space(image_shape, num_channels, block_size, device_id, precision):
     dev = cntk_device(device_id)
     from cntk.internal import sanitize_dtype_cntk
 
     input_val = np.array(np.reshape(range(num_channels), (num_channels, 1, 1)), dtype=PRECISION_TO_TYPE[precision])
     input_val = np.tile(input_val, (1,) + image_shape)
     img = C.input_variable((num_channels,) + image_shape, dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]))
+
+    # Result from depth_to_space node.
     depth_to_space_op = C.depth_to_space(img, block_size)
     output_test = depth_to_space_op.eval({ img : input_val })
 
+    # Reference result from simulating depth_to_space with other CNTK ops.
+    h, w = image_shape
+    reshape_node = C.reshape(img, (block_size, block_size, num_channels // (block_size**2), h, w))
+    transpose_node = C.transpose(reshape_node, [2, 3, 0, 4, 1])
+    depth_to_space_sim_op = C.reshape(transpose_node, (num_channels // (block_size**2), h * block_size, w * block_size))
+    output_ref = depth_to_space_sim_op.eval({ img : input_val })
+
     assert np.array_equal(output_test, output_ref)
 
 # space_to_depth is tested as a roundtrip, i.e. first a tensor is shuffled using depth_to_space
# and its output is provided as the input to space_to_depth. The output of space_to_depth is
 # checked against the original input tensor for equality.
 SPACE_TO_DEPTH_TEST_CASES = [
     #(image_shape, num_channels, block_size)
-    ((2, 3),    8,    2),
-    ((4, 5),    9,    3),
+    ((2, 3), 8, 2),
+    ((4, 5), 9, 3),
+    ((5, 7), 12, 2),
+    ((10, 10), 4, 2)
 ]
 @pytest.mark.parametrize("image_shape, num_channels, block_size", SPACE_TO_DEPTH_TEST_CASES)
 def test_space_to_depth(image_shape, num_channels, block_size, device_id, precision):