Skip to content

Commit

Permalink
add PermuteLayer
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliu89 committed Jan 24, 2016
1 parent 8ff0802 commit b68695d
Show file tree
Hide file tree
Showing 5 changed files with 547 additions and 1 deletion.
59 changes: 59 additions & 0 deletions include/caffe/layers/permute_layer.hpp
@@ -0,0 +1,59 @@
#ifndef CAFFE_PERMUTE_LAYER_HPP_
#define CAFFE_PERMUTE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Permute the input blob by changing the memory order of the data.
 *
 * The permutation is given by PermuteParameter.order; axes not listed
 * there keep their natural order after the listed ones. If the resulting
 * order is the identity, data and diff are shared instead of copied.
 */

// The main function which does the permute. For each of the `count`
// elements of the top blob, its linear index is decomposed with the top
// strides (`new_steps`), mapped through `permute_order`, and recomposed
// with the bottom strides (`old_steps`). `forward` selects gather
// (bottom -> top) vs scatter (top -> bottom, used for the backward pass).
template <typename Dtype>
void Permute(const int count, Dtype* bottom_data, const bool forward,
    const int* permute_order, const int* old_steps, const int* new_steps,
    const int num_axes, Dtype* top_data);

template <typename Dtype>
class PermuteLayer : public Layer<Dtype> {
 public:
  explicit PermuteLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Permute"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Number of axes of the bottom blob (set in LayerSetUp).
  int num_axes_;
  // False when the requested order is the identity permutation; in that
  // case Forward/Backward share data/diff instead of copying.
  bool need_permute_;

  // Use Blob because it is convenient to be accessible in .cu file.
  Blob<int> permute_order_;  // source (bottom) axis for each top axis
  Blob<int> old_steps_;      // per-axis strides of the bottom blob
  Blob<int> new_steps_;      // per-axis strides of the top blob
};

} // namespace caffe

#endif // CAFFE_PERMUTE_LAYER_HPP_
142 changes: 142 additions & 0 deletions src/caffe/layers/permute_layer.cpp
@@ -0,0 +1,142 @@
#include <algorithm>
#include <vector>

#include "caffe/layers/permute_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void Permute(const int count, Dtype* bottom_data, const bool forward,
    const int* permute_order, const int* old_steps, const int* new_steps,
    const int num_axes, Dtype* top_data) {
  // For each element of the top blob: decompose the linear index into
  // per-axis coordinates with the top strides, then rebuild the matching
  // linear index in the bottom blob through the permuted axis strides.
  for (int top_idx = 0; top_idx < count; ++top_idx) {
    int remainder = top_idx;
    int bottom_idx = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int coord = remainder / new_steps[axis];
      remainder -= coord * new_steps[axis];
      bottom_idx += coord * old_steps[permute_order[axis]];
    }
    if (forward) {
      // Gather: top[i] <- bottom[permuted(i)].
      top_data[top_idx] = bottom_data[bottom_idx];
    } else {
      // Scatter: route top values (diffs) back to the bottom layout.
      bottom_data[bottom_idx] = top_data[top_idx];
    }
  }
}

// Validates PermuteParameter.order, completes it to a full permutation of
// the bottom axes, and records it (plus the resulting top shape).
template <typename Dtype>
void PermuteLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PermuteParameter permute_param = this->layer_param_.permute_param();
  CHECK_EQ(bottom.size(), 1);
  num_axes_ = bottom[0]->num_axes();
  vector<int> orders;
  // Push the specified new orders; each must be a valid, unique axis index.
  for (int i = 0; i < permute_param.order_size(); ++i) {
    int order = permute_param.order(i);
    CHECK_LT(order, num_axes_)
        << "order should be less than the input dimension.";
    if (std::find(orders.begin(), orders.end(), order) != orders.end()) {
      LOG(FATAL) << "there are duplicate orders";
    }
    orders.push_back(order);
  }
  // Fill in any axes not explicitly listed, keeping their natural order.
  for (int i = 0; i < num_axes_; ++i) {
    if (std::find(orders.begin(), orders.end(), i) == orders.end()) {
      orders.push_back(i);
    }
  }
  // Cast avoids a signed/unsigned comparison (int vs. vector::size_type).
  CHECK_EQ(num_axes_, static_cast<int>(orders.size()));
  // Check if we need to reorder the data or keep it.
  need_permute_ = false;
  for (int i = 0; i < num_axes_; ++i) {
    if (orders[i] != i) {
      // As long as there is one order which is different from the natural
      // order of the data, we need to permute. Otherwise, we share the data
      // and diff in Forward/Backward.
      need_permute_ = true;
      break;
    }
  }

  vector<int> top_shape(num_axes_, 1);
  permute_order_.Reshape(num_axes_, 1, 1, 1);
  old_steps_.Reshape(num_axes_, 1, 1, 1);
  new_steps_.Reshape(num_axes_, 1, 1, 1);
  for (int i = 0; i < num_axes_; ++i) {
    permute_order_.mutable_cpu_data()[i] = orders[i];
    top_shape[i] = bottom[0]->shape(orders[i]);
  }
  top[0]->Reshape(top_shape);
}

// Recomputes the bottom strides, the permuted top shape, and (after
// reshaping the top blob) the top strides.
template <typename Dtype>
void PermuteLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  vector<int> shape;
  int* old_steps = old_steps_.mutable_cpu_data();
  const int* order = permute_order_.cpu_data();
  for (int axis = 0; axis < num_axes_; ++axis) {
    // Stride of the last axis is 1; otherwise the count of trailing axes.
    old_steps[axis] = (axis == num_axes_ - 1) ? 1 : bottom[0]->count(axis + 1);
    shape.push_back(bottom[0]->shape(order[axis]));
  }
  top[0]->Reshape(shape);

  // Top strides are taken from the freshly reshaped top blob.
  int* new_steps = new_steps_.mutable_cpu_data();
  for (int axis = 0; axis < num_axes_; ++axis) {
    new_steps[axis] = (axis == num_axes_ - 1) ? 1 : top[0]->count(axis + 1);
  }
}

// Gathers bottom data into the permuted top layout, or shares the data
// when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (!need_permute_) {
    // If there is no need to permute, we share data to save memory.
    top[0]->ShareData(*bottom[0]);
    return;
  }
  Dtype* bottom_data = bottom[0]->mutable_cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Permute(top[0]->count(), bottom_data, true, permute_order_.cpu_data(),
      old_steps_.cpu_data(), new_steps_.cpu_data(), num_axes_, top_data);
}

// Scatters the top diff back into the bottom layout, or shares the diff
// when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!need_permute_) {
    // If there is no need to permute, we share diff to save memory.
    bottom[0]->ShareDiff(*top[0]);
    return;
  }
  Dtype* top_diff = top[0]->mutable_cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // forward == false: Permute writes through bottom_diff instead of
  // reading from it.
  Permute(top[0]->count(), bottom_diff, false, permute_order_.cpu_data(),
      old_steps_.cpu_data(), new_steps_.cpu_data(), num_axes_, top_diff);
}

#ifdef CPU_ONLY
// GPU entry points are stubbed out in CPU-only builds.
STUB_GPU(PermuteLayer);
#endif

INSTANTIATE_CLASS(PermuteLayer);
REGISTER_LAYER_CLASS(Permute);

} // namespace caffe
78 changes: 78 additions & 0 deletions src/caffe/layers/permute_layer.cu
@@ -0,0 +1,78 @@
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/permute_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// One thread per top-blob element: map the top linear index to the
// corresponding bottom linear index via the permuted axis strides, then
// either gather (forward) or scatter (backward).
template <typename Dtype>
__global__ void PermuteKernel(const int nthreads,
    Dtype* const bottom_data, const bool forward, const int* permute_order,
    const int* old_steps, const int* new_steps, const int num_axes,
    Dtype* const top_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    int remainder = index;
    int bottom_idx = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int coord = remainder / new_steps[axis];
      remainder -= coord * new_steps[axis];
      bottom_idx += coord * old_steps[permute_order[axis]];
    }
    if (forward) {
      top_data[index] = bottom_data[bottom_idx];
    } else {
      bottom_data[bottom_idx] = top_data[index];
    }
  }
}

// GPU forward: gathers bottom data into the permuted top layout, or shares
// the data when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (need_permute_) {
    Dtype* bottom_data = bottom[0]->mutable_gpu_data();
    Dtype* top_data = top[0]->mutable_gpu_data();
    const int count = top[0]->count();
    const int* permute_order = permute_order_.gpu_data();
    const int* new_steps = new_steps_.gpu_data();
    const int* old_steps = old_steps_.gpu_data();
    const bool forward = true;  // fixed misspelled local "foward"
    // NOLINT_NEXT_LINE(whitespace/operators)
    PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_data, forward, permute_order, old_steps, new_steps,
        num_axes_, top_data);
    CUDA_POST_KERNEL_CHECK;
  } else {
    // If there is no need to permute, we share data to save memory.
    top[0]->ShareData(*bottom[0]);
  }
}


// GPU backward: scatters the top diff back into the bottom layout, or
// shares the diff when the permutation is the identity. Note: bottom and
// top hold the same number of elements, so counting either blob is fine.
template <typename Dtype>
void PermuteLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (need_permute_) {
    Dtype* top_diff = top[0]->mutable_gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    const int* permute_order = permute_order_.gpu_data();
    const int* new_steps = new_steps_.gpu_data();
    const int* old_steps = old_steps_.gpu_data();
    const bool forward = false;  // fixed misspelled local "foward"
    // NOLINT_NEXT_LINE(whitespace/operators)
    PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_diff, forward, permute_order, old_steps, new_steps,
        num_axes_, top_diff);
    CUDA_POST_KERNEL_CHECK;
  } else {
    // If there is no need to permute, we share diff to save memory.
    bottom[0]->ShareDiff(*top[0]);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(PermuteLayer);

} // namespace caffe
10 changes: 9 additions & 1 deletion src/caffe/proto/caffe.proto
Expand Up @@ -354,7 +354,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 144 (last added: multibox_loss_param)
// LayerParameter next available layer-specific ID: 145 (last added: permute_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
Expand Down Expand Up @@ -427,6 +427,7 @@ message LayerParameter {
optional MemoryDataParameter memory_data_param = 119;
optional MultiBoxLossParameter multibox_loss_param = 143;
optional MVNParameter mvn_param = 120;
optional PermuteParameter permute_param = 144;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
Expand Down Expand Up @@ -888,6 +889,13 @@ message MVNParameter {
optional float eps = 3 [default = 1e-9];
}

message PermuteParameter {
  // The new order of the axes of the data. Each value must be within
  // [0, num_axes) of the input blob (axes start from 0), and no value
  // may be repeated. Axes not listed here keep their natural order and
  // are appended after the listed ones.
  repeated uint32 order = 1;
}

message PoolingParameter {
enum PoolMethod {
MAX = 0;
Expand Down

0 comments on commit b68695d

Please sign in to comment.