Skip to content

Commit

Permalink
add PermuteLayer
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliu89 committed Jan 24, 2016
1 parent 8ff0802 commit b68695d
Show file tree
Hide file tree
Showing 5 changed files with 547 additions and 1 deletion.
59 changes: 59 additions & 0 deletions include/caffe/layers/permute_layer.hpp
@@ -0,0 +1,59 @@
#ifndef CAFFE_PERMUTE_LAYER_HPP_
#define CAFFE_PERMUTE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Permute the input blob by changing the memory order of the data.
 *
 * The permutation is given by PermuteParameter.order; axes not listed
 * there keep their natural order after the listed ones. If the resulting
 * order is the identity, data and diff are shared instead of copied.
 */

// The main function which does the permute. For each of the `count`
// elements of the top blob, its linear index is decomposed with the top
// strides (`new_steps`), mapped through `permute_order`, and recomposed
// with the bottom strides (`old_steps`). `forward` selects gather
// (bottom -> top) vs scatter (top -> bottom, used for the backward pass).
template <typename Dtype>
void Permute(const int count, Dtype* bottom_data, const bool forward,
    const int* permute_order, const int* old_steps, const int* new_steps,
    const int num_axes, Dtype* top_data);

template <typename Dtype>
class PermuteLayer : public Layer<Dtype> {
 public:
  explicit PermuteLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Permute"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Number of axes of the bottom blob (set in LayerSetUp).
  int num_axes_;
  // False when the requested order is the identity permutation; in that
  // case Forward/Backward share data/diff instead of copying.
  bool need_permute_;

  // Use Blob because it is convenient to be accessible in .cu file.
  Blob<int> permute_order_;  // source (bottom) axis for each top axis
  Blob<int> old_steps_;      // per-axis strides of the bottom blob
  Blob<int> new_steps_;      // per-axis strides of the top blob
};

} // namespace caffe

#endif // CAFFE_PERMUTE_LAYER_HPP_
142 changes: 142 additions & 0 deletions src/caffe/layers/permute_layer.cpp
@@ -0,0 +1,142 @@
#include <algorithm>
#include <vector>

#include "caffe/layers/permute_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void Permute(const int count, Dtype* bottom_data, const bool forward,
    const int* permute_order, const int* old_steps, const int* new_steps,
    const int num_axes, Dtype* top_data) {
  // For each element of the top blob: decompose the linear index into
  // per-axis coordinates with the top strides, then rebuild the matching
  // linear index in the bottom blob through the permuted axis strides.
  for (int top_idx = 0; top_idx < count; ++top_idx) {
    int remainder = top_idx;
    int bottom_idx = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int coord = remainder / new_steps[axis];
      remainder -= coord * new_steps[axis];
      bottom_idx += coord * old_steps[permute_order[axis]];
    }
    if (forward) {
      // Gather: top[i] <- bottom[permuted(i)].
      top_data[top_idx] = bottom_data[bottom_idx];
    } else {
      // Scatter: route top values (diffs) back to the bottom layout.
      bottom_data[bottom_idx] = top_data[top_idx];
    }
  }
}

// Validates PermuteParameter.order, completes it to a full permutation of
// the bottom axes, and records it (plus the resulting top shape).
template <typename Dtype>
void PermuteLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PermuteParameter permute_param = this->layer_param_.permute_param();
  CHECK_EQ(bottom.size(), 1);
  num_axes_ = bottom[0]->num_axes();
  vector<int> orders;
  // Push the specified new orders; each must be a valid, unique axis index.
  for (int i = 0; i < permute_param.order_size(); ++i) {
    int order = permute_param.order(i);
    CHECK_LT(order, num_axes_)
        << "order should be less than the input dimension.";
    if (std::find(orders.begin(), orders.end(), order) != orders.end()) {
      LOG(FATAL) << "there are duplicate orders";
    }
    orders.push_back(order);
  }
  // Fill in any axes not explicitly listed, keeping their natural order.
  for (int i = 0; i < num_axes_; ++i) {
    if (std::find(orders.begin(), orders.end(), i) == orders.end()) {
      orders.push_back(i);
    }
  }
  // Cast avoids a signed/unsigned comparison (int vs. vector::size_type).
  CHECK_EQ(num_axes_, static_cast<int>(orders.size()));
  // Check if we need to reorder the data or keep it.
  need_permute_ = false;
  for (int i = 0; i < num_axes_; ++i) {
    if (orders[i] != i) {
      // As long as there is one order which is different from the natural
      // order of the data, we need to permute. Otherwise, we share the data
      // and diff in Forward/Backward.
      need_permute_ = true;
      break;
    }
  }

  vector<int> top_shape(num_axes_, 1);
  permute_order_.Reshape(num_axes_, 1, 1, 1);
  old_steps_.Reshape(num_axes_, 1, 1, 1);
  new_steps_.Reshape(num_axes_, 1, 1, 1);
  for (int i = 0; i < num_axes_; ++i) {
    permute_order_.mutable_cpu_data()[i] = orders[i];
    top_shape[i] = bottom[0]->shape(orders[i]);
  }
  top[0]->Reshape(top_shape);
}

// Recomputes the bottom strides, the permuted top shape, and (after
// reshaping the top blob) the top strides.
template <typename Dtype>
void PermuteLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  vector<int> shape;
  int* old_steps = old_steps_.mutable_cpu_data();
  const int* order = permute_order_.cpu_data();
  for (int axis = 0; axis < num_axes_; ++axis) {
    // Stride of the last axis is 1; otherwise the count of trailing axes.
    old_steps[axis] = (axis == num_axes_ - 1) ? 1 : bottom[0]->count(axis + 1);
    shape.push_back(bottom[0]->shape(order[axis]));
  }
  top[0]->Reshape(shape);

  // Top strides are taken from the freshly reshaped top blob.
  int* new_steps = new_steps_.mutable_cpu_data();
  for (int axis = 0; axis < num_axes_; ++axis) {
    new_steps[axis] = (axis == num_axes_ - 1) ? 1 : top[0]->count(axis + 1);
  }
}

// Gathers bottom data into the permuted top layout, or shares the data
// when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (!need_permute_) {
    // If there is no need to permute, we share data to save memory.
    top[0]->ShareData(*bottom[0]);
    return;
  }
  Dtype* bottom_data = bottom[0]->mutable_cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Permute(top[0]->count(), bottom_data, true, permute_order_.cpu_data(),
      old_steps_.cpu_data(), new_steps_.cpu_data(), num_axes_, top_data);
}

// Scatters the top diff back into the bottom layout, or shares the diff
// when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!need_permute_) {
    // If there is no need to permute, we share diff to save memory.
    bottom[0]->ShareDiff(*top[0]);
    return;
  }
  Dtype* top_diff = top[0]->mutable_cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // forward == false: Permute writes through bottom_diff instead of
  // reading from it.
  Permute(top[0]->count(), bottom_diff, false, permute_order_.cpu_data(),
      old_steps_.cpu_data(), new_steps_.cpu_data(), num_axes_, top_diff);
}

#ifdef CPU_ONLY
// GPU entry points are stubbed out in CPU-only builds.
STUB_GPU(PermuteLayer);
#endif

INSTANTIATE_CLASS(PermuteLayer);
REGISTER_LAYER_CLASS(Permute);

} // namespace caffe
78 changes: 78 additions & 0 deletions src/caffe/layers/permute_layer.cu
@@ -0,0 +1,78 @@
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/permute_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// One thread per top-blob element: map the top linear index to the
// corresponding bottom linear index via the permuted axis strides, then
// either gather (forward) or scatter (backward).
template <typename Dtype>
__global__ void PermuteKernel(const int nthreads,
    Dtype* const bottom_data, const bool forward, const int* permute_order,
    const int* old_steps, const int* new_steps, const int num_axes,
    Dtype* const top_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    int remainder = index;
    int bottom_idx = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int coord = remainder / new_steps[axis];
      remainder -= coord * new_steps[axis];
      bottom_idx += coord * old_steps[permute_order[axis]];
    }
    if (forward) {
      top_data[index] = bottom_data[bottom_idx];
    } else {
      bottom_data[bottom_idx] = top_data[index];
    }
  }
}

// GPU forward: gathers bottom data into the permuted top layout, or shares
// the data when the permutation is the identity.
template <typename Dtype>
void PermuteLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (need_permute_) {
    Dtype* bottom_data = bottom[0]->mutable_gpu_data();
    Dtype* top_data = top[0]->mutable_gpu_data();
    const int count = top[0]->count();
    const int* permute_order = permute_order_.gpu_data();
    const int* new_steps = new_steps_.gpu_data();
    const int* old_steps = old_steps_.gpu_data();
    const bool forward = true;  // fixed misspelled local "foward"
    // NOLINT_NEXT_LINE(whitespace/operators)
    PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_data, forward, permute_order, old_steps, new_steps,
        num_axes_, top_data);
    CUDA_POST_KERNEL_CHECK;
  } else {
    // If there is no need to permute, we share data to save memory.
    top[0]->ShareData(*bottom[0]);
  }
}


// GPU backward: scatters the top diff back into the bottom layout, or
// shares the diff when the permutation is the identity. Note: bottom and
// top hold the same number of elements, so counting either blob is fine.
template <typename Dtype>
void PermuteLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (need_permute_) {
    Dtype* top_diff = top[0]->mutable_gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    const int* permute_order = permute_order_.gpu_data();
    const int* new_steps = new_steps_.gpu_data();
    const int* old_steps = old_steps_.gpu_data();
    const bool forward = false;  // fixed misspelled local "foward"
    // NOLINT_NEXT_LINE(whitespace/operators)
    PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_diff, forward, permute_order, old_steps, new_steps,
        num_axes_, top_diff);
    CUDA_POST_KERNEL_CHECK;
  } else {
    // If there is no need to permute, we share diff to save memory.
    bottom[0]->ShareDiff(*top[0]);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(PermuteLayer);

} // namespace caffe
10 changes: 9 additions & 1 deletion src/caffe/proto/caffe.proto
Expand Up @@ -354,7 +354,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 144 (last added: multibox_loss_param)
// LayerParameter next available layer-specific ID: 145 (last added: permute_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
Expand Down Expand Up @@ -427,6 +427,7 @@ message LayerParameter {
optional MemoryDataParameter memory_data_param = 119;
optional MultiBoxLossParameter multibox_loss_param = 143;
optional MVNParameter mvn_param = 120;
optional PermuteParameter permute_param = 144;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
Expand Down Expand Up @@ -888,6 +889,13 @@ message MVNParameter {
optional float eps = 3 [default = 1e-9];
}

message PermuteParameter {
  // The new order of the axes of the data. Each value must be within
  // [0, num_axes) of the input blob (axes start from 0), and no value
  // may be repeated. Axes not listed here keep their natural order and
  // are appended after the listed ones.
  repeated uint32 order = 1;
}

message PoolingParameter {
enum PoolMethod {
MAX = 0;
Expand Down

0 comments on commit b68695d

Please sign in to comment.