-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
547 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#ifndef CAFFE_PERMUTE_LAYER_HPP_ | ||
#define CAFFE_PERMUTE_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief Permute the input blob by changing the memory order of the data. | ||
* | ||
* TODO(weiliu89): thorough documentation for Forward, Backward, and proto params. | ||
*/ | ||
|
||
/**
 * @brief Copies elements between a bottom buffer and its permuted top buffer.
 *
 * Every top index in [0, count) is mapped to a bottom index using
 * permute_order, old_steps and new_steps; the element is then copied in the
 * direction selected by forward.
 *
 * @param count         number of top elements to visit
 * @param bottom_data   buffer in the original axis order; read when forward
 *                      is true, written when forward is false
 * @param forward       true copies bottom -> top, false copies top -> bottom
 * @param permute_order permute_order[j] is the bottom axis used as top axis j
 * @param old_steps     per-axis step sizes of the bottom layout
 * @param new_steps     per-axis step sizes of the top layout
 * @param num_axes      number of entries read from each index array
 * @param top_data      buffer in the permuted axis order; written when
 *                      forward is true, read when forward is false
 */
template <typename Dtype>
void Permute(const int count,
             Dtype* bottom_data,
             const bool forward,
             const int* permute_order,
             const int* old_steps,
             const int* new_steps,
             const int num_axes,
             Dtype* top_data);
|
||
template <typename Dtype> | ||
class PermuteLayer : public Layer<Dtype> { | ||
public: | ||
explicit PermuteLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "Permute"; } | ||
virtual inline int ExactNumBottomBlobs() const { return 1; } | ||
virtual inline int ExactNumTopBlobs() const { return 1; } | ||
|
||
protected: | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
int num_axes_; | ||
bool need_permute_; | ||
|
||
// Use Blob because it is convenient to be accessible in .cu file. | ||
Blob<int> permute_order_; | ||
Blob<int> old_steps_; | ||
Blob<int> new_steps_; | ||
}; | ||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_PERMUTE_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#include <vector> | ||
|
||
#include "caffe/layers/permute_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
/**
 * @brief Copies elements between a bottom buffer and its permuted top buffer.
 *
 * For each top index the coordinate along every top axis is peeled off with
 * new_steps, and each coordinate is re-weighted with the step of the bottom
 * axis it came from (old_steps[permute_order[axis]]) to form the bottom index.
 *
 * @param count         number of top elements to visit
 * @param bottom_data   buffer in the original axis order
 * @param forward       true: top[i] = bottom[mapped]; false: bottom[mapped] = top[i]
 * @param permute_order bottom axis used for each top axis
 * @param old_steps     per-axis step sizes of the bottom layout
 * @param new_steps     per-axis step sizes of the top layout
 * @param num_axes      number of entries read from each index array
 * @param top_data      buffer in the permuted axis order
 */
template <typename Dtype>
void Permute(const int count, Dtype* bottom_data, const bool forward,
    const int* permute_order, const int* old_steps, const int* new_steps,
    const int num_axes, Dtype* top_data) {
  for (int top_index = 0; top_index < count; ++top_index) {
    int remaining = top_index;
    int bottom_index = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int top_step = new_steps[axis];
      const int source_axis = permute_order[axis];
      // Coordinate along this top axis, weighted by the bottom axis step.
      bottom_index += (remaining / top_step) * old_steps[source_axis];
      remaining %= top_step;
    }
    if (!forward) {
      // Backward direction: scatter from the top buffer into the bottom one.
      bottom_data[bottom_index] = top_data[top_index];
    } else {
      // Forward direction: gather from the bottom buffer into the top one.
      top_data[top_index] = bottom_data[bottom_index];
    }
  }
}
|
||
template <typename Dtype> | ||
void PermuteLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
PermuteParameter permute_param = this->layer_param_.permute_param(); | ||
CHECK_EQ(bottom.size(), 1); | ||
num_axes_ = bottom[0]->num_axes(); | ||
vector<int> orders; | ||
// Push the specified new orders. | ||
for (int i = 0; i < permute_param.order_size(); ++i) { | ||
int order = permute_param.order(i); | ||
CHECK_LT(order, num_axes_) | ||
<< "order should be less than the input dimension."; | ||
if (std::find(orders.begin(), orders.end(), order) != orders.end()) { | ||
LOG(FATAL) << "there are duplicate orders"; | ||
} | ||
orders.push_back(order); | ||
} | ||
// Push the rest orders. And save original step sizes for each axis. | ||
for (int i = 0; i < num_axes_; ++i) { | ||
if (std::find(orders.begin(), orders.end(), i) == orders.end()) { | ||
orders.push_back(i); | ||
} | ||
} | ||
CHECK_EQ(num_axes_, orders.size()); | ||
// Check if we need to reorder the data or keep it. | ||
need_permute_ = false; | ||
for (int i = 0; i < num_axes_; ++i) { | ||
if (orders[i] != i) { | ||
// As long as there is one order which is different from the natural order | ||
// of the data, we need to permute. Otherwise, we share the data and diff. | ||
need_permute_ = true; | ||
break; | ||
} | ||
} | ||
|
||
vector<int> top_shape(num_axes_, 1); | ||
permute_order_.Reshape(num_axes_, 1, 1, 1); | ||
old_steps_.Reshape(num_axes_, 1, 1, 1); | ||
new_steps_.Reshape(num_axes_, 1, 1, 1); | ||
for (int i = 0; i < num_axes_; ++i) { | ||
permute_order_.mutable_cpu_data()[i] = orders[i]; | ||
top_shape[i] = bottom[0]->shape(orders[i]); | ||
} | ||
top[0]->Reshape(top_shape); | ||
} | ||
|
||
template <typename Dtype> | ||
void PermuteLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
vector<int> top_shape; | ||
for (int i = 0; i < num_axes_; ++i) { | ||
if (i == num_axes_ - 1) { | ||
old_steps_.mutable_cpu_data()[i] = 1; | ||
} else { | ||
old_steps_.mutable_cpu_data()[i] = bottom[0]->count(i + 1); | ||
} | ||
top_shape.push_back(bottom[0]->shape(permute_order_.cpu_data()[i])); | ||
} | ||
top[0]->Reshape(top_shape); | ||
|
||
for (int i = 0; i < num_axes_; ++i) { | ||
if (i == num_axes_ - 1) { | ||
new_steps_.mutable_cpu_data()[i] = 1; | ||
} else { | ||
new_steps_.mutable_cpu_data()[i] = top[0]->count(i + 1); | ||
} | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void PermuteLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
if (need_permute_) { | ||
Dtype* bottom_data = bottom[0]->mutable_cpu_data(); | ||
Dtype* top_data = top[0]->mutable_cpu_data(); | ||
const int top_count = top[0]->count(); | ||
const int* permute_order = permute_order_.cpu_data(); | ||
const int* old_steps = old_steps_.cpu_data(); | ||
const int* new_steps = new_steps_.cpu_data(); | ||
bool forward = true; | ||
Permute(top_count, bottom_data, forward, permute_order, old_steps, | ||
new_steps, num_axes_, top_data); | ||
} else { | ||
// If there is no need to permute, we share data to save memory. | ||
top[0]->ShareData(*bottom[0]); | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void PermuteLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
if (need_permute_) { | ||
Dtype* top_diff = top[0]->mutable_cpu_diff(); | ||
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); | ||
const int top_count = top[0]->count(); | ||
const int* permute_order = permute_order_.cpu_data(); | ||
const int* old_steps = old_steps_.cpu_data(); | ||
const int* new_steps = new_steps_.cpu_data(); | ||
bool forward = false; | ||
Permute(top_count, bottom_diff, forward, permute_order, old_steps, | ||
new_steps, num_axes_, top_diff); | ||
} else { | ||
// If there is no need to permute, we share diff to save memory. | ||
bottom[0]->ShareDiff(*top[0]); | ||
} | ||
} | ||
|
||
#ifdef CPU_ONLY | ||
STUB_GPU(PermuteLayer); | ||
#endif | ||
|
||
INSTANTIATE_CLASS(PermuteLayer); | ||
REGISTER_LAYER_CLASS(Permute); | ||
|
||
} // namespace caffe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#include <algorithm> | ||
#include <cfloat> | ||
#include <vector> | ||
|
||
#include "caffe/layers/permute_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
/**
 * @brief Device kernel that copies elements between bottom and permuted top.
 *
 * For each index handed out by CUDA_KERNEL_LOOP, the index is treated as a
 * position in the top buffer and mapped to the matching bottom position using
 * permute_order, old_steps and new_steps.
 *
 * @param nthreads      number of top elements to process
 * @param bottom_data   buffer in the original axis order
 * @param forward       true: top[index] = bottom[mapped];
 *                      false: bottom[mapped] = top[index]
 * @param permute_order bottom axis used for each top axis
 * @param old_steps     per-axis step sizes of the bottom layout
 * @param new_steps     per-axis step sizes of the top layout
 * @param num_axes      number of entries read from each index array
 * @param top_data      buffer in the permuted axis order
 */
template <typename Dtype>
__global__ void PermuteKernel(const int nthreads,
    Dtype* const bottom_data, const bool forward, const int* permute_order,
    const int* old_steps, const int* new_steps, const int num_axes,
    Dtype* const top_data) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    int remaining = index;
    int bottom_index = 0;
    for (int axis = 0; axis < num_axes; ++axis) {
      const int top_step = new_steps[axis];
      const int source_axis = permute_order[axis];
      // Coordinate along this top axis, weighted by the bottom axis step.
      bottom_index += (remaining / top_step) * old_steps[source_axis];
      remaining %= top_step;
    }
    if (!forward) {
      bottom_data[bottom_index] = top_data[index];
    } else {
      top_data[index] = bottom_data[bottom_index];
    }
  }
}
|
||
/**
 * @brief GPU forward pass: launches PermuteKernel to fill top from bottom.
 *
 * When the axis order is the identity no kernel is launched; top[0] shares
 * the data of bottom[0] instead.
 *
 * @param bottom input blobs; bottom[0] is the source
 * @param top    output blobs; top[0] is the destination
 */
template <typename Dtype>
void PermuteLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (!need_permute_) {
    // If there is no need to permute, we share data to save memory.
    top[0]->ShareData(*bottom[0]);
    return;
  }

  Dtype* const src = bottom[0]->mutable_gpu_data();
  Dtype* const dst = top[0]->mutable_gpu_data();
  const int num_elements = top[0]->count();
  const int* const order = permute_order_.gpu_data();
  const int* const top_steps = new_steps_.gpu_data();
  const int* const bottom_steps = old_steps_.gpu_data();
  const bool forward = true;
  // NOLINT_NEXT_LINE(whitespace/operators)
  PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(num_elements),
                         CAFFE_CUDA_NUM_THREADS>>>(
      num_elements, src, forward, order, bottom_steps, top_steps,
      num_axes_, dst);
  CUDA_POST_KERNEL_CHECK;
}
|
||
|
||
/**
 * @brief GPU backward pass: launches PermuteKernel to fill bottom diff from
 *        top diff.
 *
 * When the axis order is the identity no kernel is launched; bottom[0]
 * shares the diff of top[0] instead. propagate_down is not consulted.
 *
 * @param top            output blobs; top[0] supplies the diff
 * @param propagate_down unused
 * @param bottom         input blobs; bottom[0] receives the diff
 */
template <typename Dtype>
void PermuteLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (need_permute_) {
    Dtype* top_diff = top[0]->mutable_gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    // The kernel iterates over positions in the top buffer, so the launch
    // size is taken from top[0], matching Forward_gpu and the CPU paths.
    const int count = top[0]->count();
    const int* permute_order = permute_order_.gpu_data();
    const int* new_steps = new_steps_.gpu_data();
    const int* old_steps = old_steps_.gpu_data();
    const bool forward = false;
    // NOLINT_NEXT_LINE(whitespace/operators)
    PermuteKernel<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, bottom_diff, forward, permute_order, old_steps, new_steps,
        num_axes_, top_diff);
    CUDA_POST_KERNEL_CHECK;
  } else {
    // If there is no need to permute, we share diff to save memory.
    bottom[0]->ShareDiff(*top[0]);
  }
}
|
||
INSTANTIATE_LAYER_GPU_FUNCS(PermuteLayer); | ||
|
||
} // namespace caffe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.