Permalink
...
Comparing changes
Open a pull request
- 14 commits
- 10 files changed
- 0 commit comments
- 6 contributors
Commits on Apr 19, 2017
|
|
33f8612
|
||||
|
|
cd1696d
|
Commits on May 04, 2017
|
|
ec35395
|
||||
|
|
shelhamer |
c293d9d
|
|||
|
|
shelhamer |
7d3f8a7
|
Commits on May 12, 2017
|
|
lukeyeager |
b7e2b99
|
|||
|
|
shelhamer |
25391bf
|
Commits on May 15, 2017
|
|
CDLuminate |
30a2ab7
|
|||
|
|
CDLuminate |
83814da
|
|||
|
|
shelhamer |
27ffbbf
|
|||
|
|
shelhamer |
91b0928
|
Commits on Jun 13, 2017
|
|
cypof |
264cf19
|
Commits on Jun 14, 2017
|
|
cypof |
f16b5f2
|
Commits on Jun 20, 2017
|
|
cypof |
4efdf7e
|
Unified
Split
Showing
with
106 additions
and 101 deletions.
- +8 −0 README.md
- +1 −1 cmake/ConfigGen.cmake
- +1 −1 cmake/Dependencies.cmake
- +12 −8 docs/install_apt_debian.md
- +4 −2 include/caffe/layers/crop_layer.hpp
- +3 −1 matlab/+caffe/Net.m
- +4 −4 scripts/travis/install-deps.sh
- +6 −6 src/caffe/CMakeLists.txt
- +16 −5 src/caffe/layers/crop_layer.cpp
- +51 −73 src/caffe/layers/crop_layer.cu
View
8
README.md
| @@ -15,6 +15,14 @@ Check out the [project site](http://caffe.berkeleyvision.org) for all the detail | ||
| and step-by-step examples. | ||
| +## Custom distributions | ||
| + | ||
| +	- [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (optimized for CPU, with multi-node support), in particular for Xeon processors (HSW, BDW, Xeon Phi). | ||
| +- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. | ||
| +- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) | ||
| + | ||
| +## Community | ||
| + | ||
| [](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | ||
| Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models. | ||
View
2
cmake/ConfigGen.cmake
| @@ -33,7 +33,7 @@ function(caffe_generate_export_configs) | ||
| configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY) | ||
| # Add targets to the build-tree export set | ||
| - export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") | ||
| + export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") | ||
| export(PACKAGE Caffe) | ||
| # ---[ Configure install-tree CaffeConfig.cmake file ]--- | ||
View
2
cmake/Dependencies.cmake
| @@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "") | ||
| set(Caffe_COMPILE_OPTIONS "") | ||
| # ---[ Boost | ||
| -find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem) | ||
| +find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem) | ||
| list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS}) | ||
| list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES}) | ||
View
20
docs/install_apt_debian.md
| @@ -96,18 +96,22 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. | ||
| Some users may find their favorite compiler doesn't work with CUDA. | ||
| ``` | ||
| -CXX compiler | CUDA 7.5 | CUDA 8.0 | | ||
| --------------+------------+------------+- | ||
| -GCC-7 | ? | ? | | ||
| -GCC-6 | ✘ | ✘ | | ||
| -GCC-5 | ✔ [1] | ✔ | | ||
| -CLANG-4.0 | ? | ? | | ||
| -CLANG-3.9 | ✘ | ✘ | | ||
| -CLANG-3.8 | ? | ✔ | | ||
| +CXX compiler | CUDA 7.5 | CUDA 8.0 | CUDA 9.0 | | ||
| +-------------+------------+------------+------------+ | ||
| +GCC-8 | ? | ? | ? | | ||
| +GCC-7 | ? | ? | ? | | ||
| +GCC-6 | ✘ | ✘ | ✔ | | ||
| +GCC-5 | ✔ [1] | ✔ | ✔ | | ||
| +-------------+------------+------------+------------+ | ||
| +CLANG-4.0 | ? | ? | ? | | ||
| +CLANG-3.9 | ✘ | ✘ | ✔ | | ||
| +CLANG-3.8 | ? | ✔ | ✔ | | ||
| ``` | ||
| `[1]` CUDA 7.5's `host_config.h` must be patched before working with GCC-5. | ||
| +`[2]` CUDA 9.0: https://devblogs.nvidia.com/parallelforall/cuda-9-features-revealed/ | ||
| + | ||
| BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's. | ||
| You may encounter failure linking GCC-4.X object files against GCC-5 libraries. | ||
| (See https://wiki.debian.org/GCC5 ) | ||
View
6
include/caffe/layers/crop_layer.hpp
| @@ -41,13 +41,15 @@ class CropLayer : public Layer<Dtype> { | ||
| virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
| const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
| - vector<int> offsets; | ||
| + Blob<int> offsets; | ||
| + Blob<int> src_strides_; | ||
| + Blob<int> dest_strides_; | ||
| private: | ||
| // Recursive copy function. | ||
| void crop_copy(const vector<Blob<Dtype>*>& bottom, | ||
| const vector<Blob<Dtype>*>& top, | ||
| - const vector<int>& offsets, | ||
| + const int* offsets, | ||
| vector<int> indices, | ||
| int cur_dim, | ||
| const Dtype* src_data, | ||
View
4
matlab/+caffe/Net.m
| @@ -69,7 +69,9 @@ | ||
| self.blob_names = self.attributes.blob_names; | ||
| end | ||
| function delete (self) | ||
| - caffe_('delete_net', self.hNet_self); | ||
| + if ~isempty(self.hNet_self) | ||
| + caffe_('delete_net', self.hNet_self); | ||
| + end | ||
| end | ||
| function layer = layers(self, layer_name) | ||
| CHECK(ischar(layer_name), 'layer_name must be a string'); | ||
View
8
scripts/travis/install-deps.sh
| @@ -9,10 +9,10 @@ apt-get -y update | ||
| apt-get install -y --no-install-recommends \ | ||
| build-essential \ | ||
| graphviz \ | ||
| - libboost-filesystem1.55-dev \ | ||
| - libboost-python1.55-dev \ | ||
| - libboost-system1.55-dev \ | ||
| - libboost-thread1.55-dev \ | ||
| + libboost-filesystem-dev \ | ||
| + libboost-python-dev \ | ||
| + libboost-system-dev \ | ||
| + libboost-thread-dev \ | ||
| libgflags-dev \ | ||
| libgoogle-glog-dev \ | ||
| libhdf5-serial-dev \ | ||
View
12
src/caffe/CMakeLists.txt
| @@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto) | ||
| caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files}) | ||
| # include python files either to force generation | ||
| -add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) | ||
| -caffe_default_properties(proto) | ||
| -target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES}) | ||
| -target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR}) | ||
| +add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) | ||
| +caffe_default_properties(caffeproto) | ||
| +target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES}) | ||
| +target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR}) | ||
| -list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend! | ||
| +list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend! | ||
| # --[ Caffe library | ||
| @@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES | ||
| # ---[ Install | ||
| install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | ||
| install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto) | ||
| -install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) | ||
| +install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) | ||
| file(WRITE ${PROJECT_BINARY_DIR}/__init__.py) | ||
| list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py) | ||
View
21
src/caffe/layers/crop_layer.cpp
| @@ -40,8 +40,10 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
| const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis()); | ||
| // Initialize offsets to 0 and the new shape to the current shape of the data. | ||
| - offsets = vector<int>(input_dim, 0); | ||
| vector<int> new_shape(bottom[0]->shape()); | ||
| + vector<int> offsets_shape(1, input_dim); | ||
| + offsets.Reshape(offsets_shape); | ||
| + int* offset_data = offsets.mutable_cpu_data(); | ||
| // Determine crop offsets and the new shape post-crop. | ||
| for (int i = 0; i < input_dim; ++i) { | ||
| @@ -63,15 +65,22 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
| << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; | ||
| } | ||
| new_shape[i] = new_size; | ||
| - offsets[i] = crop_offset; | ||
| + offset_data[i] = crop_offset; | ||
| } | ||
| top[0]->Reshape(new_shape); | ||
| + // Compute strides | ||
| + src_strides_.Reshape(offsets_shape); | ||
| + dest_strides_.Reshape(offsets_shape); | ||
| + for (int i = 0; i < input_dim; ++i) { | ||
| + src_strides_.mutable_cpu_data()[i] = bottom[0]->count(i + 1, input_dim); | ||
| + dest_strides_.mutable_cpu_data()[i] = top[0]->count(i + 1, input_dim); | ||
| + } | ||
| } | ||
| template <typename Dtype> | ||
| void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom, | ||
| const vector<Blob<Dtype>*>& top, | ||
| - const vector<int>& offsets, | ||
| + const int* offsets, | ||
| vector<int> indices, | ||
| int cur_dim, | ||
| const Dtype* src_data, | ||
| @@ -115,7 +124,8 @@ void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
| std::vector<int> indices(top[0]->num_axes(), 0); | ||
| const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
| Dtype* top_data = top[0]->mutable_cpu_data(); | ||
| - crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true); | ||
| + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, bottom_data, top_data, | ||
| + true); | ||
| } | ||
| template <typename Dtype> | ||
| @@ -127,7 +137,8 @@ void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
| if (propagate_down[0]) { | ||
| caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff); | ||
| std::vector<int> indices(top[0]->num_axes(), 0); | ||
| - crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false); | ||
| + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, top_diff, | ||
| + bottom_diff, false); | ||
| } | ||
| } | ||
View
124
src/caffe/layers/crop_layer.cu
| @@ -4,103 +4,81 @@ | ||
| namespace caffe { | ||
| -// Copy (one line per thread) from one array to another, with arbitrary | ||
| -// strides in the last two dimensions. | ||
| +__device__ int compute_uncropped_index( | ||
| + int index, | ||
| + const int ndims, | ||
| + const int* src_strides, | ||
| + const int* dest_strides, | ||
| + const int* offsets) { | ||
| + int dest_index = index; | ||
| + int src_index = 0; | ||
| + for (int i = 0; i < ndims; ++i) { | ||
| + int coord = dest_index / dest_strides[i]; | ||
| + dest_index -= coord * dest_strides[i]; | ||
| + src_index += src_strides[i] * (coord + offsets[i]); | ||
| + } | ||
| + return src_index; | ||
| +} | ||
| + | ||
| template <typename Dtype> | ||
| -__global__ void copy_kernel(const int n, const int height, const int width, | ||
| - const int src_inner_stride, | ||
| - const int dest_inner_stride, | ||
| +__global__ void crop_kernel_forward(const int nthreads, | ||
| + const int ndims, | ||
| + const int* src_strides, | ||
| + const int* dest_strides, | ||
| + const int* offsets, | ||
| const Dtype* src, Dtype* dest) { | ||
| - CUDA_KERNEL_LOOP(index, n) { | ||
| - int src_start = index * src_inner_stride; | ||
| - int dest_start = index * dest_inner_stride; | ||
| - for (int i = 0; i < width; ++i) { | ||
| - dest[dest_start + i] = src[src_start + i]; | ||
| - } | ||
| + CUDA_KERNEL_LOOP(index, nthreads) { | ||
| + int src_index = compute_uncropped_index( | ||
| + index, ndims, src_strides, dest_strides, offsets); | ||
| + dest[index] = src[src_index]; | ||
| } | ||
| } | ||
| template <typename Dtype> | ||
| -void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom, | ||
| - const vector<Blob<Dtype>*>& top, | ||
| - const vector<int>& offsets, | ||
| - vector<int> indices, | ||
| - int cur_dim, | ||
| - const Dtype* src_data, | ||
| - Dtype* dest_data, | ||
| - bool is_forward) { | ||
| - if (cur_dim + 2 < top[0]->num_axes()) { | ||
| -    // We are not yet at the final dimension, call copy recursively | ||
| - for (int i = 0; i < top[0]->shape(cur_dim); ++i) { | ||
| - indices[cur_dim] = i; | ||
| - crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1, | ||
| - src_data, dest_data, is_forward); | ||
| - } | ||
| - } else { | ||
| - // We are at the last two dimensions, which are stored continuously in | ||
| - // memory. With (N,C,H,W) | ||
| - // (0,1,2,3) cur_dim -> H | ||
| - // cur_dim+1 -> W | ||
| - const int lines = top[0]->shape(cur_dim); | ||
| - const int height = top[0]->shape(cur_dim); | ||
| - const int width = top[0]->shape(cur_dim+1); | ||
| - std::vector<int> ind_off(cur_dim+2, 0); | ||
| - for (int j = 0; j < cur_dim; ++j) { | ||
| - ind_off[j] = indices[j] + offsets[j]; | ||
| - } | ||
| - ind_off[cur_dim] = offsets[cur_dim]; | ||
| - ind_off[cur_dim+1] = offsets[cur_dim+1]; | ||
| - // Compute copy strides | ||
| - const int src_inner_stride = bottom[0]->shape(cur_dim+1); | ||
| - const int dest_inner_stride = top[0]->shape(cur_dim+1); | ||
| - | ||
| - if (is_forward) { | ||
| - const Dtype* bottom_data = bottom[0]->gpu_data() + | ||
| - bottom[0]->offset(ind_off); | ||
| - Dtype* top_data = top[0]->mutable_gpu_data() + | ||
| - top[0]->offset(indices); | ||
| - // NOLINT_NEXT_LINE(whitespace/operators) | ||
| - copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
| - lines, height, width, | ||
| - src_inner_stride, | ||
| - dest_inner_stride, | ||
| - bottom_data, top_data); | ||
| - | ||
| - } else { | ||
| - const Dtype* top_diff = top[0]->gpu_diff() + | ||
| - top[0]->offset(indices); | ||
| - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() + | ||
| - bottom[0]->offset(ind_off); | ||
| - // NOLINT_NEXT_LINE(whitespace/operators) | ||
| - copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>( | ||
| - lines, height, width, | ||
| - dest_inner_stride, | ||
| - src_inner_stride, | ||
| - top_diff, bottom_diff); | ||
| - } | ||
| +__global__ void crop_kernel_backward(const int nthreads, | ||
| + const int ndims, | ||
| + const int* src_strides, | ||
| + const int* dest_strides, | ||
| + const int* offsets, | ||
| + Dtype* src, const Dtype* dest) { | ||
| + CUDA_KERNEL_LOOP(index, nthreads) { | ||
| + int src_index = compute_uncropped_index( | ||
| + index, ndims, src_strides, dest_strides, offsets); | ||
| + src[src_index] = dest[index]; | ||
| } | ||
| } | ||
| template <typename Dtype> | ||
| void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
| const vector<Blob<Dtype>*>& top) { | ||
| - std::vector<int> indices(top[0]->num_axes(), 0); | ||
| const Dtype* bottom_data = bottom[0]->gpu_data(); | ||
| Dtype* top_data = top[0]->mutable_gpu_data(); | ||
| - crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true); | ||
| + int n = top[0]->count(); | ||
| + // NOLINT_NEXT_LINE(whitespace/operators) | ||
| + crop_kernel_forward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n, | ||
| + bottom[0]->num_axes(), | ||
| + src_strides_.gpu_data(), | ||
| + dest_strides_.gpu_data(), | ||
| + offsets.gpu_data(), | ||
| + bottom_data, top_data); | ||
| } | ||
| template <typename Dtype> | ||
| void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
| const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
| const Dtype* top_diff = top[0]->gpu_diff(); | ||
| Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); | ||
| + int n = top[0]->count(); | ||
| if (propagate_down[0]) { | ||
| caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff); | ||
| - std::vector<int> indices(top[0]->num_axes(), 0); | ||
| - crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff, | ||
| - false); | ||
| + // NOLINT_NEXT_LINE(whitespace/operators) | ||
| + crop_kernel_backward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n, | ||
| + bottom[0]->num_axes(), | ||
| + src_strides_.gpu_data(), | ||
| + dest_strides_.gpu_data(), | ||
| + offsets.gpu_data(), | ||
| + bottom_diff, top_diff); | ||
| } | ||
| } | ||