From 33f86122970392fcda19ef80ed5cd349279b896d Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Tue, 18 Apr 2017 18:22:38 -0700 Subject: [PATCH 1/8] Rewrite crop cuda kernel --- include/caffe/layers/crop_layer.hpp | 6 +- src/caffe/layers/crop_layer.cpp | 21 +++++-- src/caffe/layers/crop_layer.cu | 122 +++++++++++++++--------------------- 3 files changed, 69 insertions(+), 80 deletions(-) diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp index c4fda1220c3..5219fa5cb5f 100644 --- a/include/caffe/layers/crop_layer.hpp +++ b/include/caffe/layers/crop_layer.hpp @@ -41,13 +41,15 @@ class CropLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - vector offsets; + Blob offsets; + Blob src_strides_; + Blob dest_strides_; private: // Recursive copy function. void crop_copy(const vector*>& bottom, const vector*>& top, - const vector& offsets, + const int* offsets, vector indices, int cur_dim, const Dtype* src_data, diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index ef8c177c4dd..65ea8f8b7d0 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -40,8 +40,10 @@ void CropLayer::Reshape(const vector*>& bottom, const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis()); // Initialize offsets to 0 and the new shape to the current shape of the data. - offsets = vector(input_dim, 0); vector new_shape(bottom[0]->shape()); + vector offsets_shape(1, input_dim); + offsets.Reshape(offsets_shape); + int* offset_data = offsets.mutable_cpu_data(); // Determine crop offsets and the new shape post-crop. for (int i = 0; i < input_dim; ++i) { @@ -63,15 +65,22 @@ void CropLayer::Reshape(const vector*>& bottom, << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; } new_shape[i] = new_size; - offsets[i] = crop_offset; + offset_data[i] = crop_offset; } top[0]->Reshape(new_shape); + // Compute strides + src_strides_.Reshape(offsets_shape); + dest_strides_.Reshape(offsets_shape); + for (int i = 0; i < input_dim; ++i) { + src_strides_.mutable_cpu_data()[i] = bottom[0]->count(i + 1, input_dim); + dest_strides_.mutable_cpu_data()[i] = top[0]->count(i + 1, input_dim); + } } template void CropLayer::crop_copy(const vector*>& bottom, const vector*>& top, - const vector& offsets, + const int* offsets, vector indices, int cur_dim, const Dtype* src_data, @@ -115,7 +124,8 @@ void CropLayer::Forward_cpu(const vector*>& bottom, std::vector indices(top[0]->num_axes(), 0); const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true); + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, bottom_data, top_data, + true); } template @@ -127,7 +137,8 @@ void CropLayer::Backward_cpu(const vector*>& top, if (propagate_down[0]) { caffe_set(bottom[0]->count(), static_cast(0), bottom_diff); std::vector indices(top[0]->num_axes(), 0); - crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false); + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, top_diff, + bottom_diff, false); } } diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 677077cdd8b..a400f333e14 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -4,90 +4,62 @@ namespace caffe { -// Copy (one line per thread) from one array to another, with arbitrary -// strides in the last two dimensions. +__device__ int compute_uncropped_index( + int index, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets) { + int dest_index = index; + int src_index = 0; + for (int i = 0; i < ndims; ++i) { + int coord = dest_index / dest_strides[i]; + dest_index -= coord * dest_strides[i]; + src_index += src_strides[i] * (coord + offsets[i]); + } + return src_index; +} + template -__global__ void copy_kernel(const int n, const int height, const int width, - const int src_inner_stride, - const int dest_inner_stride, +__global__ void crop_kernel_forward(const int nthreads, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets, const Dtype* src, Dtype* dest) { - CUDA_KERNEL_LOOP(index, n) { - int src_start = index * src_inner_stride; - int dest_start = index * dest_inner_stride; - for (int i = 0; i < width; ++i) { - dest[dest_start + i] = src[src_start + i]; - } + CUDA_KERNEL_LOOP(index, nthreads) { + int src_index = compute_uncropped_index( + index, ndims, src_strides, dest_strides, offsets); + dest[index] = src[src_index]; } } template -void CropLayer::crop_copy_gpu(const vector*>& bottom, - const vector*>& top, - const vector& offsets, - vector indices, - int cur_dim, - const Dtype* src_data, - Dtype* dest_data, - bool is_forward) { - if (cur_dim + 2 < top[0]->num_axes()) { - // We are not yet at the final dimension, call copy recursivley - for (int i = 0; i < top[0]->shape(cur_dim); ++i) { - indices[cur_dim] = i; - crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1, - src_data, dest_data, is_forward); - } - } else { - // We are at the last two dimensions, which are stored continuously in - // memory. With (N,C,H,W) - // (0,1,2,3) cur_dim -> H - // cur_dim+1 -> W - const int lines = top[0]->shape(cur_dim); - const int height = top[0]->shape(cur_dim); - const int width = top[0]->shape(cur_dim+1); - std::vector ind_off(cur_dim+2, 0); - for (int j = 0; j < cur_dim; ++j) { - ind_off[j] = indices[j] + offsets[j]; - } - ind_off[cur_dim] = offsets[cur_dim]; - ind_off[cur_dim+1] = offsets[cur_dim+1]; - // Compute copy strides - const int src_inner_stride = bottom[0]->shape(cur_dim+1); - const int dest_inner_stride = top[0]->shape(cur_dim+1); - - if (is_forward) { - const Dtype* bottom_data = bottom[0]->gpu_data() + - bottom[0]->offset(ind_off); - Dtype* top_data = top[0]->mutable_gpu_data() + - top[0]->offset(indices); - // NOLINT_NEXT_LINE(whitespace/operators) - copy_kernel<<>>( - lines, height, width, - src_inner_stride, - dest_inner_stride, - bottom_data, top_data); - - } else { - const Dtype* top_diff = top[0]->gpu_diff() + - top[0]->offset(indices); - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() + - bottom[0]->offset(ind_off); - // NOLINT_NEXT_LINE(whitespace/operators) - copy_kernel<<>>( - lines, height, width, - dest_inner_stride, - src_inner_stride, - top_diff, bottom_diff); - } +__global__ void crop_kernel_backward(const int nthreads, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets, + Dtype* src, const Dtype* dest) { + CUDA_KERNEL_LOOP(index, nthreads) { + int src_index = compute_uncropped_index( + index, ndims, src_strides, dest_strides, offsets); + src[src_index] = dest[index]; } } template void CropLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - std::vector indices(top[0]->num_axes(), 0); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); - crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true); + int n = top[0]->count(); + crop_kernel_forward<<>>(n, + bottom[0]->num_axes(), + src_strides_.gpu_data(), + dest_strides_.gpu_data(), + offsets.gpu_data(), + bottom_data, top_data); } template @@ -95,12 +67,16 @@ void CropLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + int n = top[0]->count(); if (propagate_down[0]) { caffe_gpu_set(bottom[0]->count(), static_cast(0), bottom_diff); - std::vector indices(top[0]->num_axes(), 0); - crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff, - false); + crop_kernel_backward<<>>(n, + bottom[0]->num_axes(), + src_strides_.gpu_data(), + dest_strides_.gpu_data(), + offsets.gpu_data(), + bottom_diff, top_diff); } } From cd1696d00b995a1d8567cb6f3ad7f65ec4df4176 Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Tue, 18 Apr 2017 18:48:26 -0700 Subject: [PATCH 2/8] Fix crop layer lint errors --- src/caffe/layers/crop_layer.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index a400f333e14..4ece9cd1761 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -54,6 +54,7 @@ void CropLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); int n = top[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) crop_kernel_forward<<>>(n, bottom[0]->num_axes(), src_strides_.gpu_data(), @@ -71,6 +72,7 @@ void CropLayer::Backward_gpu(const vector*>& top, if (propagate_down[0]) { caffe_gpu_set(bottom[0]->count(), static_cast(0), bottom_diff); + // NOLINT_NEXT_LINE(whitespace/operators) crop_kernel_backward<<>>(n, bottom[0]->num_axes(), src_strides_.gpu_data(), From ec35395e131a0d5e7c55cbd74dadbd46a49a645c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20St=C3=A6r=20Nissen?= Date: Thu, 4 May 2017 14:33:40 +0200 Subject: [PATCH 3/8] Handling destruction of empty Net objects --- matlab/+caffe/Net.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/matlab/+caffe/Net.m b/matlab/+caffe/Net.m index 349e060eb22..bb99ec89049 100644 --- a/matlab/+caffe/Net.m +++ b/matlab/+caffe/Net.m @@ -69,7 +69,9 @@ self.blob_names = self.attributes.blob_names; end function delete (self) - caffe_('delete_net', self.hNet_self); + if ~isempty(self.hNet_self) + caffe_('delete_net', self.hNet_self); + end end function layer = layers(self, layer_name) CHECK(ischar(layer_name), 'layer_name must be a string'); From b7e2b99c7f0aeeb8e24046f8cbf5212065b9ccdf Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 12 May 2017 10:06:51 -0700 Subject: [PATCH 4/8] Downgrade boost requirement from 1.55 to 1.54 --- cmake/Dependencies.cmake | 2 +- scripts/travis/install-deps.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 4a5bac471b4..c48255c89f2 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "") set(Caffe_COMPILE_OPTIONS "") # ---[ Boost -find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem) +find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem) list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS}) list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES}) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index dac5d2f9d37..2fa2a74a486 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -9,10 +9,10 @@ apt-get -y update apt-get install -y --no-install-recommends \ build-essential \ graphviz \ - libboost-filesystem1.55-dev \ - libboost-python1.55-dev \ - libboost-system1.55-dev \ - libboost-thread1.55-dev \ + libboost-filesystem-dev \ + libboost-python-dev \ + libboost-system-dev \ + libboost-thread-dev \ libgflags-dev \ libgoogle-glog-dev \ libhdf5-serial-dev \ From 30a2ab7e50430911f37ddf981e67e4f36f662f14 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Mon, 15 May 2017 02:16:19 +0000 Subject: [PATCH 5/8] cmake: rename libproto.a -> libcaffeproto.a --- cmake/ConfigGen.cmake | 2 +- src/caffe/CMakeLists.txt | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index ad91f542104..09bb09b4ff2 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -33,7 +33,7 @@ function(caffe_generate_export_configs) configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY) # Add targets to the build-tree export set - export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") + export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") export(PACKAGE Caffe) # ---[ Configure install-tree CaffeConfig.cmake file ]--- diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index b9152e9216f..4a805568566 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto) caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files}) # include python files either to force generation -add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) -caffe_default_properties(proto) -target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES}) -target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR}) +add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) +caffe_default_properties(caffeproto) +target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES}) +target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR}) -list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend! +list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend! # --[ Caffe library @@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES # ---[ Install install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto) -install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) file(WRITE ${PROJECT_BINARY_DIR}/__init__.py) list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py) From 83814da36d5a44039ddc35f58f9b341e9d1bd935 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Mon, 15 May 2017 03:04:47 +0000 Subject: [PATCH 6/8] docs/debian guide: update compiler combination table --- docs/install_apt_debian.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 65fe70924e1..bd91124a898 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -96,18 +96,22 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. Some users may find their favorate compiler doesn't work with CUDA. ``` -CXX compiler | CUDA 7.5 | CUDA 8.0 | --------------+------------+------------+- -GCC-7 | ? | ? | -GCC-6 | ✘ | ✘ | -GCC-5 | ✔ [1] | ✔ | -CLANG-4.0 | ? | ? | -CLANG-3.9 | ✘ | ✘ | -CLANG-3.8 | ? | ✔ | +CXX compiler | CUDA 7.5 | CUDA 8.0 | CUDA 9.0 | +-------------+------------+------------+------------+ +GCC-8 | ? | ? | ? | +GCC-7 | ? | ? | ? | +GCC-6 | ✘ | ✘ | ✔ | +GCC-5 | ✔ [1] | ✔ | ✔ | +-------------+------------+------------+------------+ +CLANG-4.0 | ? | ? | ? | +CLANG-3.9 | ✘ | ✘ | ✔ | +CLANG-3.8 | ? | ✔ | ✔ | ``` `[1]` CUDA 7.5 's `host_config.h` must be patched before working with GCC-5. +`[2]` CUDA 9.0: https://devblogs.nvidia.com/parallelforall/cuda-9-features-revealed/ + BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's. You may encounter failure linking GCC-4.X object files against GCC-5 libraries. (See https://wiki.debian.org/GCC5 ) From 264cf199e4e8bc44bb97762b1018137704157c2c Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 13 Jun 2017 11:59:26 -0700 Subject: [PATCH 7/8] List branches in readme --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 0ae3616b4a6..c40aee65c3c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,14 @@ Check out the [project site](http://caffe.berkeleyvision.org) for all the detail and step-by-step examples. +## Custom distributions + +- [Intel optimized branch](https://github.com/BVLC/caffe/tree/intel) for CPU, in particular Xeon processors (HSW, BDW, Xeon Phi). +- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. +- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) + +## Community + [![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models. From 4efdf7ee49cffefdd7ea099c00dc5ea327640f04 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 20 Jun 2017 14:20:42 -0700 Subject: [PATCH 8/8] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c40aee65c3c..5148c69d310 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ and step-by-step examples. ## Custom distributions -- [Intel optimized branch](https://github.com/BVLC/caffe/tree/intel) for CPU, in particular Xeon processors (HSW, BDW, Xeon Phi). + - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, Xeon Phi). - [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. - [Windows Caffe](https://github.com/BVLC/caffe/tree/windows)