Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
Choose a Base Repository
BVLC/caffe
01org/caffe
123gpg321/caffe
71squared/caffe
AMDComputeLibraries/caffe
AdamStelmaszczyk/caffe
Adnan1011/NR-IQA-CNN
AkiChen/Correlative-Filters-Caffe
AlfredXiangWu/caffe
Austriker/caffe
BlGene/caffe
CUHK-MMLAB/caffe
ChWick/caffe
ChengduoZhao/caffe
Coderx7/caffe_win
Cysu/caffe
DavyVan/caffe-with-CRF
Eniac-Xie/caffe
Fang-Haoshu/RMPE
FlorisGaisser/caffe
HUJI-Deep/caffe-simnets
JanZivcak/caffe_player
LihangLiu/apollocaffe
Maratyszcza/caffe
MhLiao/TextBoxes
MichalBusta/Ristretto-caffe
Microsoft/caffe
NVIDIA/caffe
Nanne/caffe
OpenHero/caffe
Paradigm4/caffe
Pastromhaug/caffe-stochastic-depth
RadekSimkanic/caffe-for-cudnn-v2.5.48
Russell91/apollocaffe
ShaoqingRen/caffe
ShiqiYu/caffe
Tongcheng/caffe
Xiaomi2008/caffe_nd_sense_segmentation
Yangqing/caffe
YutingZhang/caffe
acarabott/caffe
aiilab/caffe
ajtulloch/caffe
akirafukui/caffe
alemagnani/caffe
aleph7/caffe
amiralush/caffe
amoussawi/caffe
anandthakker/caffe
aravindhm/caffe
ashrafk/caffe
awabot-dev/caffe
beijbom/caffe
beniz/caffe
benjibc/caffe-rpi
borisgin/caffe
brodyh/caffe
buptwangfei/caffe
caffe2/caffe
caozhengquan/caffe
cbfinn/caffe
cdoersch/caffe
chengyangfu/caffe
chenxiang204/caffe
conner99/caffe
ctensmeyer/caffe
ctuning/caffe
cwd0610/caffe
cypof/caffe
ddtm/caffe
deercoder/DeepFood
developmentseed/caffe
dineshj1/caffe
dmlc/caffe
dosovits/caffe-fr-chairs
dpaiton/caffe
drnikolaev/caffe
dtmoodie/caffe
ducha-aiki/caffe
elezar/caffe
eli-osherovich/caffe
emcmanus/caffe
erictzeng/caffe
flickr/caffe
flx42/caffe
flynnhe/caffe
forresti/caffe
freesouls/caffe
frogner/caffe
gabrielPeart/caffe
gedikli/caffe
gidariss/caffe_LocNet
guker/caffe
gustavla/caffe
hardegg/caffe
hli2020/caffe
initialneil/caffe
intelcaffe/caffe
ivankreso/caffe-xeon-phi
jackculpepper/caffe
jacobandreas/apollocaffe
jasonustc/caffe
jeffdonahue/caffe
jetpacapp/caffe
jhauswald/caffe
jiaxiang-wu/caffe-QCNN
jjhartmann/caffe
kashefy/caffe
kmatzen/caffe
kod3r/caffe
kpzhang93/caffe-face
kuprel/caffe
lihuibng/caffe
linhj184169280/caffe
lokistone/caffe
longjon/caffe
lunochod/caffe
malinna/caffe-pose_network
matthieudelaro/caffeBVLCplus
medivhna/neural-aggregation-network
memo/caffe
mengbiping/caffe
mfs6174/caffe
mjmarin/caffe
mmoghimi/BoostCNN
mohamed-ezz/caffe
mtamburrano/caffe
muupan/caffe
mydude/caffe-triplet
myfavouritekk/caffe
n-zhang/caffe
naeluh/caffe
naibaf7/caffe
nicklhy/caffe-dev
nipengadmaster/caffe
niuzhiheng/caffe
nyamnyam7/private-caffe
pannous/caffe
pmgysel/caffe
psioncoder/caffe
pulkitag/caffe
qingswu/caffe
raingo/caffe
rakesh-mohanta/caffe
rbgirshick/caffe
redknightlois/caffe
rksltnl/Caffe-Deep-Metric-Learning-CVPR16
ronghanghu/caffe
sanghoon/caffe
scanlime/caffe
semihyagcioglu/caffe
sergeyk/caffe
sguada/caffe-public
sh1r0/caffe
shaibagon/caffe
shelhamer/caffe
shi-yan/caffe
shihenw/caffe
shiquanwang/caffe
slayton58/caffe
smajida/caffe
sp2823/caffe
stanford-futuredata/ssd-benchmarking
starimpact/caffe
stephenyan1231/caffe-private
stokasto/caffe
strin/caffe-opencl
sukritshankar/caffe
syhw/caffe
szcom/caffe
thuml/hash-caffe
tiangolo/caffe
tidsp/caffe-jacinto
tjevgerres/caffe
tnarihi/caffe
torrvision/caffe
tsingjinyun/caffe
vsubhashini/caffe
waderly/caffe
wangyida/caffe
weiliu89/caffe
willyd/caffe
woozzu/caffe
xdshang/caffe-multilabel
xianjiec/caffe
xllau/caffe
xuepo/caffe
xuzhenqi/caffe
xyy19920105/caffe
yanii/caffe
ydwen/caffe-face
yjxiong/caffe
yocox/caffe
yosinski/caffe
zhangkom/caffe
zhangliliang/caffe
zhongzhuoyao/caffe
zjchuyp/caffe
zlmzju/caffe
znah/caffe
Nothing to show
...
Choose a Head Repository
BVLC/caffe
01org/caffe
123gpg321/caffe
71squared/caffe
AMDComputeLibraries/caffe
AdamStelmaszczyk/caffe
Adnan1011/NR-IQA-CNN
AkiChen/Correlative-Filters-Caffe
AlfredXiangWu/caffe
Austriker/caffe
BlGene/caffe
CUHK-MMLAB/caffe
ChWick/caffe
ChengduoZhao/caffe
Coderx7/caffe_win
Cysu/caffe
DavyVan/caffe-with-CRF
Eniac-Xie/caffe
Fang-Haoshu/RMPE
FlorisGaisser/caffe
HUJI-Deep/caffe-simnets
JanZivcak/caffe_player
LihangLiu/apollocaffe
Maratyszcza/caffe
MhLiao/TextBoxes
MichalBusta/Ristretto-caffe
Microsoft/caffe
NVIDIA/caffe
Nanne/caffe
OpenHero/caffe
Paradigm4/caffe
Pastromhaug/caffe-stochastic-depth
RadekSimkanic/caffe-for-cudnn-v2.5.48
Russell91/apollocaffe
ShaoqingRen/caffe
ShiqiYu/caffe
Tongcheng/caffe
Xiaomi2008/caffe_nd_sense_segmentation
Yangqing/caffe
YutingZhang/caffe
acarabott/caffe
aiilab/caffe
ajtulloch/caffe
akirafukui/caffe
alemagnani/caffe
aleph7/caffe
amiralush/caffe
amoussawi/caffe
anandthakker/caffe
aravindhm/caffe
ashrafk/caffe
awabot-dev/caffe
beijbom/caffe
beniz/caffe
benjibc/caffe-rpi
borisgin/caffe
brodyh/caffe
buptwangfei/caffe
caffe2/caffe
caozhengquan/caffe
cbfinn/caffe
cdoersch/caffe
chengyangfu/caffe
chenxiang204/caffe
conner99/caffe
ctensmeyer/caffe
ctuning/caffe
cwd0610/caffe
cypof/caffe
ddtm/caffe
deercoder/DeepFood
developmentseed/caffe
dineshj1/caffe
dmlc/caffe
dosovits/caffe-fr-chairs
dpaiton/caffe
drnikolaev/caffe
dtmoodie/caffe
ducha-aiki/caffe
elezar/caffe
eli-osherovich/caffe
emcmanus/caffe
erictzeng/caffe
flickr/caffe
flx42/caffe
flynnhe/caffe
forresti/caffe
freesouls/caffe
frogner/caffe
gabrielPeart/caffe
gedikli/caffe
gidariss/caffe_LocNet
guker/caffe
gustavla/caffe
hardegg/caffe
hli2020/caffe
initialneil/caffe
intelcaffe/caffe
ivankreso/caffe-xeon-phi
jackculpepper/caffe
jacobandreas/apollocaffe
jasonustc/caffe
jeffdonahue/caffe
jetpacapp/caffe
jhauswald/caffe
jiaxiang-wu/caffe-QCNN
jjhartmann/caffe
kashefy/caffe
kmatzen/caffe
kod3r/caffe
kpzhang93/caffe-face
kuprel/caffe
lihuibng/caffe
linhj184169280/caffe
lokistone/caffe
longjon/caffe
lunochod/caffe
malinna/caffe-pose_network
matthieudelaro/caffeBVLCplus
medivhna/neural-aggregation-network
memo/caffe
mengbiping/caffe
mfs6174/caffe
mjmarin/caffe
mmoghimi/BoostCNN
mohamed-ezz/caffe
mtamburrano/caffe
muupan/caffe
mydude/caffe-triplet
myfavouritekk/caffe
n-zhang/caffe
naeluh/caffe
naibaf7/caffe
nicklhy/caffe-dev
nipengadmaster/caffe
niuzhiheng/caffe
nyamnyam7/private-caffe
pannous/caffe
pmgysel/caffe
psioncoder/caffe
pulkitag/caffe
qingswu/caffe
raingo/caffe
rakesh-mohanta/caffe
rbgirshick/caffe
redknightlois/caffe
rksltnl/Caffe-Deep-Metric-Learning-CVPR16
ronghanghu/caffe
sanghoon/caffe
scanlime/caffe
semihyagcioglu/caffe
sergeyk/caffe
sguada/caffe-public
sh1r0/caffe
shaibagon/caffe
shelhamer/caffe
shi-yan/caffe
shihenw/caffe
shiquanwang/caffe
slayton58/caffe
smajida/caffe
sp2823/caffe
stanford-futuredata/ssd-benchmarking
starimpact/caffe
stephenyan1231/caffe-private
stokasto/caffe
strin/caffe-opencl
sukritshankar/caffe
syhw/caffe
szcom/caffe
thuml/hash-caffe
tiangolo/caffe
tidsp/caffe-jacinto
tjevgerres/caffe
tnarihi/caffe
torrvision/caffe
tsingjinyun/caffe
vsubhashini/caffe
waderly/caffe
wangyida/caffe
weiliu89/caffe
willyd/caffe
woozzu/caffe
xdshang/caffe-multilabel
xianjiec/caffe
xllau/caffe
xuepo/caffe
xuzhenqi/caffe
xyy19920105/caffe
yanii/caffe
ydwen/caffe-face
yjxiong/caffe
yocox/caffe
yosinski/caffe
zhangkom/caffe
zhangliliang/caffe
zhongzhuoyao/caffe
zjchuyp/caffe
zlmzju/caffe
znah/caffe
Nothing to show
  • 14 commits
  • 10 files changed
  • 0 commit comments
  • 6 contributors
Commits on Apr 19, 2017
Eric Tzeng Rewrite crop cuda kernel 33f8612
Eric Tzeng Fix crop layer lint errors cd1696d
Commits on May 04, 2017
Malte Stær Nissen Handling destruction of empty Net objects ec35395
@shelhamer shelhamer Merge pull request #5588 from ShaggO/matlab-fix-delete
Handling destruction of empty Net objects
c293d9d
@shelhamer shelhamer Merge pull request #5548 from erictzeng/crop
Rewrite crop layer GPU implementation
7d3f8a7
Commits on May 12, 2017
@lukeyeager lukeyeager Downgrade boost requirement from 1.55 to 1.54 b7e2b99
@shelhamer shelhamer Merge pull request #5617 from lukeyeager/boost-1.54
Downgrade boost requirement from 1.55 to 1.54
25391bf
Commits on May 15, 2017
@CDLuminate CDLuminate cmake: rename libproto.a -> libcaffeproto.a 30a2ab7
@CDLuminate CDLuminate docs/debian guide: update compiler combination table 83814da
@shelhamer shelhamer Merge pull request #5625 from CDLuminate/docs-update
docs/debian guide: update compiler combination table
27ffbbf
@shelhamer shelhamer Merge pull request #5624 from CDLuminate/cmake-rename-static-proto-library

cmake: rename libproto.a -> libcaffeproto.a
91b0928
Commits on Jun 13, 2017
@cypof cypof List branches in readme
264cf19
Commits on Jun 14, 2017
@cypof cypof Merge pull request #5687 from BVLC/readme_list_branches
List branches in readme
f16b5f2
Commits on Jun 20, 2017
@cypof cypof Update README.md 4efdf7e
View
@@ -15,6 +15,14 @@ Check out the [project site](http://caffe.berkeleyvision.org) for all the detail
and step-by-step examples.
+## Custom distributions
+
+ - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, Xeon Phi).
+- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices.
+- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows)
+
+## Community
+
[![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models.
View
@@ -33,7 +33,7 @@ function(caffe_generate_export_configs)
configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY)
# Add targets to the build-tree export set
- export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
+ export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
export(PACKAGE Caffe)
# ---[ Configure install-tree CaffeConfig.cmake file ]---
View
@@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "")
set(Caffe_COMPILE_OPTIONS "")
# ---[ Boost
-find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem)
+find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem)
list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS})
list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES})
View
@@ -96,18 +96,22 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`.
Some users may find their favorite compiler doesn't work with CUDA.
```
-CXX compiler | CUDA 7.5 | CUDA 8.0 |
--------------+------------+------------+-
-GCC-7 | ? | ? |
-GCC-6 | ✘ | ✘ |
-GCC-5 | ✔ [1] | ✔ |
-CLANG-4.0 | ? | ? |
-CLANG-3.9 | ✘ | ✘ |
-CLANG-3.8 | ? | ✔ |
+CXX compiler | CUDA 7.5 | CUDA 8.0 | CUDA 9.0 |
+-------------+------------+------------+------------+
+GCC-8 | ? | ? | ? |
+GCC-7 | ? | ? | ? |
+GCC-6 | ✘ | ✘ | ✔ |
+GCC-5 | ✔ [1] | ✔ | ✔ |
+-------------+------------+------------+------------+
+CLANG-4.0 | ? | ? | ? |
+CLANG-3.9 | ✘ | ✘ | ✔ |
+CLANG-3.8 | ? | ✔ | ✔ |
```
`[1]` CUDA 7.5's `host_config.h` must be patched before working with GCC-5.
+`[2]` CUDA 9.0: https://devblogs.nvidia.com/parallelforall/cuda-9-features-revealed/
+
BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's.
You may encounter failure linking GCC-4.X object files against GCC-5 libraries.
(See https://wiki.debian.org/GCC5 )
@@ -41,13 +41,15 @@ class CropLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- vector<int> offsets;
+ Blob<int> offsets;
+ Blob<int> src_strides_;
+ Blob<int> dest_strides_;
private:
// Recursive copy function.
void crop_copy(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
- const vector<int>& offsets,
+ const int* offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
View
@@ -69,7 +69,9 @@
self.blob_names = self.attributes.blob_names;
end
function delete (self)
- caffe_('delete_net', self.hNet_self);
+ if ~isempty(self.hNet_self)
+ caffe_('delete_net', self.hNet_self);
+ end
end
function layer = layers(self, layer_name)
CHECK(ischar(layer_name), 'layer_name must be a string');
@@ -9,10 +9,10 @@ apt-get -y update
apt-get install -y --no-install-recommends \
build-essential \
graphviz \
- libboost-filesystem1.55-dev \
- libboost-python1.55-dev \
- libboost-system1.55-dev \
- libboost-thread1.55-dev \
+ libboost-filesystem-dev \
+ libboost-python-dev \
+ libboost-system-dev \
+ libboost-thread-dev \
libgflags-dev \
libgoogle-glog-dev \
libhdf5-serial-dev \
View
@@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto)
caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files})
# include python files either to force generation
-add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
-caffe_default_properties(proto)
-target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES})
-target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR})
+add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
+caffe_default_properties(caffeproto)
+target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES})
+target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR})
-list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend!
+list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend!
# --[ Caffe library
@@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES
# ---[ Install
install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto)
-install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
+install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
file(WRITE ${PROJECT_BINARY_DIR}/__init__.py)
list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py)
@@ -40,8 +40,10 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis());
// Initialize offsets to 0 and the new shape to the current shape of the data.
- offsets = vector<int>(input_dim, 0);
vector<int> new_shape(bottom[0]->shape());
+ vector<int> offsets_shape(1, input_dim);
+ offsets.Reshape(offsets_shape);
+ int* offset_data = offsets.mutable_cpu_data();
// Determine crop offsets and the new shape post-crop.
for (int i = 0; i < input_dim; ++i) {
@@ -63,15 +65,22 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
<< "size " << bottom[1]->shape(i) << " and offset " << crop_offset;
}
new_shape[i] = new_size;
- offsets[i] = crop_offset;
+ offset_data[i] = crop_offset;
}
top[0]->Reshape(new_shape);
+ // Compute strides
+ src_strides_.Reshape(offsets_shape);
+ dest_strides_.Reshape(offsets_shape);
+ for (int i = 0; i < input_dim; ++i) {
+ src_strides_.mutable_cpu_data()[i] = bottom[0]->count(i + 1, input_dim);
+ dest_strides_.mutable_cpu_data()[i] = top[0]->count(i + 1, input_dim);
+ }
}
template <typename Dtype>
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top,
- const vector<int>& offsets,
+ const int* offsets,
vector<int> indices,
int cur_dim,
const Dtype* src_data,
@@ -115,7 +124,8 @@ void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
std::vector<int> indices(top[0]->num_axes(), 0);
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
- crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
+ crop_copy(bottom, top, offsets.cpu_data(), indices, 0, bottom_data, top_data,
+ true);
}
template <typename Dtype>
@@ -127,7 +137,8 @@ void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (propagate_down[0]) {
caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
std::vector<int> indices(top[0]->num_axes(), 0);
- crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false);
+ crop_copy(bottom, top, offsets.cpu_data(), indices, 0, top_diff,
+ bottom_diff, false);
}
}
@@ -4,103 +4,81 @@
namespace caffe {
-// Copy (one line per thread) from one array to another, with arbitrary
-// strides in the last two dimensions.
+__device__ int compute_uncropped_index(
+ int index,
+ const int ndims,
+ const int* src_strides,
+ const int* dest_strides,
+ const int* offsets) {
+ int dest_index = index;
+ int src_index = 0;
+ for (int i = 0; i < ndims; ++i) {
+ int coord = dest_index / dest_strides[i];
+ dest_index -= coord * dest_strides[i];
+ src_index += src_strides[i] * (coord + offsets[i]);
+ }
+ return src_index;
+}
+
template <typename Dtype>
-__global__ void copy_kernel(const int n, const int height, const int width,
- const int src_inner_stride,
- const int dest_inner_stride,
+__global__ void crop_kernel_forward(const int nthreads,
+ const int ndims,
+ const int* src_strides,
+ const int* dest_strides,
+ const int* offsets,
const Dtype* src, Dtype* dest) {
- CUDA_KERNEL_LOOP(index, n) {
- int src_start = index * src_inner_stride;
- int dest_start = index * dest_inner_stride;
- for (int i = 0; i < width; ++i) {
- dest[dest_start + i] = src[src_start + i];
- }
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ int src_index = compute_uncropped_index(
+ index, ndims, src_strides, dest_strides, offsets);
+ dest[index] = src[src_index];
}
}
template <typename Dtype>
-void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top,
- const vector<int>& offsets,
- vector<int> indices,
- int cur_dim,
- const Dtype* src_data,
- Dtype* dest_data,
- bool is_forward) {
- if (cur_dim + 2 < top[0]->num_axes()) {
- // We are not yet at the final dimension, call copy recursivley
- for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
- indices[cur_dim] = i;
- crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1,
- src_data, dest_data, is_forward);
- }
- } else {
- // We are at the last two dimensions, which are stored continuously in
- // memory. With (N,C,H,W)
- // (0,1,2,3) cur_dim -> H
- // cur_dim+1 -> W
- const int lines = top[0]->shape(cur_dim);
- const int height = top[0]->shape(cur_dim);
- const int width = top[0]->shape(cur_dim+1);
- std::vector<int> ind_off(cur_dim+2, 0);
- for (int j = 0; j < cur_dim; ++j) {
- ind_off[j] = indices[j] + offsets[j];
- }
- ind_off[cur_dim] = offsets[cur_dim];
- ind_off[cur_dim+1] = offsets[cur_dim+1];
- // Compute copy strides
- const int src_inner_stride = bottom[0]->shape(cur_dim+1);
- const int dest_inner_stride = top[0]->shape(cur_dim+1);
-
- if (is_forward) {
- const Dtype* bottom_data = bottom[0]->gpu_data() +
- bottom[0]->offset(ind_off);
- Dtype* top_data = top[0]->mutable_gpu_data() +
- top[0]->offset(indices);
- // NOLINT_NEXT_LINE(whitespace/operators)
- copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
- lines, height, width,
- src_inner_stride,
- dest_inner_stride,
- bottom_data, top_data);
-
- } else {
- const Dtype* top_diff = top[0]->gpu_diff() +
- top[0]->offset(indices);
- Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() +
- bottom[0]->offset(ind_off);
- // NOLINT_NEXT_LINE(whitespace/operators)
- copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
- lines, height, width,
- dest_inner_stride,
- src_inner_stride,
- top_diff, bottom_diff);
- }
+__global__ void crop_kernel_backward(const int nthreads,
+ const int ndims,
+ const int* src_strides,
+ const int* dest_strides,
+ const int* offsets,
+ Dtype* src, const Dtype* dest) {
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ int src_index = compute_uncropped_index(
+ index, ndims, src_strides, dest_strides, offsets);
+ src[src_index] = dest[index];
}
}
template <typename Dtype>
void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
- std::vector<int> indices(top[0]->num_axes(), 0);
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
- crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
+ int n = top[0]->count();
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ crop_kernel_forward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n,
+ bottom[0]->num_axes(),
+ src_strides_.gpu_data(),
+ dest_strides_.gpu_data(),
+ offsets.gpu_data(),
+ bottom_data, top_data);
}
template <typename Dtype>
void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ int n = top[0]->count();
if (propagate_down[0]) {
caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
- std::vector<int> indices(top[0]->num_axes(), 0);
- crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff,
- false);
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ crop_kernel_backward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n,
+ bottom[0]->num_axes(),
+ src_strides_.gpu_data(),
+ dest_strides_.gpu_data(),
+ offsets.gpu_data(),
+ bottom_diff, top_diff);
}
}

No commit comments for this range