
Commit

Shyrma bn mkl bp (#14)
* - write code for new batchnorm backprop

Signed-off-by: Yurii <iuriish@yahoo.com>

* - testing batchnorm backprop

Signed-off-by: Yurii <iuriish@yahoo.com>

* - write code for batchnorm backprop based on mkl dnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - testing and fixing bugs in batchnorm_bp mkl dnn

Signed-off-by: Yurii <iuriish@yahoo.com>

* - made corrections required by reviewer

Signed-off-by: Yurii <iuriish@yahoo.com>

* - change name in java wrapper for batchnorm op

Signed-off-by: Yurii <iuriish@yahoo.com>
shyrma committed Oct 26, 2019
1 parent d333d29 commit 029a69a
Showing 16 changed files with 1,295 additions and 714 deletions.
1 change: 1 addition & 0 deletions libnd4j/include/helpers/ConstantShapeHelper.h
@@ -60,6 +60,7 @@ namespace nd4j {
Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const char order, const std::vector<Nd4jLong> &shape);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const char order, const int rank, const Nd4jLong* shape);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo);

Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, nd4j::memory::Workspace *workspace);
Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true);
4 changes: 4 additions & 0 deletions libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp
@@ -99,6 +99,10 @@ namespace nd4j {
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
}

Nd4jLong* ConstantShapeHelper::createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo) {
return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast<Nd4jLong*>(shapeInfo)));
}

Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const nd4j::DataType dataType) {
auto descriptor = ShapeDescriptor::emptyDescriptor(dataType);
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
4 changes: 4 additions & 0 deletions libnd4j/include/helpers/cuda/ConstantShapeHelper.cu
@@ -102,6 +102,10 @@ namespace nd4j {
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
}

Nd4jLong* ConstantShapeHelper::createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo) {
return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast<Nd4jLong*>(shapeInfo)));
}

Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const nd4j::DataType dataType) {
auto descriptor = ShapeDescriptor::emptyDescriptor(dataType);
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
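The overload added above (identically in the CPU and CUDA helpers) simply forwards to the existing (dataType, order, rank, shape) factory, so it produces a cached shapeInfo with the same geometry as the source array but a different element type. A minimal usage sketch follows; the getInstance() singleton accessor and the surrounding helper function are assumptions for illustration, not part of this commit:

#include <helpers/ConstantShapeHelper.h>

// Sketch only: build a shapeInfo matching 'inputShapeInfo' in order/rank/shape
// but typed as FLOAT32, e.g. for the output buffer of a cast-like operation.
Nd4jLong* makeFloat32ShapeInfo(const Nd4jLong* inputShapeInfo) {
    // the new overload copies order/rank/shape from inputShapeInfo and applies the requested type
    return nd4j::ConstantShapeHelper::getInstance()->createShapeInfo(nd4j::DataType::FLOAT32, inputShapeInfo);
}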
480 changes: 146 additions & 334 deletions libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp

Large diffs are not rendered by default.

51 changes: 24 additions & 27 deletions libnd4j/include/ops/declarable/headers/nn.h
@@ -29,32 +29,32 @@ namespace nd4j {
#if NOT_EXCLUDED(OP_softmax)
DECLARE_CONFIGURABLE_OP(softmax, 1, 1, true, 0, 0);
DECLARE_CONFIGURABLE_OP(softmax_bp, 2, 1, true, 0, 0);
#endif
#endif

/**
* Local response normalization implementation, as in TF.
* input: 4D array
*
*
* T args:
*
* 0: bias
* 1: alpha
* 2: beta
*
* Int arg: depth - optional local radius
*
* output - 4D array
*
* output - 4D array
*/
#if NOT_EXCLUDED(OP_lrn)
DECLARE_CONFIGURABLE_OP(lrn, 1, 1, true, 3, 0);
#endif
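The lrn declaration above takes bias, alpha and beta as T args and the local radius (depth) as the Int arg. As a rough reference for the TF-style formula — output = input / (bias + alpha * sum_of_squares)^beta over a window of 2*depth+1 channels — here is a standalone sketch over a single channel vector (illustration only, not the library's kernel):

#include <algorithm>
#include <cmath>
#include <vector>

// Reference LRN along the channel dimension for one spatial position.
std::vector<float> lrnReference(const std::vector<float>& in, float bias, float alpha, float beta, int depth) {
    const int channels = static_cast<int>(in.size());
    std::vector<float> out(channels);
    for (int c = 0; c < channels; ++c) {
        float sqrSum = 0.f;
        for (int j = std::max(0, c - depth); j <= std::min(channels - 1, c + depth); ++j)
            sqrSum += in[j] * in[j];
        out[c] = in[c] / std::pow(bias + alpha * sqrSum, beta);
    }
    return out;
}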

/**
* Local response normalization - backprop variant.
* input:
* input:
* 0 - 4D array of data
* 1 - epsilon - 4D array of approximation
*
*
* T args:
*
* 0: bias
@@ -70,55 +70,52 @@ namespace nd4j {
#endif

/**
* Batch normalization implementation.
* Batch normalization implementation.
* Reference: https://arxiv.org/abs/1502.03167v3
*
*
* Expected arguments:
* input: input array (any number of dimensions)
* mean:
* variance:
* gamma:
* beta:
*
*
* Int args:
* 0: apply scale
* 1: apply offset
*
*
*
*
* T args:
* 0: epsilon
*/
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm, 3, 1, false, 1, 2);
#endif
#if NOT_EXCLUDED(OP_batchnorm_new)
DECLARE_CUSTOM_OP(batchnorm_new, 3, 1, false, 1, 2);
#endif
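Per element, the batchnorm op above applies the formula also noted in the CPU helper further down: output = gamma * (input - mean) / sqrt(variance + epsilon) + beta, with gamma and beta participating only when the corresponding Int args are set. A standalone scalar sketch (illustration only):

#include <cmath>

// Scalar batchnorm element; applyScale/applyOffset mirror Int args 0 and 1.
float batchnormElement(float x, float mean, float variance, float gamma, float beta,
                       float epsilon, bool applyScale, bool applyOffset) {
    float out = (x - mean) / std::sqrt(variance + epsilon);
    if (applyScale)  out *= gamma;
    if (applyOffset) out += beta;
    return out;
}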

/**
* back prop in batch normalization
*
*
* Expected arguments:
* input: input array (any number of dimensions)
* mean:
* variance:
* gamma: optional
* beta: optional
* dLdOut: next epsilon
*
*
* Int args:
* 0: apply scale
* 1: apply offset
*
* 1: apply offset
*
* T args:
* 0: epsilon
*
* output arrays:
* dL/dInput
* dL/dMean
* dL/dVariance
* dL/dGamma
* dL/dBeta
* dL/dGamma, optional
* dL/dBeta, optional
*/
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm_bp, 4, 3, false, 1, 2);
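For orientation on the output gradients listed above, here is a standalone sketch of the textbook formulas when mean and variance are treated as independent inputs — an assumption made purely for illustration; the op's actual kernels are the generic and MKL-DNN implementations touched by this commit:

#include <cmath>
#include <vector>

// Textbook batchnorm gradients for one channel slice, treating mean/variance
// as independent inputs (illustration only, not this op's exact kernel).
struct BnGrads { std::vector<float> dLdIn; float dLdMean, dLdVar, dLdGamma, dLdBeta; };

BnGrads batchnormBpReference(const std::vector<float>& x, const std::vector<float>& dLdOut,
                             float mean, float variance, float gamma, float epsilon) {
    const float stdInv = 1.f / std::sqrt(variance + epsilon);
    BnGrads g{std::vector<float>(x.size()), 0.f, 0.f, 0.f, 0.f};
    for (size_t i = 0; i < x.size(); ++i) {
        const float xHat = (x[i] - mean) * stdInv;
        g.dLdIn[i]  = dLdOut[i] * gamma * stdInv;                                          // dL/dInput
        g.dLdMean  -= dLdOut[i] * gamma * stdInv;                                          // dL/dMean
        g.dLdVar   -= 0.5f * dLdOut[i] * gamma * (x[i] - mean) * stdInv * stdInv * stdInv; // dL/dVariance
        g.dLdGamma += dLdOut[i] * xHat;                                                    // dL/dGamma
        g.dLdBeta  += dLdOut[i];                                                           // dL/dBeta
    }
    return g;
}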
@@ -131,30 +128,30 @@ namespace nd4j {
* x: parameters, any shape
* y: gradients. same shape as x
* lr: optional, learning rate
*
*
* T args:
* 0: optional, learning rate
*/
#if NOT_EXCLUDED(OP_apply_sgd)
DECLARE_CONFIGURABLE_OP(apply_sgd, 2, 1, true, -2, 0);
DECLARE_CONFIGURABLE_OP(apply_sgd, 2, 1, true, -2, 0);
#endif
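The apply_sgd declaration above is plain stochastic gradient descent: each parameter is updated as x = x - lr * y. A trivial standalone sketch (illustration only):

#include <vector>

// In-place SGD step over a flat parameter buffer: params -= lr * grads.
void applySgdReference(std::vector<float>& params, const std::vector<float>& grads, float lr) {
    for (size_t i = 0; i < params.size() && i < grads.size(); ++i)
        params[i] -= lr * grads[i];
}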

/**
* This operation performs batch normalization of a layer; it is based on the following article: http://arxiv.org/abs/1502.03167.
* Expected arguments:
* x: input 4D array of shape [bS,iH,iW,iD] (data format = NHWC) or [bS,iD,iH,iW] (data format = NCHW), where
* bS - batch size
* iH - input height
* iW - input width
* bS - batch size
* iH - input height
* iW - input width
* iD - input depth (or number of channels)
* scale: 1D input array of scale factors, shape [iD]
* offset: 1D input array of offsets (shifts), shape [iD]
* mean: 1D input array of population mean used for inference, shape [iD], this array is required only if isTraining = false
* variance: 1D input array of population variance used for inference, shape [iD], this array is required only if isTraining = false
*
*
* T input arguments:
* 0: epsilon, an optional argument with default value 0.001; a small number to be added to the variance of x
*
*
* integer input arguments:
* 0: dataFormat, may have two values: zero -> NHWC, unity -> NCHW
* 1: isTraining, may have two values: zero -> inference, unity -> training
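To make the isTraining switch described above concrete: in training mode the per-channel mean and variance are computed over the batch and spatial dimensions, while in inference mode the supplied population statistics are used; normalization is then scale * (x - mean) / sqrt(variance + epsilon) + offset. A standalone NHWC-only sketch (illustration only, not the op's kernel):

#include <cmath>
#include <vector>

// Fused batch norm reference, NHWC layout, flat input of size bS*iH*iW*iD.
void fusedBatchNormReference(std::vector<float>& x, const std::vector<float>& scale,
                             const std::vector<float>& offset, std::vector<float>& mean,
                             std::vector<float>& variance, int bS, int iH, int iW, int iD,
                             float epsilon, bool isTraining) {
    const int spatial = bS * iH * iW;
    if (isTraining) {
        // compute per-channel batch statistics over batch and spatial dims
        for (int c = 0; c < iD; ++c) {
            float m = 0.f, v = 0.f;
            for (int i = 0; i < spatial; ++i) m += x[i * iD + c];
            m /= spatial;
            for (int i = 0; i < spatial; ++i) { const float d = x[i * iD + c] - m; v += d * d; }
            v /= spatial;
            mean[c] = m;
            variance[c] = v;
        }
    }
    // normalize in place using either batch or supplied population statistics
    for (int i = 0; i < spatial; ++i)
        for (int c = 0; c < iD; ++c)
            x[i * iD + c] = scale[c] * (x[i * iD + c] - mean[c]) / std::sqrt(variance[c] + epsilon) + offset[c];
}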
2 changes: 2 additions & 0 deletions libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp
@@ -32,6 +32,8 @@ namespace helpers {
template <typename T>
static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* variance, const NDArray* gamma, const NDArray* beta, NDArray* output, const std::vector<int>& axes, const double epsilon) {

// formula: output = gamma * ((input - mean) / sqrt(variance + epsilon)) + beta

NDArray sigmaInvGam(mean); // do not copy mean's buffer, take only its shapeInfo
T eps = epsilon;

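As a quick numeric check of the formula in that comment (hypothetical values chosen for illustration): with input = 3, mean = 1, variance = 3.99, epsilon = 0.01, gamma = 0.5 and beta = 1, the output is 0.5 * (3 - 1) / sqrt(4.0) + 1 = 1.5.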
