
Commit

Shyrma bn mkl bp (#14)
* - write code for new batchnorm backprop

Signed-off-by: Yurii <iuriish@yahoo.com>

* - testing batchnorm backprop

Signed-off-by: Yurii <iuriish@yahoo.com>

* - write code for batchnorm backprop based on mkl dnn api

Signed-off-by: Yurii <iuriish@yahoo.com>

* - testing and fixing bugs in batchnorm_bp mkl dnn

Signed-off-by: Yurii <iuriish@yahoo.com>

* - made corrections required by reviewer

Signed-off-by: Yurii <iuriish@yahoo.com>

* - change name in java wrapper for batchnorm op

Signed-off-by: Yurii <iuriish@yahoo.com>
shyrma committed Oct 26, 2019
1 parent d333d29 commit 029a69a
Showing 16 changed files with 1,295 additions and 714 deletions.
1 change: 1 addition & 0 deletions libnd4j/include/helpers/ConstantShapeHelper.h
@@ -60,6 +60,7 @@ namespace nd4j {
Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const char order, const std::vector<Nd4jLong> &shape);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const char order, const int rank, const Nd4jLong* shape);
Nd4jLong* createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo);

Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, nd4j::memory::Workspace *workspace);
Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true);
4 changes: 4 additions & 0 deletions libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp
@@ -99,6 +99,10 @@ namespace nd4j {
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
}

Nd4jLong* ConstantShapeHelper::createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo) {
return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast<Nd4jLong*>(shapeInfo)));
}

Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const nd4j::DataType dataType) {
auto descriptor = ShapeDescriptor::emptyDescriptor(dataType);
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
4 changes: 4 additions & 0 deletions libnd4j/include/helpers/cuda/ConstantShapeHelper.cu
@@ -102,6 +102,10 @@ namespace nd4j {
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
}

Nd4jLong* ConstantShapeHelper::createShapeInfo(const nd4j::DataType dataType, const Nd4jLong* shapeInfo) {
return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast<Nd4jLong*>(shapeInfo)));
}

Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const nd4j::DataType dataType) {
auto descriptor = ShapeDescriptor::emptyDescriptor(dataType);
return bufferForShapeInfo(descriptor).primaryAsT<Nd4jLong>();
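The overload added above (identically in the CPU and CUDA helpers) simply forwards to the existing (dataType, order, rank, shape) factory, so it produces a cached shapeInfo with the same geometry as the source array but a different element type. A minimal usage sketch follows; the getInstance() singleton accessor and the surrounding helper function are assumptions for illustration, not part of this commit:

#include <helpers/ConstantShapeHelper.h>

// Sketch only: build a shapeInfo matching 'inputShapeInfo' in order/rank/shape
// but typed as FLOAT32, e.g. for the output buffer of a cast-like operation.
Nd4jLong* makeFloat32ShapeInfo(const Nd4jLong* inputShapeInfo) {
    // the new overload copies order/rank/shape from inputShapeInfo and applies the requested type
    return nd4j::ConstantShapeHelper::getInstance()->createShapeInfo(nd4j::DataType::FLOAT32, inputShapeInfo);
}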
480 changes: 146 additions & 334 deletions libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp

Large diffs are not rendered by default.

51 changes: 24 additions & 27 deletions libnd4j/include/ops/declarable/headers/nn.h
@@ -29,32 +29,32 @@ namespace nd4j {
#if NOT_EXCLUDED(OP_softmax)
DECLARE_CONFIGURABLE_OP(softmax, 1, 1, true, 0, 0);
DECLARE_CONFIGURABLE_OP(softmax_bp, 2, 1, true, 0, 0);
#endif
#endif

/**
* Local response normalization implementation, as in TF.
* input: 4D array
*
*
* T args:
*
* 0: bias
* 1: alpha
* 2: beta
*
* Int arg: depth - optional local radius
*
* output - 4D array
*
* output - 4D array
*/
#if NOT_EXCLUDED(OP_lrn)
DECLARE_CONFIGURABLE_OP(lrn, 1, 1, true, 3, 0);
#endif
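The lrn declaration above takes bias, alpha and beta as T args and the local radius (depth) as the Int arg. As a rough reference for the TF-style formula — output = input / (bias + alpha * sum_of_squares)^beta over a window of 2*depth+1 channels — here is a standalone sketch over a single channel vector (illustration only, not the library's kernel):

#include <algorithm>
#include <cmath>
#include <vector>

// Reference LRN along the channel dimension for one spatial position.
std::vector<float> lrnReference(const std::vector<float>& in, float bias, float alpha, float beta, int depth) {
    const int channels = static_cast<int>(in.size());
    std::vector<float> out(channels);
    for (int c = 0; c < channels; ++c) {
        float sqrSum = 0.f;
        for (int j = std::max(0, c - depth); j <= std::min(channels - 1, c + depth); ++j)
            sqrSum += in[j] * in[j];
        out[c] = in[c] / std::pow(bias + alpha * sqrSum, beta);
    }
    return out;
}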

/**
* Local response normalization - backprop variant.
* input:
* input:
* 0 - 4D array of data
* 1 - epsilon - 4D array of approximation
*
*
* T args:
*
* 0: bias
@@ -70,55 +70,52 @@ namespace nd4j {
#endif

/**
* Batch normalization implementation.
* Batch normalization implementation.
* Reference: https://arxiv.org/abs/1502.03167v3
*
*
* Expected arguments:
* input: input array (any number of dimensions)
* mean:
* variance:
* gamma:
* beta:
*
*
* Int args:
* 0: apply scale
* 1: apply offset
*
*
*
*
* T args:
* 0: epsilon
*/
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm, 3, 1, false, 1, 2);
#endif
#if NOT_EXCLUDED(OP_batchnorm_new)
DECLARE_CUSTOM_OP(batchnorm_new, 3, 1, false, 1, 2);
#endif
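Per element, the batchnorm op above applies the formula also noted in the CPU helper further down: output = gamma * (input - mean) / sqrt(variance + epsilon) + beta, with gamma and beta participating only when the corresponding Int args are set. A standalone scalar sketch (illustration only):

#include <cmath>

// Scalar batchnorm element; applyScale/applyOffset mirror Int args 0 and 1.
float batchnormElement(float x, float mean, float variance, float gamma, float beta,
                       float epsilon, bool applyScale, bool applyOffset) {
    float out = (x - mean) / std::sqrt(variance + epsilon);
    if (applyScale)  out *= gamma;
    if (applyOffset) out += beta;
    return out;
}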

/**
* back prop in batch normalization
*
*
* Expected arguments:
* input: input array (any number of dimensions)
* mean:
* variance:
* gamma: optional
* beta: optional
* dLdOut: next epsilon
*
*
* Int args:
* 0: apply scale
* 1: apply offset
*
* 1: apply offset
*
* T args:
* 0: epsilon
*
* output arrays:
* dL/dInput
* dL/dMean
* dL/dVariance
* dL/dGamma
* dL/dBeta
* dL/dGamma, optional
* dL/dBeta, optional
*/
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm_bp, 4, 3, false, 1, 2);
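For orientation on the output gradients listed above, here is a standalone sketch of the textbook formulas when mean and variance are treated as independent inputs — an assumption made purely for illustration; the op's actual kernels are the generic and MKL-DNN implementations touched by this commit:

#include <cmath>
#include <vector>

// Textbook batchnorm gradients for one channel slice, treating mean/variance
// as independent inputs (illustration only, not this op's exact kernel).
struct BnGrads { std::vector<float> dLdIn; float dLdMean, dLdVar, dLdGamma, dLdBeta; };

BnGrads batchnormBpReference(const std::vector<float>& x, const std::vector<float>& dLdOut,
                             float mean, float variance, float gamma, float epsilon) {
    const float stdInv = 1.f / std::sqrt(variance + epsilon);
    BnGrads g{std::vector<float>(x.size()), 0.f, 0.f, 0.f, 0.f};
    for (size_t i = 0; i < x.size(); ++i) {
        const float xHat = (x[i] - mean) * stdInv;
        g.dLdIn[i]  = dLdOut[i] * gamma * stdInv;                                          // dL/dInput
        g.dLdMean  -= dLdOut[i] * gamma * stdInv;                                          // dL/dMean
        g.dLdVar   -= 0.5f * dLdOut[i] * gamma * (x[i] - mean) * stdInv * stdInv * stdInv; // dL/dVariance
        g.dLdGamma += dLdOut[i] * xHat;                                                    // dL/dGamma
        g.dLdBeta  += dLdOut[i];                                                           // dL/dBeta
    }
    return g;
}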
@@ -131,30 +128,30 @@ namespace nd4j {
* x: parameters, any shape
* y: gradients. same shape as x
* lr: optional, learning rate
*
*
* T args:
* 0: optional, learning rate
*/
#if NOT_EXCLUDED(OP_apply_sgd)
DECLARE_CONFIGURABLE_OP(apply_sgd, 2, 1, true, -2, 0);
DECLARE_CONFIGURABLE_OP(apply_sgd, 2, 1, true, -2, 0);
#endif
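The apply_sgd declaration above is plain stochastic gradient descent: each parameter is updated as x = x - lr * y. A trivial standalone sketch (illustration only):

#include <vector>

// In-place SGD step over a flat parameter buffer: params -= lr * grads.
void applySgdReference(std::vector<float>& params, const std::vector<float>& grads, float lr) {
    for (size_t i = 0; i < params.size() && i < grads.size(); ++i)
        params[i] -= lr * grads[i];
}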

/**
* This operation performs batch normalization of a layer; it is based on the following article: http://arxiv.org/abs/1502.03167.
* Expected arguments:
* x: input 4D array of shape [bS,iH,iW,iD] (data format = NHWC) or [bS,iD,iH,iW] (data format = NCHW), where
* bS - batch size
* iH - input height
* iW - input width
* bS - batch size
* iH - input height
* iW - input width
* iD - input depth (or number of channels)
* scale: 1D input array of scale factors, shape [iD]
* offset: 1D input array of offsets (shifts), shape [iD]
* mean: 1D input array of population mean used for inference, shape [iD], this array is required only if isTraining = false
* variance: 1D input array of population variance used for inference, shape [iD], this array is required only if isTraining = false
*
*
* T input arguments:
* 0: epsilon, an optional argument with default value 0.001; a small number to be added to the variance of x
*
*
* integer input arguments:
* 0: dataFormat, may have two values: zero -> NHWC, unity -> NCHW
* 1: isTraining, may have two values: zero -> inference, unity -> training
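To make the isTraining switch described above concrete: in training mode the per-channel mean and variance are computed over the batch and spatial dimensions, while in inference mode the supplied population statistics are used; normalization is then scale * (x - mean) / sqrt(variance + epsilon) + offset. A standalone NHWC-only sketch (illustration only, not the op's kernel):

#include <cmath>
#include <vector>

// Fused batch norm reference, NHWC layout, flat input of size bS*iH*iW*iD.
void fusedBatchNormReference(std::vector<float>& x, const std::vector<float>& scale,
                             const std::vector<float>& offset, std::vector<float>& mean,
                             std::vector<float>& variance, int bS, int iH, int iW, int iD,
                             float epsilon, bool isTraining) {
    const int spatial = bS * iH * iW;
    if (isTraining) {
        // compute per-channel batch statistics over batch and spatial dims
        for (int c = 0; c < iD; ++c) {
            float m = 0.f, v = 0.f;
            for (int i = 0; i < spatial; ++i) m += x[i * iD + c];
            m /= spatial;
            for (int i = 0; i < spatial; ++i) { const float d = x[i * iD + c] - m; v += d * d; }
            v /= spatial;
            mean[c] = m;
            variance[c] = v;
        }
    }
    // normalize in place using either batch or supplied population statistics
    for (int i = 0; i < spatial; ++i)
        for (int c = 0; c < iD; ++c)
            x[i * iD + c] = scale[c] * (x[i * iD + c] - mean[c]) / std::sqrt(variance[c] + epsilon) + offset[c];
}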
2 changes: 2 additions & 0 deletions libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp
@@ -32,6 +32,8 @@ namespace helpers {
template <typename T>
static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* variance, const NDArray* gamma, const NDArray* beta, NDArray* output, const std::vector<int>& axes, const double epsilon) {

// formula: output = gamma * ((input - mean) / sqrt(variance + epsilon)) + beta

NDArray sigmaInvGam(mean); // do not copy mean's buffer, take only its shapeInfo
T eps = epsilon;

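As a quick numeric check of the formula in that comment (hypothetical values chosen for illustration): with input = 3, mean = 1, variance = 3.99, epsilon = 0.01, gamma = 0.5 and beta = 1, the output is 0.5 * (3 - 1) / sqrt(4.0) + 1 = 1.5.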
