Commit 24afdf0

Merge c327004 into 1dea877

nihui committed Mar 24, 2020
2 parents 1dea877 + c327004
Showing 3 changed files with 90 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/layer/arm/pooling_arm.cpp
@@ -532,7 +532,7 @@ int Pooling_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Opti
{
const unsigned short* sptr = m.row<const unsigned short>(i*stride_h) + j*stride_w;

- float max = sptr[0];
+ float max = -FLT_MAX;

for (int k = 0; k < maxk; k++)
{
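Note on the pooling_arm.cpp hunk above: in the bf16 path sptr points at raw 16-bit bfloat16 storage, so the old initializer implicitly promoted the integer bit pattern to float instead of the decoded value, which can seed the running maximum with a bogus, often very large number. -FLT_MAX is the neutral element for a max reduction regardless of storage format. A minimal standalone sketch of the effect; bf16_to_fp32 and fp32_to_bf16 are local illustration helpers, not ncnn's own conversion routines:

#include <float.h>
#include <stdio.h>
#include <string.h>

// bfloat16 is the upper 16 bits of an IEEE-754 float32
static float bf16_to_fp32(unsigned short v)
{
    unsigned int bits = (unsigned int)v << 16;
    float f;
    memcpy(&f, &bits, sizeof(f));
    return f;
}

static unsigned short fp32_to_bf16(float f)
{
    unsigned int bits;
    memcpy(&bits, &f, sizeof(bits));
    return (unsigned short)(bits >> 16); // plain truncation, for illustration only
}

int main()
{
    unsigned short sptr0 = fp32_to_bf16(0.5f); // raw storage word, 0x3f00

    float wrong = sptr0;                 // old init: integer promotion yields 16128.0
    float decoded = bf16_to_fp32(sptr0); // the value actually stored: 0.5
    float safe = -FLT_MAX;               // new init: neutral element for a max reduction

    printf("%f %f %g\n", wrong, decoded, safe);
    return 0;
}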
2 changes: 1 addition & 1 deletion tests/test_relu.cpp
@@ -35,7 +35,7 @@ static int test_relu(const ncnn::Mat& a, float slope)
int ret = test_layer<ncnn::ReLU>("ReLU", pd, weights, opt, a);
if (ret != 0)
{
- fprintf(stderr, "test_relu failed slope=%f\n", slope);
+ fprintf(stderr, "test_relu failed a.dims=%d a=(%d %d %d) slope=%f\n", a.dims, a.w, a.h, a.c, slope);
}

return ret;
94 changes: 88 additions & 6 deletions tests/testutil.h
@@ -212,6 +212,9 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:

if (!op->support_vulkan) opt.use_vulkan_compute = false;
if (!op->support_packing) opt.use_packing_layout = false;
if (!op->support_bf16_storage) opt.use_bf16_storage = false;

if (opt.use_int8_inference) opt.use_bf16_storage = false;

#if NCNN_VULKAN
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
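The two new guards above make the harness skip bf16 when it cannot apply: use_bf16_storage is cleared when the layer under test does not declare support_bf16_storage, and also when int8 inference is active, so only one storage type is exercised at a time. For context, a layer opts in by setting the flag in its constructor; the toy layer below is hypothetical, shown only to illustrate where support_bf16_storage comes from, and is not part of this patch:

#include "layer.h" // ncnn::Layer (include path is an assumption of this sketch)

// Hypothetical layer used only to illustrate the support_bf16_storage flag.
class MyPassthrough : public ncnn::Layer
{
public:
    MyPassthrough()
    {
        one_blob_only = true;
        support_bf16_storage = true; // declares that forward() can consume and produce
                                     // blobs whose storage is 16-bit bfloat16
    }

    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
    {
        // When opt.use_bf16_storage is on, elements are 2 bytes wide; a plain copy is fine here.
        top_blob = bottom_blob.clone(opt.blob_allocator);
        return top_blob.empty() ? -100 : 0;
    }
};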
@@ -296,6 +299,16 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
a4 = a;
}

if (opt.use_bf16_storage)
{
for (size_t i=0; i<a4.size(); i++)
{
ncnn::Mat a_bf16;
ncnn::cast_float32_to_bfloat16(a4[i], a_bf16, opt);
a4[i] = a_bf16;
}
}

if (op->support_inplace)
{
for (size_t i=0; i<a4.size(); i++)
@@ -309,6 +322,16 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
{
op->forward(a4, c, opt);
}

if (opt.use_bf16_storage)
{
for (size_t i=0; i<c.size(); i++)
{
ncnn::Mat c_fp32;
ncnn::cast_bfloat16_to_float32(c[i], c_fp32, opt);
c[i] = c_fp32;
}
}
}

#if NCNN_VULKAN
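In the naive reference path above, every input blob is cast fp32 -> bf16 before op->forward and every output is cast back bf16 -> fp32, so the comparison against the optimized path stays in fp32. A sketch of that round-trip for one blob, reusing the same cast helpers the patch calls; the include paths are an assumption of this sketch:

#include "mat.h"    // ncnn::Mat and the cast_* helpers called in the patch
#include "option.h" // ncnn::Option

// Sketch of the fp32 -> bf16 -> fp32 round-trip the harness applies around op->forward().
static ncnn::Mat roundtrip_bf16(const ncnn::Mat& fp32_in, const ncnn::Option& opt)
{
    ncnn::Mat bf16;
    ncnn::cast_float32_to_bfloat16(fp32_in, bf16, opt); // storage shrinks to 16 bits per element

    ncnn::Mat fp32_out;
    ncnn::cast_bfloat16_to_float32(bf16, fp32_out, opt); // back to fp32; the low mantissa bits are gone
    return fp32_out;
}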
@@ -441,6 +464,9 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:

if (!op->support_vulkan) opt.use_vulkan_compute = false;
if (!op->support_packing) opt.use_packing_layout = false;
if (!op->support_bf16_storage) opt.use_bf16_storage = false;

if (opt.use_int8_inference) opt.use_bf16_storage = false;

#if NCNN_VULKAN
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
@@ -515,6 +541,13 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
a4 = a;
}

if (opt.use_bf16_storage)
{
ncnn::Mat a_bf16;
ncnn::cast_float32_to_bfloat16(a4, a_bf16, opt);
a4 = a_bf16;
}

if (op->support_inplace)
{
c = a4.clone();
@@ -524,6 +557,13 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
{
op->forward(a4, c, opt);
}

if (opt.use_bf16_storage)
{
ncnn::Mat c_fp32;
ncnn::cast_bfloat16_to_float32(c, c_fp32, opt);
c = c_fp32;
}
}

#if NCNN_VULKAN
@@ -621,7 +661,7 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
template <typename T>
int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count = 1, float epsilon = 0.001, void (*func)(T*) = 0)
{
- ncnn::Option opts[2];
+ ncnn::Option opts[3];
opts[0] = _opt;
opts[0].use_packing_layout = false;
opts[0].use_fp16_packed = false;
@@ -632,8 +672,12 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
opts[1].use_fp16_packed = true;
opts[1].use_fp16_storage = false;
opts[1].use_shader_pack8 = true;
opts[2] = _opt;
opts[2].use_packing_layout = true;
opts[2].use_bf16_storage = true;
opts[2].use_vulkan_compute = false;

- for (int i = 0; i < 2; i++)
+ for (int i = 0; i < 3; i++)
{
const ncnn::Option& opt = opts[i];

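The new opts[2] above adds a third configuration that runs the CPU path with packing and bf16 storage enabled and Vulkan off. At the application level the same switch is a single Option flag; a hedged usage sketch follows, where the file and blob names ("model.param", "model.bin", "data", "output") are placeholders and not anything from this patch:

#include "net.h" // ncnn::Net (include path is an assumption of this sketch)

int main()
{
    ncnn::Net net;
    net.opt.use_bf16_storage = true;    // same flag the third test configuration turns on
    net.opt.use_packing_layout = true;
    net.opt.use_vulkan_compute = false; // the bf16 storage path tested here is the CPU one

    // placeholder file and blob names, for illustration only
    net.load_param("model.param");
    net.load_model("model.bin");

    ncnn::Mat in(224, 224, 3);
    in.fill(0.5f);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("output", out); // intermediate blobs use bf16 storage where layers support it
    return 0;
}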
@@ -659,6 +703,24 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else if (opt.use_bf16_storage)
{
a_fp16.resize(a.size());
for (size_t j = 0; j < a.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(a[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, a_fp16[j], opt);
}
weights_fp16.resize(weights.size());
for (size_t j = 0; j < weights.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else
{
a_fp16 = a;
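Why the bf16 branch above relaxes the tolerance to epsilon * 100: bfloat16 keeps the full 8-bit exponent of fp32 but only 7 stored mantissa bits, so an fp32 -> bf16 round-trip can cost a few tenths of a percent of relative accuracy (just under 2^-7 for plain truncation), well above the default epsilon of 0.001. A small standalone check, assuming truncation (ncnn's cast may round differently):

#include <math.h>
#include <stdio.h>
#include <string.h>

int main()
{
    float x = 3.14159265f;

    // truncating fp32 -> bf16 -> fp32 round-trip (illustration only, not ncnn's cast layer)
    unsigned int bits;
    memcpy(&bits, &x, sizeof(bits));
    bits &= 0xffff0000u; // keep the sign, the 8 exponent bits and the top 7 mantissa bits
    float x_bf16;
    memcpy(&x_bf16, &bits, sizeof(x_bf16));

    float rel_err = fabsf(x - x_bf16) / fabsf(x);
    printf("bf16(%.8f) = %.8f  relative error = %.6f\n", x, x_bf16, rel_err);
    // prints roughly 3e-4 here; the worst case for truncation is just under 2^-7 (~0.008),
    // which is why the test tolerance becomes epsilon * 100 = 0.1 instead of 0.001
    return 0;
}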
@@ -670,7 +732,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
int ret = test_layer<T>(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_blob_count, top_shapes, epsilon_fp16, func);
if (ret != 0)
{
- fprintf(stderr, "test_layer %s failed use_packing_layout=%d\n", layer_type, opt.use_packing_layout);
+ fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_bf16_storage=%d\n", layer_type, opt.use_packing_layout, opt.use_bf16_storage);
return ret;
}
}
@@ -681,7 +743,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
template <typename T>
int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, float epsilon = 0.001, void (*func)(T*) = 0)
{
- ncnn::Option opts[2];
+ ncnn::Option opts[3];
opts[0] = _opt;
opts[0].use_packing_layout = false;
opts[0].use_fp16_packed = false;
@@ -692,8 +754,12 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
opts[1].use_fp16_packed = true;
opts[1].use_fp16_storage = false;
opts[1].use_shader_pack8 = true;
opts[2] = _opt;
opts[2].use_packing_layout = true;
opts[2].use_bf16_storage = true;
opts[2].use_vulkan_compute = false;

- for (int i = 0; i < 2; i++)
+ for (int i = 0; i < 3; i++)
{
const ncnn::Option& opt = opts[i];

@@ -717,6 +783,22 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else if (opt.use_bf16_storage)
{
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(a, tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, a_fp16, opt);
}
weights_fp16.resize(weights.size());
for (size_t j = 0; j < weights.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else
{
a_fp16 = a;
@@ -728,7 +810,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
int ret = test_layer<T>(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_shape, epsilon_fp16, func);
if (ret != 0)
{
- fprintf(stderr, "test_layer %s failed use_packing_layout=%d\n", layer_type, opt.use_packing_layout);
+ fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_bf16_storage=%d\n", layer_type, opt.use_packing_layout, opt.use_bf16_storage);
return ret;
}
}
