Commit 24afdf0

Merge c327004 into 1dea877

nihui committed Mar 24, 2020
2 parents 1dea877 + c327004
Showing 3 changed files with 90 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/layer/arm/pooling_arm.cpp
@@ -532,7 +532,7 @@ int Pooling_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const Opti
{
const unsigned short* sptr = m.row<const unsigned short>(i*stride_h) + j*stride_w;

- float max = sptr[0];
+ float max = -FLT_MAX;

for (int k = 0; k < maxk; k++)
{
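Note on the pooling_arm.cpp hunk above: in the bf16 path sptr points at raw 16-bit bfloat16 storage, so the old initializer implicitly promoted the integer bit pattern to float instead of the decoded value, which can seed the running maximum with a bogus, often very large number. -FLT_MAX is the neutral element for a max reduction regardless of storage format. A minimal standalone sketch of the effect; bf16_to_fp32 and fp32_to_bf16 are local illustration helpers, not ncnn's own conversion routines:

#include <float.h>
#include <stdio.h>
#include <string.h>

// bfloat16 is the upper 16 bits of an IEEE-754 float32
static float bf16_to_fp32(unsigned short v)
{
    unsigned int bits = (unsigned int)v << 16;
    float f;
    memcpy(&f, &bits, sizeof(f));
    return f;
}

static unsigned short fp32_to_bf16(float f)
{
    unsigned int bits;
    memcpy(&bits, &f, sizeof(bits));
    return (unsigned short)(bits >> 16); // plain truncation, for illustration only
}

int main()
{
    unsigned short sptr0 = fp32_to_bf16(0.5f); // raw storage word, 0x3f00

    float wrong = sptr0;                 // old init: integer promotion yields 16128.0
    float decoded = bf16_to_fp32(sptr0); // the value actually stored: 0.5
    float safe = -FLT_MAX;               // new init: neutral element for a max reduction

    printf("%f %f %g\n", wrong, decoded, safe);
    return 0;
}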
2 changes: 1 addition & 1 deletion tests/test_relu.cpp
@@ -35,7 +35,7 @@ static int test_relu(const ncnn::Mat& a, float slope)
int ret = test_layer<ncnn::ReLU>("ReLU", pd, weights, opt, a);
if (ret != 0)
{
- fprintf(stderr, "test_relu failed slope=%f\n", slope);
+ fprintf(stderr, "test_relu failed a.dims=%d a=(%d %d %d) slope=%f\n", a.dims, a.w, a.h, a.c, slope);
}

return ret;
94 changes: 88 additions & 6 deletions tests/testutil.h
@@ -212,6 +212,9 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:

if (!op->support_vulkan) opt.use_vulkan_compute = false;
if (!op->support_packing) opt.use_packing_layout = false;
if (!op->support_bf16_storage) opt.use_bf16_storage = false;

if (opt.use_int8_inference) opt.use_bf16_storage = false;

#if NCNN_VULKAN
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
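The two new guards above make the harness skip bf16 when it cannot apply: use_bf16_storage is cleared when the layer under test does not declare support_bf16_storage, and also when int8 inference is active, so only one storage type is exercised at a time. For context, a layer opts in by setting the flag in its constructor; the toy layer below is hypothetical, shown only to illustrate where support_bf16_storage comes from, and is not part of this patch:

#include "layer.h" // ncnn::Layer (include path is an assumption of this sketch)

// Hypothetical layer used only to illustrate the support_bf16_storage flag.
class MyPassthrough : public ncnn::Layer
{
public:
    MyPassthrough()
    {
        one_blob_only = true;
        support_bf16_storage = true; // declares that forward() can consume and produce
                                     // blobs whose storage is 16-bit bfloat16
    }

    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
    {
        // When opt.use_bf16_storage is on, elements are 2 bytes wide; a plain copy is fine here.
        top_blob = bottom_blob.clone(opt.blob_allocator);
        return top_blob.empty() ? -100 : 0;
    }
};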
@@ -296,6 +299,16 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
a4 = a;
}

if (opt.use_bf16_storage)
{
for (size_t i=0; i<a4.size(); i++)
{
ncnn::Mat a_bf16;
ncnn::cast_float32_to_bfloat16(a4[i], a_bf16, opt);
a4[i] = a_bf16;
}
}

if (op->support_inplace)
{
for (size_t i=0; i<a4.size(); i++)
@@ -309,6 +322,16 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
{
op->forward(a4, c, opt);
}

if (opt.use_bf16_storage)
{
for (size_t i=0; i<c.size(); i++)
{
ncnn::Mat c_fp32;
ncnn::cast_bfloat16_to_float32(c[i], c_fp32, opt);
c[i] = c_fp32;
}
}
}

#if NCNN_VULKAN
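In the naive reference path above, every input blob is cast fp32 -> bf16 before op->forward and every output is cast back bf16 -> fp32, so the comparison against the optimized path stays in fp32. A sketch of that round-trip for one blob, reusing the same cast helpers the patch calls; the include paths are an assumption of this sketch:

#include "mat.h"    // ncnn::Mat and the cast_* helpers called in the patch
#include "option.h" // ncnn::Option

// Sketch of the fp32 -> bf16 -> fp32 round-trip the harness applies around op->forward().
static ncnn::Mat roundtrip_bf16(const ncnn::Mat& fp32_in, const ncnn::Option& opt)
{
    ncnn::Mat bf16;
    ncnn::cast_float32_to_bfloat16(fp32_in, bf16, opt); // storage shrinks to 16 bits per element

    ncnn::Mat fp32_out;
    ncnn::cast_bfloat16_to_float32(bf16, fp32_out, opt); // back to fp32; the low mantissa bits are gone
    return fp32_out;
}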
@@ -441,6 +464,9 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:

if (!op->support_vulkan) opt.use_vulkan_compute = false;
if (!op->support_packing) opt.use_packing_layout = false;
if (!op->support_bf16_storage) opt.use_bf16_storage = false;

if (opt.use_int8_inference) opt.use_bf16_storage = false;

#if NCNN_VULKAN
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
@@ -515,6 +541,13 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
a4 = a;
}

if (opt.use_bf16_storage)
{
ncnn::Mat a_bf16;
ncnn::cast_float32_to_bfloat16(a4, a_bf16, opt);
a4 = a_bf16;
}

if (op->support_inplace)
{
c = a4.clone();
@@ -524,6 +557,13 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
{
op->forward(a4, c, opt);
}

if (opt.use_bf16_storage)
{
ncnn::Mat c_fp32;
ncnn::cast_bfloat16_to_float32(c, c_fp32, opt);
c = c_fp32;
}
}

#if NCNN_VULKAN
@@ -621,7 +661,7 @@ int test_layer(int typeindex, const ncnn::ParamDict& pd, const std::vector<ncnn:
template <typename T>
int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const std::vector<ncnn::Mat>& a, int top_blob_count = 1, float epsilon = 0.001, void (*func)(T*) = 0)
{
- ncnn::Option opts[2];
+ ncnn::Option opts[3];
opts[0] = _opt;
opts[0].use_packing_layout = false;
opts[0].use_fp16_packed = false;
@@ -632,8 +672,12 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
opts[1].use_fp16_packed = true;
opts[1].use_fp16_storage = false;
opts[1].use_shader_pack8 = true;
opts[2] = _opt;
opts[2].use_packing_layout = true;
opts[2].use_bf16_storage = true;
opts[2].use_vulkan_compute = false;

- for (int i = 0; i < 2; i++)
+ for (int i = 0; i < 3; i++)
{
const ncnn::Option& opt = opts[i];

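The new opts[2] above adds a third configuration that runs the CPU path with packing and bf16 storage enabled and Vulkan off. At the application level the same switch is a single Option flag; a hedged usage sketch follows, where the file and blob names ("model.param", "model.bin", "data", "output") are placeholders and not anything from this patch:

#include "net.h" // ncnn::Net (include path is an assumption of this sketch)

int main()
{
    ncnn::Net net;
    net.opt.use_bf16_storage = true;    // same flag the third test configuration turns on
    net.opt.use_packing_layout = true;
    net.opt.use_vulkan_compute = false; // the bf16 storage path tested here is the CPU one

    // placeholder file and blob names, for illustration only
    net.load_param("model.param");
    net.load_model("model.bin");

    ncnn::Mat in(224, 224, 3);
    in.fill(0.5f);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("output", out); // intermediate blobs use bf16 storage where layers support it
    return 0;
}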
@@ -659,6 +703,24 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else if (opt.use_bf16_storage)
{
a_fp16.resize(a.size());
for (size_t j = 0; j < a.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(a[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, a_fp16[j], opt);
}
weights_fp16.resize(weights.size());
for (size_t j = 0; j < weights.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else
{
a_fp16 = a;
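Why the bf16 branch above relaxes the tolerance to epsilon * 100: bfloat16 keeps the full 8-bit exponent of fp32 but only 7 stored mantissa bits, so an fp32 -> bf16 round-trip can cost a few tenths of a percent of relative accuracy (just under 2^-7 for plain truncation), well above the default epsilon of 0.001. A small standalone check, assuming truncation (ncnn's cast may round differently):

#include <math.h>
#include <stdio.h>
#include <string.h>

int main()
{
    float x = 3.14159265f;

    // truncating fp32 -> bf16 -> fp32 round-trip (illustration only, not ncnn's cast layer)
    unsigned int bits;
    memcpy(&bits, &x, sizeof(bits));
    bits &= 0xffff0000u; // keep the sign, the 8 exponent bits and the top 7 mantissa bits
    float x_bf16;
    memcpy(&x_bf16, &bits, sizeof(x_bf16));

    float rel_err = fabsf(x - x_bf16) / fabsf(x);
    printf("bf16(%.8f) = %.8f  relative error = %.6f\n", x, x_bf16, rel_err);
    // prints roughly 3e-4 here; the worst case for truncation is just under 2^-7 (~0.008),
    // which is why the test tolerance becomes epsilon * 100 = 0.1 instead of 0.001
    return 0;
}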
@@ -670,7 +732,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
int ret = test_layer<T>(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_blob_count, top_shapes, epsilon_fp16, func);
if (ret != 0)
{
- fprintf(stderr, "test_layer %s failed use_packing_layout=%d\n", layer_type, opt.use_packing_layout);
+ fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_bf16_storage=%d\n", layer_type, opt.use_packing_layout, opt.use_bf16_storage);
return ret;
}
}
@@ -681,7 +743,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
template <typename T>
int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vector<ncnn::Mat>& weights, const ncnn::Option& _opt, const ncnn::Mat& a, float epsilon = 0.001, void (*func)(T*) = 0)
{
- ncnn::Option opts[2];
+ ncnn::Option opts[3];
opts[0] = _opt;
opts[0].use_packing_layout = false;
opts[0].use_fp16_packed = false;
@@ -692,8 +754,12 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
opts[1].use_fp16_packed = true;
opts[1].use_fp16_storage = false;
opts[1].use_shader_pack8 = true;
opts[2] = _opt;
opts[2].use_packing_layout = true;
opts[2].use_bf16_storage = true;
opts[2].use_vulkan_compute = false;

- for (int i = 0; i < 2; i++)
+ for (int i = 0; i < 3; i++)
{
const ncnn::Option& opt = opts[i];

@@ -717,6 +783,22 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else if (opt.use_bf16_storage)
{
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(a, tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, a_fp16, opt);
}
weights_fp16.resize(weights.size());
for (size_t j = 0; j < weights.size(); j++)
{
ncnn::Mat tmp;
ncnn::cast_float32_to_bfloat16(weights[j], tmp, opt);
ncnn::cast_bfloat16_to_float32(tmp, weights_fp16[j], opt);
}
epsilon_fp16 = epsilon * 100;// 0.1
}
else
{
a_fp16 = a;
@@ -728,7 +810,7 @@ int test_layer(const char* layer_type, const ncnn::ParamDict& pd, const std::vec
int ret = test_layer<T>(ncnn::layer_to_index(layer_type), pd, weights_fp16, opt, a_fp16, top_shape, epsilon_fp16, func);
if (ret != 0)
{
- fprintf(stderr, "test_layer %s failed use_packing_layout=%d\n", layer_type, opt.use_packing_layout);
+ fprintf(stderr, "test_layer %s failed use_packing_layout=%d use_bf16_storage=%d\n", layer_type, opt.use_packing_layout, opt.use_bf16_storage);
return ret;
}
}
