Skip to content

Commit

Permalink
implement layer feature disabled bit (#4278)
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Oct 18, 2022
1 parent 270d6b2 commit 0b591b0
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 34 deletions.
7 changes: 3 additions & 4 deletions src/layer.h
Expand Up @@ -96,10 +96,9 @@ class NCNN_EXPORT Layer
bool support_reserved_7;
bool support_reserved_8;
bool support_reserved_9;
bool support_reserved_10;
bool support_reserved_11;
bool support_reserved_12;
bool support_reserved_13;

// feature disabled set
int featmask;

public:
// implement inference
Expand Down
34 changes: 22 additions & 12 deletions src/layer/vulkan/convolution_vulkan.cpp
Expand Up @@ -794,7 +794,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
convert_packing(bias_data, bias_data_packed, out_elempack, opt);
}

if (opt.use_sgemm_convolution && !is_conv1x1s1d1 && num_input >= 16 && num_output >= 16)
if (opt.use_winograd_convolution && (opt.use_winograd23_convolution || opt.use_winograd43_convolution) && is_conv3x3s1d1 && num_input >= 16 && num_output >= 16)
{
// pass
}
else if (opt.use_sgemm_convolution && !is_conv1x1s1d1 && num_input >= 16 && num_output >= 16)
{
bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0;

Expand Down Expand Up @@ -872,7 +876,7 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
}
pipeline_convolution_gemm->create(shader_type_index, opt, specializations);
}
if (is_conv1x1s1d1)
else if (is_conv1x1s1d1)
{
bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0;

Expand Down Expand Up @@ -1221,13 +1225,16 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 8 == 0 && num_output % 8 == 0;

bool pre_winograd43 = opt.use_winograd43_convolution;
if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
pre_winograd43 = false;
if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
pre_winograd43 = false;
if (opt.use_winograd23_convolution)
{
if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
pre_winograd43 = false;
if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
pre_winograd43 = false;

if (use_cooperative_matrix && (w <= 18 && h <= 18))
pre_winograd43 = false;
if (use_cooperative_matrix && (w <= 18 && h <= 18))
pre_winograd43 = false;
}

if (pre_winograd43)
{
Expand Down Expand Up @@ -1660,10 +1667,13 @@ int Convolution_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_b
if (opt.use_winograd_convolution && (opt.use_winograd23_convolution || opt.use_winograd43_convolution) && is_conv3x3s1d1 && channels * elempack >= 16 && num_output >= 16)
{
bool pre_winograd43 = opt.use_winograd43_convolution;
if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
pre_winograd43 = false;
if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
pre_winograd43 = false;
if (opt.use_winograd23_convolution)
{
if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
pre_winograd43 = false;
if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
pre_winograd43 = false;
}

if (pre_winograd43)
{
Expand Down
110 changes: 93 additions & 17 deletions src/net.cpp
Expand Up @@ -108,6 +108,26 @@ NetPrivate::NetPrivate(Option& _opt)
#endif // NCNN_VULKAN
}

// Derive a per-layer Option from the net-wide Option by clearing every
// feature whose bit is set in the layer's featmask (param id 31).
// A set bit means "disable this feature for this layer":
//   bit 0 -> fp16 arithmetic
//   bit 1 -> fp16 storage and fp16 packed layout
//   bit 2 -> bf16 storage
//   bit 3 -> int8 packed / int8 storage / int8 arithmetic
//   bit 4 -> vulkan compute (and its image/tensor storage paths)
//   bit 5 -> sgemm convolution
//   bit 6 -> winograd convolution
// Returns the masked copy; the input Option is left untouched.
static Option get_masked_option(const Option& opt, int featmask)
{
// mask option usage as layer specific featmask
Option opt1 = opt;
opt1.use_fp16_arithmetic = opt1.use_fp16_arithmetic && !(featmask & (1 << 0));
opt1.use_fp16_storage = opt1.use_fp16_storage && !(featmask & (1 << 1));
opt1.use_fp16_packed = opt1.use_fp16_packed && !(featmask & (1 << 1));
opt1.use_bf16_storage = opt1.use_bf16_storage && !(featmask & (1 << 2));
opt1.use_int8_packed = opt1.use_int8_packed && !(featmask & (1 << 3));
opt1.use_int8_storage = opt1.use_int8_storage && !(featmask & (1 << 3));
opt1.use_int8_arithmetic = opt1.use_int8_arithmetic && !(featmask & (1 << 3));
opt1.use_vulkan_compute = opt1.use_vulkan_compute && !(featmask & (1 << 4));
opt1.use_image_storage = opt1.use_image_storage && !(featmask & (1 << 4));
opt1.use_tensor_storage = opt1.use_tensor_storage && !(featmask & (1 << 4));
opt1.use_sgemm_convolution = opt1.use_sgemm_convolution && !(featmask & (1 << 5));
opt1.use_winograd_convolution = opt1.use_winograd_convolution && !(featmask & (1 << 6));

return opt1;
}

#if NCNN_VULKAN
int NetPrivate::upload_model()
{
Expand All @@ -132,7 +152,7 @@ int NetPrivate::upload_model()
{
if (layers[i]->support_vulkan)
{
int uret = layers[i]->upload_model(cmd, opt_upload);
int uret = layers[i]->upload_model(cmd, get_masked_option(opt_upload, layers[i]->featmask));
if (uret != 0)
{
NCNN_LOGE("layer upload_model %d failed", (int)i);
Expand Down Expand Up @@ -195,7 +215,15 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, cons
bottom_blob.elemsize = blob_mats[bottom_blob_index].elemsize;
}
#endif
int ret = do_forward_layer(layer, blob_mats, opt);
int ret = 0;
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats, opt);
}
#if NCNN_BENCHMARK
double end = get_current_time();
if (layer->one_blob_only)
Expand Down Expand Up @@ -352,7 +380,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
#if NCNN_BENCHMARK
cmd.record_write_timestamp(layer_index * 2);
#endif
ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
}
#if NCNN_BENCHMARK
cmd.record_write_timestamp(layer_index * 2 + 1);
#endif
Expand All @@ -368,7 +403,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
bottom_blob = blob_mats[bottom_blob_index].shape();
}
#endif
ret = do_forward_layer(layer, blob_mats, opt);
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats, opt);
}
#if NCNN_BENCHMARK
double end = get_current_time();
if (layer->one_blob_only)
Expand Down Expand Up @@ -677,7 +719,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
#endif
if (layer->support_image_storage)
{
ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, opt);
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, opt);
}
if (ret == -100)
{
image_allocation_failed = true;
Expand All @@ -686,7 +735,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
}
else
{
ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
}
}
#if NCNN_BENCHMARK
cmd.record_write_timestamp(layer_index * 2 + 1);
Expand All @@ -703,7 +759,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
bottom_blob = blob_mats[bottom_blob_index].shape();
}
#endif
ret = do_forward_layer(layer, blob_mats, opt);
if (layer->featmask)
{
ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
}
else
{
ret = do_forward_layer(layer, blob_mats, opt);
}
#if NCNN_BENCHMARK
double end = get_current_time();
if (layer->one_blob_only)
Expand Down Expand Up @@ -790,6 +853,7 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
// *INDENT-ON*
// clang-format on

int dst_elempack = 1;
if (opt.use_packing_layout)
{
// resolve dst_elempack
Expand All @@ -801,7 +865,6 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio

int elembits = bottom_blob.elembits();

int dst_elempack = 1;
if (layer->support_packing)
{
if (elembits == 32)
Expand Down Expand Up @@ -855,13 +918,13 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
#endif
}
}
}

if (bottom_blob.elempack != dst_elempack)
{
Mat bottom_blob_packed;
convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt);
bottom_blob = bottom_blob_packed;
}
if (bottom_blob.elempack != dst_elempack)
{
Mat bottom_blob_packed;
convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt);
bottom_blob = bottom_blob_packed;
}

return 0;
Expand Down Expand Up @@ -1571,6 +1634,9 @@ int Net::load_param(const DataReader& dr)
layer->top_shapes[j] = d->blobs[layer->tops[j]].shape;
}

// pull out layer specific feature disabled set
layer->featmask = pd.get(31, 0);

int lr = layer->load_param(pd);
if (lr != 0)
{
Expand Down Expand Up @@ -1774,6 +1840,9 @@ int Net::load_param_bin(const DataReader& dr)
layer->top_shapes[j] = d->blobs[layer->tops[j]].shape;
}

// pull out layer specific feature disabled set
layer->featmask = pd.get(31, 0);

int lr = layer->load_param(pd);
if (lr != 0)
{
Expand Down Expand Up @@ -1855,12 +1924,17 @@ int Net::load_model(const DataReader& dr)
{
Layer* layer = d->layers[i];

Option opt1 = opt;
Option opt1 = get_masked_option(opt, layer->featmask);
#if NCNN_VULKAN
if (opt.use_vulkan_compute)
if (opt1.use_vulkan_compute)
{
if (!layer->support_image_storage) opt1.use_image_storage = false;
}
else
{
layer->vkdev = 0;
layer->support_vulkan = false;
}
#endif // NCNN_VULKAN

int cret = layer->create_pipeline(opt1);
Expand Down Expand Up @@ -2066,11 +2140,13 @@ void Net::clear()
{
Layer* layer = d->layers[i];

Option opt1 = opt;
Option opt1 = get_masked_option(opt, layer->featmask);
#if NCNN_VULKAN
if (!layer->support_image_storage)
{
opt1.use_image_storage = false;
}
#endif // NCNN_VULKAN

int dret = layer->destroy_pipeline(opt1);
if (dret != 0)
Expand Down
2 changes: 1 addition & 1 deletion tests/CMakeLists.txt
Expand Up @@ -77,7 +77,7 @@ ncnn_add_layer_test(DeconvolutionDepthWise)
ncnn_add_layer_test(DeconvolutionDepthWise1D)
ncnn_add_layer_test(DeconvolutionDepthWise3D)
ncnn_add_layer_test(DeepCopy)
ncnn_add_layer_test(DeformableConv2D)
# ncnn_add_layer_test(DeformableConv2D) too slow :(
ncnn_add_layer_test(Dequantize)
ncnn_add_layer_test(Dropout)
ncnn_add_layer_test(Einsum)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_squeezenet.cpp
Expand Up @@ -177,6 +177,16 @@ static int test_squeezenet(const ncnn::Option& opt, int load_model_type, float e
{
// load from plain model file
squeezenet.load_param(MODEL_DIR "/squeezenet_v1.1.param");

// test random feature disabled bits
{
std::vector<ncnn::Layer*>& layers = squeezenet.mutable_layers();
for (size_t i = 0; i < layers.size(); i++)
{
layers[i]->featmask = i * 11 % 128;
}
}

squeezenet.load_model(MODEL_DIR "/squeezenet_v1.1.bin");
}
if (load_model_type == 1)
Expand Down

0 comments on commit 0b591b0

Please sign in to comment.