Skip to content

Commit

Permalink
Compute Library v23.05
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jenkins committed May 18, 2023
1 parent d8bf9b5 commit 6c713f0
Show file tree
Hide file tree
Showing 711 changed files with 181,341 additions and 200,606 deletions.
1 change: 1 addition & 0 deletions .bazelrc
Expand Up @@ -37,3 +37,4 @@ build --flag_alias=logging=//:logging
build --flag_alias=openmp=//:openmp
build --flag_alias=cppthreads=//:cppthreads
build --flag_alias=enable_bf16_validation=//:enable_bf16_validation
build --flag_alias=enable_sve_validation=//:enable_sve_validation
42 changes: 34 additions & 8 deletions Android.bp
Expand Up @@ -27,6 +27,7 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/elementwise_operation.cl",
"src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl",
"src/core/CL/cl_kernels/common/elementwise_unary.cl",
"src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl",
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h",
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl",
Expand All @@ -49,6 +50,8 @@ opencl_srcs = [
"src/core/CL/cl_kernels/common/generate_proposals_quantized.cl",
"src/core/CL/cl_kernels/common/instance_normalization.cl",
"src/core/CL/cl_kernels/common/l2_normalize.cl",
"src/core/CL/cl_kernels/common/mat_mul.cl",
"src/core/CL/cl_kernels/common/mat_mul_quantized.cl",
"src/core/CL/cl_kernels/common/mean_stddev_normalization.cl",
"src/core/CL/cl_kernels/common/memset.cl",
"src/core/CL/cl_kernels/common/minmax_layer.cl",
Expand Down Expand Up @@ -175,6 +178,7 @@ cc_library_static {
"build/android-arm64v8a/src/core/CL",
"src/core/common",
"src/core/helpers",
"src/core/NEON/kernels/arm_gemm",
"src/core/NEON/kernels/assembly",
"src/core/NEON/kernels/convolution/common",
"src/core/NEON/kernels/convolution/winograd",
Expand Down Expand Up @@ -292,6 +296,7 @@ cc_library_static {
"src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp",
"src/core/NEON/kernels/NERangeKernel.cpp",
"src/core/NEON/kernels/NEReductionOperationKernel.cpp",
"src/core/NEON/kernels/NEReorderKernel.cpp",
"src/core/NEON/kernels/NEReorgLayerKernel.cpp",
"src/core/NEON/kernels/NEReverseKernel.cpp",
"src/core/NEON/kernels/NESelectKernel.cpp",
Expand All @@ -301,6 +306,7 @@ cc_library_static {
"src/core/NEON/kernels/NEStridedSliceKernel.cpp",
"src/core/NEON/kernels/NETileKernel.cpp",
"src/core/NEON/kernels/arm_conv/addressing.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_common.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
Expand Down Expand Up @@ -483,6 +489,9 @@ cc_library_static {
"src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/floor/neon/fp16.cpp",
"src/cpu/kernels/floor/neon/fp32.cpp",
"src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp",
Expand Down Expand Up @@ -510,6 +519,7 @@ cc_library_static {
"src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp",
"src/cpu/kernels/l2normlayer/generic/neon/fp32.cpp",
"src/cpu/kernels/l2normlayer/generic/neon/impl.cpp",
"src/cpu/kernels/lut/generic/neon/u8.cpp",
"src/cpu/kernels/maxunpool/generic/neon/fp16.cpp",
"src/cpu/kernels/maxunpool/generic/neon/fp32.cpp",
"src/cpu/kernels/maxunpool/generic/neon/impl.cpp",
Expand Down Expand Up @@ -578,6 +588,7 @@ cc_library_static {
"src/cpu/operators/CpuGemmDirectConv2d.cpp",
"src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
"src/cpu/operators/CpuGemmLowpOutputStage.cpp",
"src/cpu/operators/CpuMatMul.cpp",
"src/cpu/operators/CpuMaxUnpooling.cpp",
"src/cpu/operators/CpuMul.cpp",
"src/cpu/operators/CpuPermute.cpp",
Expand Down Expand Up @@ -630,8 +641,10 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
Expand Down Expand Up @@ -685,6 +698,8 @@ cc_library_static {
"src/gpu/cl/kernels/ClIm2ColKernel.cpp",
"src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp",
"src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
"src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp",
"src/gpu/cl/kernels/ClMatMulNativeKernel.cpp",
"src/gpu/cl/kernels/ClMulKernel.cpp",
"src/gpu/cl/kernels/ClPermuteKernel.cpp",
"src/gpu/cl/kernels/ClPool2dKernel.cpp",
Expand Down Expand Up @@ -733,6 +748,7 @@ cc_library_static {
"src/gpu/cl/operators/ClGemmLowpOutputStage.cpp",
"src/gpu/cl/operators/ClIndirectConv2d.cpp",
"src/gpu/cl/operators/ClLogicalNot.cpp",
"src/gpu/cl/operators/ClMatMul.cpp",
"src/gpu/cl/operators/ClMul.cpp",
"src/gpu/cl/operators/ClPRelu.cpp",
"src/gpu/cl/operators/ClPermute.cpp",
Expand Down Expand Up @@ -815,6 +831,7 @@ cc_library_static {
"src/runtime/CL/functions/CLLogicalAnd.cpp",
"src/runtime/CL/functions/CLLogicalNot.cpp",
"src/runtime/CL/functions/CLLogicalOr.cpp",
"src/runtime/CL/functions/CLMatMul.cpp",
"src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp",
"src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp",
"src/runtime/CL/functions/CLNormalizationLayer.cpp",
Expand Down Expand Up @@ -928,6 +945,7 @@ cc_library_static {
"src/runtime/NEON/functions/NELSTMLayer.cpp",
"src/runtime/NEON/functions/NELSTMLayerQuantized.cpp",
"src/runtime/NEON/functions/NELogical.cpp",
"src/runtime/NEON/functions/NEMatMul.cpp",
"src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp",
"src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp",
"src/runtime/NEON/functions/NENormalizationLayer.cpp",
Expand All @@ -946,6 +964,7 @@ cc_library_static {
"src/runtime/NEON/functions/NERange.cpp",
"src/runtime/NEON/functions/NEReduceMean.cpp",
"src/runtime/NEON/functions/NEReductionOperation.cpp",
"src/runtime/NEON/functions/NEReorderLayer.cpp",
"src/runtime/NEON/functions/NEReorgLayer.cpp",
"src/runtime/NEON/functions/NEReshapeLayer.cpp",
"src/runtime/NEON/functions/NEReverse.cpp",
Expand Down Expand Up @@ -981,6 +1000,8 @@ cc_library_static {
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp",
"src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp",
"src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp",
"src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp",
"src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp",
"utils/CommonGraphOptions.cpp",
"utils/GraphUtils.cpp",
"utils/Utils.cpp",
Expand Down Expand Up @@ -1053,6 +1074,16 @@ cc_library_static {
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
Expand All @@ -1070,17 +1101,14 @@ cc_library_static {
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_2rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_2rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_2rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp",
Expand All @@ -1094,10 +1122,8 @@ cc_library_static {
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst_strided/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
Expand Down Expand Up @@ -1261,6 +1287,9 @@ cc_library_static {
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp",
Expand Down Expand Up @@ -1319,9 +1348,6 @@ cc_library_static {
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp16_6x6.cpp",
"src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp32_6x6.cpp",
"src/core/NEON/kernels/convolution/winograd/input_transforms/sme_fp32_mla_6x6.cpp",
Expand Down
37 changes: 27 additions & 10 deletions BUILD.bazel
Expand Up @@ -72,6 +72,12 @@ bool_flag(
visibility = ["//visibility:public"],
)

# Boolean build setting "enable_sve_validation".
# Off by default (build_setting_default = False) and publicly visible, so it
# can be referenced from any package. The .bazelrc flag_alias exposes it on the
# command line as --enable_sve_validation.
# NOTE(review): the targets that consume this setting are not visible in this
# hunk - confirm where it is read before relying on it.
bool_flag(
name = "enable_sve_validation",
build_setting_default = False,
visibility = ["//visibility:public"],
)

#---------------------------------------------------------------------
# Flag variables
config_setting(
Expand Down Expand Up @@ -116,6 +122,13 @@ config_setting(
},
)

# Configuration condition that matches when the ":enable_sve_validation" build
# setting in this package has the value "true".
# NOTE(review): presumably used as a select() key in the same way as
# "//:debug_flag" elsewhere in this file; the consuming select() is not visible
# in this hunk - confirm.
config_setting(
name = "sve_validation_flag",
flag_values = {
":enable_sve_validation": "true",
},
)


#---------------------------------------------------------------------
# Common defines used for all targets
Expand All @@ -138,8 +151,6 @@ cc_library(
"DARM_COMPUTE_GRAPH_ENABLED",
"ARM_COMPUTE_ENABLE_SVEF32MM",
"ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS",
"ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE",
"_GLIBCXX_USE_NANOSLEEP"
] + select({
"//:debug_flag": [
Expand Down Expand Up @@ -181,7 +192,7 @@ cc_library(
name = "arm_compute_graph",
srcs = ["//src:arm_compute_graph_srcs"],
copts = [
"-march=armv8.2-a+fp16", # What arch is it we should go for here?
"-march=armv8.2-a+fp16",
] + select({
"//:debug_flag": [
"-O0",
Expand All @@ -200,7 +211,7 @@ cc_library(
}),
visibility = ["//visibility:public"],
deps = [
"arm_compute",
"arm_compute_core",
"//:common_defines",
"//arm_compute:graph_headers",
],
Expand All @@ -214,7 +225,7 @@ cc_library(
name = "arm_compute_sve2",
srcs = ["//src:arm_compute_sve2_srcs"],
copts = [
"-march=armv8.6-a+sve2+fp16+dotprod", # What arch is it we should go for here?
"-march=armv8.6-a+sve2+fp16+dotprod",
] + select({
"//:debug_flag": [
"-O0",
Expand Down Expand Up @@ -243,8 +254,10 @@ cc_library(
"//conditions:default": [],
}),
local_defines = [
"ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE2",
"ARM_COMPUTE_ENABLE_BF16"
"ARM_COMPUTE_ENABLE_BF16",
],
deps = [
"//:common_defines",
Expand All @@ -263,7 +276,7 @@ cc_library(
name = "arm_compute_sve",
srcs = ["//src:arm_compute_sve_srcs"],
copts = [
"-march=armv8.2-a+sve+fp16+dotprod", # What arch is it we should go for here?
"-march=armv8.2-a+sve+fp16+dotprod",
] + select({
"//:debug_flag": [
"-O0",
Expand Down Expand Up @@ -292,6 +305,8 @@ cc_library(
"//conditions:default": [],
}),
local_defines = [
"ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE",
"ARM_COMPUTE_ENABLE_BF16",
],
deps = [
Expand All @@ -308,8 +323,8 @@ cc_library(
# Core and Runtime library

cc_library(
name = "arm_compute",
srcs = ["//src:arm_compute_srcs"],
name = "arm_compute_core",
srcs = ["//src:arm_compute_core_srcs"],
hdrs = glob([
"core/NEON/kernels/**/*.h",
"core/NEON/kernels/**/*.hpp",
Expand All @@ -318,7 +333,7 @@ cc_library(
"//:create_version_file",
],
copts = [
"-march=armv8.2-a+fp16", # What arch is it we should go for here?
"-march=armv8.2-a+fp16",
] + select({
"//:debug_flag": [
"-O0",
Expand Down Expand Up @@ -348,6 +363,8 @@ cc_library(
"//conditions:default": [],
}),
local_defines = [
"ENABLE_SVE",
"ARM_COMPUTE_ENABLE_SVE",
"ARM_COMPUTE_ENABLE_BF16",
],
visibility = ["//visibility:public"],
Expand Down

0 comments on commit 6c713f0

Please sign in to comment.