Skip to content

Commit

Permalink
Generate RSUM benchmarks
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 631778203
  • Loading branch information
alankelly authored and xnnpack-bot committed May 16, 2024
1 parent 9d12551 commit ee5d146
Show file tree
Hide file tree
Showing 10 changed files with 721 additions and 366 deletions.
22 changes: 19 additions & 3 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,16 @@ xnnpack_benchmark(
deps = MICROKERNEL_BENCHMARK_DEPS + ["//:requantization_stubs"],
)

+ xnnpack_benchmark(
+ name = "qs8_rsum_minmax_fp32_bench",
+ srcs = [
+ "qs8-rsum-minmax-fp32.cc",
+ ],
+ deps = MICROKERNEL_BENCHMARK_DEPS + [
+ ":rsum_benchmark",
+ ],
+ )
+
xnnpack_benchmark(
name = "qs8_vadd_bench",
srcs = [
Expand Down Expand Up @@ -442,15 +452,19 @@ xnnpack_benchmark(
srcs = [
"f16-rsum.cc",
],
- deps = MICROKERNEL_BENCHMARK_DEPS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [
+ ":rsum_benchmark",
+ ],
)

xnnpack_benchmark(
name = "f16_f32acc_rsum_bench",
srcs = [
"f16-f32acc-rsum.cc",
],
- deps = MICROKERNEL_BENCHMARK_DEPS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [
+ ":rsum_benchmark",
+ ],
)

xnnpack_benchmark(
Expand Down Expand Up @@ -839,7 +853,9 @@ xnnpack_benchmark(
srcs = [
"f32-rsum.cc",
],
- deps = MICROKERNEL_BENCHMARK_DEPS,
+ deps = MICROKERNEL_BENCHMARK_DEPS + [
+ ":rsum_benchmark",
+ ],
)

xnnpack_benchmark(
Expand Down
68 changes: 54 additions & 14 deletions bench/f16-f32acc-rdsum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,71 +20,111 @@


#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, neonfp16arith_c16,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, neonfp16arith_c16,
xnn_f16_f32acc_rdsum_ukernel_7p7x__neonfp16arith_c16,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckNEONFP16ARITH)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)


#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, neonfp16arith_c32,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, neonfp16arith_c32,
xnn_f16_f32acc_rdsum_ukernel_7p7x__neonfp16arith_c32,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckNEONFP16ARITH)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)


#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, neonfp16arith_c64,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, neonfp16arith_c64,
xnn_f16_f32acc_rdsum_ukernel_7p7x__neonfp16arith_c64,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckNEONFP16ARITH)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, avx512skx_c16,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, f16c_c16,
+ xnn_f16_f32acc_rdsum_ukernel_7p7x__f16c_c16,
+ xnn_init_f16_f32acc_scale_avx_params,
+ benchmark::utils::CheckF16C)
+ ->Apply(BenchmarkRDSUM)
+ ->UseRealTime();
+ #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, f16c_c32,
+ xnn_f16_f32acc_rdsum_ukernel_7p7x__f16c_c32,
+ xnn_init_f16_f32acc_scale_avx_params,
+ benchmark::utils::CheckF16C)
+ ->Apply(BenchmarkRDSUM)
+ ->UseRealTime();
+ #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, f16c_c64,
+ xnn_f16_f32acc_rdsum_ukernel_7p7x__f16c_c64,
+ xnn_init_f16_f32acc_scale_avx_params,
+ benchmark::utils::CheckF16C)
+ ->Apply(BenchmarkRDSUM)
+ ->UseRealTime();
+ #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, f16c_c128,
+ xnn_f16_f32acc_rdsum_ukernel_7p7x__f16c_c128,
+ xnn_init_f16_f32acc_scale_avx_params,
+ benchmark::utils::CheckF16C)
+ ->Apply(BenchmarkRDSUM)
+ ->UseRealTime();
+ #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, avx512skx_c16,
xnn_f16_f32acc_rdsum_ukernel_7p7x__avx512skx_c16,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckAVX512SKX)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, avx512skx_c32,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, avx512skx_c32,
xnn_f16_f32acc_rdsum_ukernel_7p7x__avx512skx_c32,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckAVX512SKX)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, avx512skx_c64,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, avx512skx_c64,
xnn_f16_f32acc_rdsum_ukernel_7p7x__avx512skx_c64,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckAVX512SKX)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- BENCHMARK_CAPTURE(f16_f32acc_rsum_discontig, avx512skx_c128,
+ BENCHMARK_CAPTURE(f16_f32acc_rdsum, avx512skx_c128,
xnn_f16_f32acc_rdsum_ukernel_7p7x__avx512skx_c128,
xnn_init_f16_f32acc_scale_scalar_params,
benchmark::utils::CheckAVX512SKX)
- ->Apply(BenchmarkBatch)
+ ->Apply(BenchmarkRDSUM)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

Expand Down

0 comments on commit ee5d146

Please sign in to comment.