-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
scalar qs8 rsum accumulating microkernels
PiperOrigin-RevId: 631504843
- Loading branch information
1 parent
7fabcac
commit cda8726
Showing
19 changed files
with
934 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/sh | ||
# Copyright 2024 Google LLC | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
#################################### Scalar ###################################
# Each xngen invocation renders src/qs8-rsum/scalar.c.in with one CHANNEL_TILE
# value and runs in the background so the three kernels are generated
# concurrently.
# NOTE(review): the output files are named "qs8-rdsum-..." although they live
# under src/qs8-rsum/ and are rendered from the rsum template -- confirm the
# intended file names before renaming anything, since build files may list them.
tools/xngen src/qs8-rsum/scalar.c.in -D CHANNEL_TILE=1 -D ACCUMULATORS=1 -D REQUANTIZATION=FP32 -D VARIANT=IMAGIC -D WASM=0 -o src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u1-acc1.c &
tools/xngen src/qs8-rsum/scalar.c.in -D CHANNEL_TILE=2 -D ACCUMULATORS=1 -D REQUANTIZATION=FP32 -D VARIANT=IMAGIC -D WASM=0 -o src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u2-acc1.c &
tools/xngen src/qs8-rsum/scalar.c.in -D CHANNEL_TILE=4 -D ACCUMULATORS=1 -D REQUANTIZATION=FP32 -D VARIANT=IMAGIC -D WASM=0 -o src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u4-acc1.c &

# Block until every background job has finished; without this the script can
# return while the generated files are still being written.
wait
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u1-acc1.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Auto-generated file. Do not edit! | ||
// Template: src/qs8-rsum/scalar.c.in | ||
// Generator: tools/xngen | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#include <assert.h> | ||
|
||
#include <xnnpack/common.h> | ||
#include <xnnpack/math.h> | ||
#include <xnnpack/reduce.h> | ||
|
||
|
||
void xnn_qs8_rsum_minmax_fp32_ukernel__scalar_imagic_u1( | ||
size_t batch, | ||
const int8_t* restrict input, | ||
int8_t* restrict output, | ||
const union xnn_qs8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(input != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t vinit_bias = params->fp32_scalar_imagic.init_bias; | ||
int32_t vacc0 = vinit_bias; | ||
do { | ||
const int32_t vt = (int32_t) *input++; | ||
vacc0 += vt; | ||
batch -= sizeof(int8_t); | ||
} while (batch != 0); | ||
|
||
const float vscale = params->fp32_scalar_imagic.scale; | ||
const float vmagic_bias = params->fp32_scalar_imagic.magic_bias; | ||
const int32_t vmagic_min = params->fp32_scalar_imagic.magic_min; | ||
const int32_t vmagic_max = params->fp32_scalar_imagic.magic_max; | ||
const int32_t vmagic_bias_less_zero_point = params->fp32_scalar_imagic.magic_bias_less_zero_point; | ||
|
||
float vfpacc = (float) vacc0 * vscale; | ||
vfpacc += vmagic_bias; | ||
int32_t vout = (int32_t) float_as_uint32(vfpacc); | ||
vout = math_max_s32(vout, vmagic_min); | ||
vout = math_min_s32(vout, vmagic_max); | ||
vout -= vmagic_bias_less_zero_point; | ||
|
||
*output += (int8_t) vout; | ||
} |
55 changes: 55 additions & 0 deletions
55
src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u2-acc1.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// Auto-generated file. Do not edit! | ||
// Template: src/qs8-rsum/scalar.c.in | ||
// Generator: tools/xngen | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#include <assert.h> | ||
|
||
#include <xnnpack/common.h> | ||
#include <xnnpack/math.h> | ||
#include <xnnpack/reduce.h> | ||
|
||
|
||
void xnn_qs8_rsum_minmax_fp32_ukernel__scalar_imagic_u2( | ||
size_t batch, | ||
const int8_t* restrict input, | ||
int8_t* restrict output, | ||
const union xnn_qs8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(input != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t vinit_bias = params->fp32_scalar_imagic.init_bias; | ||
int32_t vacc0 = vinit_bias; | ||
for (; batch >= 2; batch -= 2) { | ||
const int32_t vt0 = (int32_t) input[0]; | ||
const int32_t vt1 = (int32_t) input[1]; | ||
input += 2; | ||
|
||
vacc0 += vt0; | ||
vacc0 += vt1; | ||
} | ||
|
||
if XNN_UNLIKELY(batch != 0) { | ||
const int32_t vt = (int32_t) *input; | ||
vacc0 += vt; | ||
} | ||
|
||
const float vscale = params->fp32_scalar_imagic.scale; | ||
const float vmagic_bias = params->fp32_scalar_imagic.magic_bias; | ||
const int32_t vmagic_min = params->fp32_scalar_imagic.magic_min; | ||
const int32_t vmagic_max = params->fp32_scalar_imagic.magic_max; | ||
const int32_t vmagic_bias_less_zero_point = params->fp32_scalar_imagic.magic_bias_less_zero_point; | ||
|
||
float vfpacc = (float) vacc0 * vscale; | ||
vfpacc += vmagic_bias; | ||
int32_t vout = (int32_t) float_as_uint32(vfpacc); | ||
vout = math_max_s32(vout, vmagic_min); | ||
vout = math_min_s32(vout, vmagic_max); | ||
vout -= vmagic_bias_less_zero_point; | ||
|
||
*output += (int8_t) vout; | ||
} |
62 changes: 62 additions & 0 deletions
62
src/qs8-rsum/gen/qs8-rdsum-minmax-fp32-scalar-imagic-u4-acc1.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Auto-generated file. Do not edit! | ||
// Template: src/qs8-rsum/scalar.c.in | ||
// Generator: tools/xngen | ||
// | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
#include <assert.h> | ||
|
||
#include <xnnpack/common.h> | ||
#include <xnnpack/math.h> | ||
#include <xnnpack/reduce.h> | ||
|
||
|
||
void xnn_qs8_rsum_minmax_fp32_ukernel__scalar_imagic_u4( | ||
size_t batch, | ||
const int8_t* restrict input, | ||
int8_t* restrict output, | ||
const union xnn_qs8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(input != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t vinit_bias = params->fp32_scalar_imagic.init_bias; | ||
int32_t vacc0 = vinit_bias; | ||
for (; batch >= 4; batch -= 4) { | ||
const int32_t vt0 = (int32_t) input[0]; | ||
const int32_t vt1 = (int32_t) input[1]; | ||
const int32_t vt2 = (int32_t) input[2]; | ||
const int32_t vt3 = (int32_t) input[3]; | ||
input += 4; | ||
|
||
vacc0 += vt0; | ||
vacc0 += vt1; | ||
vacc0 += vt2; | ||
vacc0 += vt3; | ||
} | ||
|
||
if XNN_UNLIKELY(batch != 0) { | ||
do { | ||
const int32_t vt = (int32_t) *input++; | ||
vacc0 += vt; | ||
batch -= sizeof(int8_t); | ||
} while (batch != 0); | ||
} | ||
|
||
const float vscale = params->fp32_scalar_imagic.scale; | ||
const float vmagic_bias = params->fp32_scalar_imagic.magic_bias; | ||
const int32_t vmagic_min = params->fp32_scalar_imagic.magic_min; | ||
const int32_t vmagic_max = params->fp32_scalar_imagic.magic_max; | ||
const int32_t vmagic_bias_less_zero_point = params->fp32_scalar_imagic.magic_bias_less_zero_point; | ||
|
||
float vfpacc = (float) vacc0 * vscale; | ||
vfpacc += vmagic_bias; | ||
int32_t vout = (int32_t) float_as_uint32(vfpacc); | ||
vout = math_max_s32(vout, vmagic_min); | ||
vout = math_min_s32(vout, vmagic_max); | ||
vout -= vmagic_bias_less_zero_point; | ||
|
||
*output += (int8_t) vout; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// This source code is licensed under the BSD-style license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
|
||
$assert CHANNEL_TILE >= 1 | ||
$assert VARIANT in ("FMAGIC", "IMAGIC", "LRINTF") | ||
#include <assert.h> | ||
|
||
#include <xnnpack/common.h> | ||
#include <xnnpack/math.h> | ||
#include <xnnpack/reduce.h> | ||
|
||
|
||
$PARAMS_STRUCT = "fp32_scalar_" + VARIANT.lower() | ||
$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32" | ||
$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32" | ||
void xnn_qs8_rsum_minmax_${REQUANTIZATION.lower()}_ukernel__scalar_${VARIANT.lower()}_u${CHANNEL_TILE}( | ||
size_t batch, | ||
const int8_t* restrict input, | ||
int8_t* restrict output, | ||
const union xnn_qs8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) | ||
{ | ||
assert(batch != 0); | ||
assert(input != NULL); | ||
assert(output != NULL); | ||
|
||
const int32_t vinit_bias = params->${PARAMS_STRUCT}.init_bias; | ||
$for A in range(ACCUMULATORS): | ||
int32_t vacc${A} = vinit_bias; | ||
$if CHANNEL_TILE == 1: | ||
do { | ||
const int32_t vt = (int32_t) *input++; | ||
vacc0 += vt; | ||
batch -= sizeof(int8_t); | ||
} while (batch != 0); | ||
$else: | ||
for (; batch >= ${CHANNEL_TILE}; batch -= ${CHANNEL_TILE}) { | ||
$for N in range(CHANNEL_TILE): | ||
const int32_t vt${N} = (int32_t) input[${N}]; | ||
input += ${CHANNEL_TILE}; | ||
|
||
$for N in range(CHANNEL_TILE): | ||
vacc${N % ACCUMULATORS} += vt${N}; | ||
} | ||
$if ACCUMULATORS > 1: | ||
$ACC_SLICE = 1 | ||
$while ACC_SLICE < ACCUMULATORS: | ||
$for A in range(0, ACCUMULATORS, ACC_SLICE * 2): | ||
$if A + ACC_SLICE < ACCUMULATORS: | ||
vacc${A} += vacc${A + ACC_SLICE}; | ||
$ACC_SLICE *= 2 | ||
|
||
if XNN_UNLIKELY(batch != 0) { | ||
$if CHANNEL_TILE == 2: | ||
const int32_t vt = (int32_t) *input; | ||
vacc0 += vt; | ||
$else: | ||
do { | ||
const int32_t vt = (int32_t) *input++; | ||
vacc0 += vt; | ||
batch -= sizeof(int8_t); | ||
} while (batch != 0); | ||
} | ||
|
||
const float vscale = params->${PARAMS_STRUCT}.scale; | ||
$if VARIANT == "FMAGIC": | ||
const float voutput_min_less_zero_point = params->fp32_scalar_fmagic.output_min_less_zero_point; | ||
const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; | ||
const float vmagic_bias = params->fp32_scalar_fmagic.magic_bias; | ||
const int32_t vmagic_bias_less_output_zero_point = params->fp32_scalar_fmagic.magic_bias_less_output_zero_point; | ||
$elif VARIANT == "IMAGIC": | ||
const float vmagic_bias = params->fp32_scalar_imagic.magic_bias; | ||
const int32_t vmagic_min = params->fp32_scalar_imagic.magic_min; | ||
const int32_t vmagic_max = params->fp32_scalar_imagic.magic_max; | ||
const int32_t vmagic_bias_less_zero_point = params->fp32_scalar_imagic.magic_bias_less_zero_point; | ||
$elif VARIANT == "LRINTF": | ||
const float voutput_min_less_zero_point = params->fp32_scalar_lrintf.output_min_less_zero_point; | ||
const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; | ||
const int32_t voutput_zero_point = params->fp32_scalar_lrintf.output_zero_point; | ||
|
||
float vfpacc = (float) vacc0 * vscale; | ||
$if VARIANT == "FMAGIC": | ||
vfpacc = ${MAX_F32}(vfpacc, voutput_min_less_zero_point); | ||
vfpacc = ${MIN_F32}(vfpacc, voutput_max_less_zero_point); | ||
vfpacc += vmagic_bias; | ||
int32_t vout = (int32_t) float_as_uint32(vfpacc) - vmagic_bias_less_output_zero_point; | ||
$elif VARIANT == "IMAGIC": | ||
vfpacc += vmagic_bias; | ||
int32_t vout = (int32_t) float_as_uint32(vfpacc); | ||
vout = math_max_s32(vout, vmagic_min); | ||
vout = math_min_s32(vout, vmagic_max); | ||
vout -= vmagic_bias_less_zero_point; | ||
$elif VARIANT == "LRINTF": | ||
vfpacc = ${MAX_F32}(vfpacc, voutput_min_less_zero_point); | ||
vfpacc = ${MIN_F32}(vfpacc, voutput_max_less_zero_point); | ||
const int32_t vrndacc = (int32_t) lrintf(vfpacc); | ||
int32_t vout = vrndacc + voutput_zero_point; | ||
|
||
*output += (int8_t) vout; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.