PR tensorflow#488: Adding support for float and bf16 output and summand types in _FusedQuantizedConv2D and _FusedQuantizedDepthwiseConv2D

bhavani-subramanian committed Jun 23, 2023
1 parent e3ebf07 commit e47ca23
Showing 3 changed files with 51 additions and 23 deletions.
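This commit lets _FusedQuantizedConv2D and _FusedQuantizedDepthwiseConv2D emit float or bfloat16 output (and accept a float or bfloat16 summand), which is what the kernels produce when a Dequantize is fused at the end of the quantized convolution. The sketch below is a rough illustration of the NodeDef attributes involved in the new float-output path and is not part of the commit: the helper name is hypothetical, inputs and the remaining attributes (strides, padding, host/device type lists) are elided, and the "fused_ops" attribute name is taken from the test harness in this diff.

#include <string>
#include <vector>

#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_util.h"
#include "tensorflow/core/framework/types.pb.h"

// Hypothetical helper (illustration only): builds a NodeDef carrying just the
// attributes this commit touches. Inputs and the remaining attributes are
// intentionally omitted.
tensorflow::NodeDef MakeFloatOutputQuantizedConvNodeDef() {
  tensorflow::NodeDef node;
  node.set_name("quantized_conv_op");
  node.set_op("_FusedQuantizedConv2D");
  tensorflow::AddNodeAttr("Tinput", tensorflow::DT_QINT8, &node);
  tensorflow::AddNodeAttr("Tfilter", tensorflow::DT_QINT8, &node);
  tensorflow::AddNodeAttr("Tbias", tensorflow::DT_FLOAT, &node);
  // New with this commit: DT_FLOAT (or DT_BFLOAT16) is now accepted for the
  // summand and output types.
  tensorflow::AddNodeAttr("Tsummand", tensorflow::DT_FLOAT, &node);
  tensorflow::AddNodeAttr("out_type", tensorflow::DT_FLOAT, &node);
  // Mirrors the fusion list the updated test builds for the Dequantize case.
  const std::vector<std::string> fused_ops = {"BiasAdd", "Dequantize"};
  tensorflow::AddNodeAttr("fused_ops", fused_ops, &node);
  return node;
}

The updated test harness in mkl_quantized_conv_ops_test.cc (see the hunks below) reaches the same configuration through NodeDefBuilder when fuse_dequantize is set.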
15 changes: 15 additions & 0 deletions tensorflow/core/kernels/mkl/mkl_conv_ops.cc
@@ -3114,6 +3114,14 @@ REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedConv2D",
MklQuantizedConvOp, qint8, quint8,
false, quantized_fusions::none,
-1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedConv2D",
MklQuantizedConvOp, bfloat16,
bfloat16, false,
quantized_fusions::none, -1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedConv2D",
MklQuantizedConvOp, float, float,
false, quantized_fusions::none,
-1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedDepthwiseConv2D",
MklQuantizedConvOp, qint8, qint8,
true, quantized_fusions::none, -1);
@@ -3126,6 +3134,13 @@ REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedDepthwiseConv2D",
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedDepthwiseConv2D",
MklQuantizedConvOp, qint8, quint8,
true, quantized_fusions::none, -1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedDepthwiseConv2D",
MklQuantizedConvOp, bfloat16,
bfloat16, true,
quantized_fusions::none, -1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedDepthwiseConv2D",
MklQuantizedConvOp, float, float,
true, quantized_fusions::none, -1);
REGISTER_MKL_KERNEL_ALL_INPUT_AND_BIAS_TYPES("_FusedQuantizedConv3D",
MklQuantizedConvOp, qint8, qint8,
false, quantized_fusions::none,
51 changes: 32 additions & 19 deletions tensorflow/core/kernels/mkl/mkl_quantized_conv_ops_test.cc
@@ -765,13 +765,14 @@ class QuantizedConvTest : public OpsTestBase {
const Tensor& output = *GetOutput(0);
const Tensor& output_min = *GetOutput(1);
const Tensor& output_max = *GetOutput(2);
const float output_max_value = output_max.scalar<float>()();

Tensor output_float;
MklTestingUtil::RunDequantizeOp(output, output_min, output_max, "SCALED",
&output_float);
Tensor output_float = output;
if (!std::is_same<Toutput, float>::value)
MklTestingUtil::RunDequantizeOp(output, output_min, output_max, "SCALED",
&output_float);
if (std::is_same<Tsummand, qint8>::value &&
std::is_same<Toutput, quint8>::value) {
const float output_max_value = output_max.flat<float>()(0);
// When summand's type is qint8 and output's type is quint8, we need to
// clamp the expected value. Although output's type is quint8, it cannot
// hold values larger than 127 due to limitation in the implementation.
@@ -850,9 +851,9 @@ class QuantizedConvTest : public OpsTestBase {
}

template <typename Tinput, typename Toutput>
void TestBiasAddFusion(bool fuse_requantize, const bool is_depthwise,
string activation = "", const float tol = 1.0,
const float alpha = 0.0) {
void TestBiasAddFusion(bool fuse_requantize, bool fuse_dequantize,
const bool is_depthwise, string activation = "",
const float tol = 1.0, const float alpha = 0.0) {
const int stride = 1;
const string padding = "VALID";
std::vector<string> fused_ops = {"BiasAdd"};
@@ -879,6 +880,10 @@ class QuantizedConvTest : public OpsTestBase {
input_types.push_back(DT_FLOAT); // max_freezed_output
}

if (fuse_dequantize) {
fused_ops.push_back("Dequantize");
}

TF_EXPECT_OK(
NodeDefBuilder("quantized_conv_op",
is_depthwise ? "_FusedQuantizedDepthwiseConv2D"
@@ -1228,49 +1233,57 @@ class QuantizedConvTest : public OpsTestBase {
};

TEST_F(QuantizedConvTest, BiasAddFusion) {
TestBiasAddFusion<qint8, qint32>(false, false);
TestBiasAddFusion<qint8, qint32>(false, false, false);
}

TEST_F(QuantizedConvTest, BiasAddRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, false);
TestBiasAddFusion<qint8, qint8>(true, false, false);
}

TEST_F(QuantizedConvTest, BiasAddDequantizeFusion) {
TestBiasAddFusion<qint8, float>(false, true, false);
}

TEST_F(QuantizedConvTest, DWBiasAddDequantizeFusion) {
TestBiasAddFusion<qint8, float>(false, true, true);
}

TEST_F(QuantizedConvTest, Conv3DBiasAddRequantizeFusion) {
TestConv3DBiasAddFusion<qint8, qint8>(true, "Relu");
}

TEST_F(QuantizedConvTest, BiasAddReluRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, false, "Relu");
TestBiasAddFusion<qint8, qint8>(true, false, false, "Relu");
}

TEST_F(QuantizedConvTest, BiasAddLeakyReluRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, false, "LeakyRelu", 1.0, 0.2);
TestBiasAddFusion<qint8, qint8>(true, false, false, "LeakyRelu", 1.0, 0.2);
}

TEST_F(QuantizedConvTest, UnsignedInputBiasAddReluRequantizeFusion) {
// We need higher tolerance for quint8 input/output
TestBiasAddFusion<quint8, quint8>(true, false, "Relu", 4.0);
TestBiasAddFusion<quint8, quint8>(true, false, false, "Relu", 4.0);
}

TEST_F(QuantizedConvTest, DWBiasAddFusion) {
TestBiasAddFusion<qint8, qint32>(false, true);
TestBiasAddFusion<qint8, qint32>(false, false, true);
}

TEST_F(QuantizedConvTest, DWBiasAddRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, true);
TestBiasAddFusion<qint8, qint8>(true, false, true);
}

TEST_F(QuantizedConvTest, DWBiasAddReluRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, true, "Relu");
TestBiasAddFusion<qint8, qint8>(true, false, true, "Relu");
}

TEST_F(QuantizedConvTest, DWBiasAddLeakyReluRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, true, "LeakyRelu", 1.0, 0.2);
TestBiasAddFusion<qint8, qint8>(true, false, true, "LeakyRelu", 1.0, 0.2);
}

TEST_F(QuantizedConvTest, DWUnsignedInputBiasAddReluRequantizeFusion) {
// We need higher tolerance for quint8 input/output
TestBiasAddFusion<quint8, quint8>(true, true, "Relu", 4.0);
TestBiasAddFusion<quint8, quint8>(true, false, true, "Relu", 4.0);
}

TEST_F(QuantizedConvTest, BiasAddSumReluRequantizeFusion) {
@@ -1314,11 +1327,11 @@ TEST_F(QuantizedConvTest, BiasAddLeakyReluSumFusionFloatSummand) {
}

TEST_F(QuantizedConvTest, BiasAddSigmoidRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, false, "Sigmoid");
TestBiasAddFusion<qint8, qint8>(true, false, false, "Sigmoid");
}

TEST_F(QuantizedConvTest, DWBiasAddSigmoidRequantizeFusion) {
TestBiasAddFusion<qint8, qint8>(true, true, "Sigmoid");
TestBiasAddFusion<qint8, qint8>(true, false, true, "Sigmoid");
}

class QuantizedConv3DTest : public OpsTestBase {
8 changes: 4 additions & 4 deletions tensorflow/core/ops/mkl_nn_ops.cc
@@ -762,8 +762,8 @@ REGISTER_OP("_FusedQuantizedConv2D")
.Attr("Tinput: quantizedtype = DT_QUINT8")
.Attr("Tfilter: quantizedtype = DT_QINT8")
.Attr("Tbias: {float, qint32} = DT_QINT32")
.Attr("Tsummand: {float, quint8, qint8, qint32}")
.Attr("out_type: quantizedtype = DT_QINT32")
.Attr("Tsummand: {float, bfloat16, quint8, qint8, qint32}")
.Attr("out_type: {float, bfloat16, quint8, qint8, qint32} = DT_QINT32")
.Attr("Tdevice_inputs: list(type) >= 0 = []")
.Attr("Thost_inputs: list(type) >= 0")
.Attr("Tdevice_outputs: list(type) >= 0 = []")
@@ -787,8 +787,8 @@ REGISTER_OP("_FusedQuantizedDepthwiseConv2D")
.Attr("Tinput: quantizedtype = DT_QUINT8")
.Attr("Tfilter: quantizedtype = DT_QINT8")
.Attr("Tbias: {float, qint32} = DT_QINT32")
.Attr("Tsummand: {float, quint8, qint8, qint32}")
.Attr("out_type: quantizedtype = DT_QINT32")
.Attr("Tsummand: {float, bfloat16, quint8, qint8, qint32}")
.Attr("out_type: {float, bfloat16, quint8, qint8, qint32} = DT_QINT32")
.Attr("Tdevice_inputs: list(type) >= 0 = []")
.Attr("Thost_inputs: list(type) >= 0")
.Attr("Tdevice_outputs: list(type) >= 0 = []")
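A quick way to confirm that the relaxed out_type and Tsummand constraints are in effect is to inspect the registered OpDef at runtime. This is a hedged sketch, not part of the commit; it assumes a TensorFlow build in which the oneDNN ops from mkl_nn_ops.cc are registered, and it uses only standard op-registry APIs.

#include <iostream>

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/core/framework/types.pb.h"

int main() {
  const tensorflow::OpDef* op_def = nullptr;
  // Lookup only succeeds when the MKL/oneDNN ops are compiled into this build.
  tensorflow::Status s = tensorflow::OpRegistry::Global()->LookUpOpDef(
      "_FusedQuantizedConv2D", &op_def);
  if (!s.ok()) {
    std::cerr << s.ToString() << "\n";
    return 1;
  }
  for (const auto& attr : op_def->attr()) {
    if (attr.name() != "out_type" && attr.name() != "Tsummand") continue;
    std::cout << attr.name() << " allows:";
    const auto& allowed = attr.allowed_values().list();
    // After this change the list should include DT_FLOAT and DT_BFLOAT16.
    for (int i = 0; i < allowed.type_size(); ++i) {
      std::cout << " " << tensorflow::DataType_Name(allowed.type(i));
    }
    std::cout << "\n";
  }
  return 0;
}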
