forked from tensorflow/tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 16
/
api_def_QuantizedMatMulWithBiasAndRelu.pbtxt
85 lines (85 loc) · 2.16 KB
/
api_def_QuantizedMatMulWithBiasAndRelu.pbtxt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
op {
graph_op_name: "QuantizedMatMulWithBiasAndRelu"
visibility: HIDDEN
in_arg {
name: "a"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`.
END
}
in_arg {
name: "b"
description: <<END
A matrix to be multiplied. Must be a two-dimensional tensor of type `qint8`.
END
}
in_arg {
name: "bias"
description: <<END
A 1D bias tensor with size matching the inner dimension of `b` (after being
transposed if `transpose_b` is non-zero).
END
}
in_arg {
name: "min_a"
description: <<END
The float value that the lowest quantized `a` value represents.
END
}
in_arg {
name: "max_a"
description: <<END
The float value that the highest quantized `a` value represents.
END
}
in_arg {
name: "min_b"
description: <<END
The float value that the lowest quantized `b` value represents.
END
}
in_arg {
name: "max_b"
description: <<END
The float value that the highest quantized `b` value represents.
END
}
out_arg {
name: "min_out"
description: <<END
The float value that the lowest quantized output value represents.
END
}
out_arg {
name: "max_out"
description: <<END
The float value that the highest quantized output value represents.
END
}
attr {
name: "transpose_a"
description: "If true, `a` is transposed before multiplication."
}
attr {
name: "transpose_b"
description: "If true, `b` is transposed before multiplication."
}
attr {
name: "input_quant_mode"
description: <<END
Input data quantization mode. Either MIN_FIRST (default) or SCALED.
END
}
summary: <<END
Perform a quantized matrix multiplication of `a` by the matrix `b` with bias
add and relu fusion.
END
description: <<END
The inputs must be two-dimensional matrices and a 1D bias vector. The inner
dimension of `a` (after being transposed if `transpose_a` is non-zero) must
match the outer dimension of `b` (after being transposed if `transpose_b` is
non-zero). A broadcast add of the bias values is then performed on the matrix
multiplication result; the bias size must match the inner dimension of `b`.
Finally, a relu activation is applied to produce a non-negative result.
END
}