In [20]:
import numpy as np
# Integer test vectors. NOTE(review): presumably reference RNN outputs and the
# weights/bias of a following layer — confirm against the model they came from.
rnn_gold = np.asarray([
    -18, -210, -112, -52, -11, -180, -80, 100,
    -210, -8, -20, 156, 121, 144, 8, -8,
])
cl_weight = np.asarray([
    -26, 48, 25, 43, -55, 36, 9, 21,
    9, -24, -22, 43, 58, 18, 18, 42,
])
cl_bias = np.asarray([1])

# Element-wise products of the two vectors.
mult = np.multiply(rnn_gold, cl_weight)

# Accumulate the products and add the bias scaled by 256.
# NOTE(review): 256 looks like a 2**8 fixed-point scale factor — confirm.
cl_out = mult.sum() + 256 * cl_bias
print(mult)
print(cl_out)

[   468 -10080  -2800  -2236    605  -6480   -720   2100  -1890    192
    440   6708   7018   2592    144   -336]
[-4019]


In [10]:
import torch
from torch import Tensor
from torch.autograd.function import Function
class GradPreserveRound(Function):
    """Rounding whose backward pass forwards the gradient unchanged.

    The forward pass applies ``torch.round``. The backward pass returns the
    incoming gradient as-is, so the rounding step does not alter gradients
    flowing through it.
    """

    @staticmethod
    def forward(ctx, input):
        """Round ``input`` element-wise.

        :param ctx: autograd context (nothing is stored on it)
        :param input: tensor to round
        :return: ``torch.round(input)``
        """
        return torch.round(input)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the upstream gradient unchanged.

        :param ctx: autograd context (unused)
        :param grad_output: gradient with respect to the output of ``forward``
        :return: ``grad_output`` itself, used as the gradient for ``input``
        """
        # The gradient is the same object whether or not the input needs one,
        # so no branch on ctx.needs_input_grad is required.
        return grad_output


class GradPreserveFloor(Function):
    """Floor whose backward pass forwards the gradient unchanged.

    The forward pass applies ``torch.floor``. The backward pass returns the
    incoming gradient as-is, so the floor step does not alter gradients
    flowing through it.
    """

    @staticmethod
    def forward(ctx, input):
        """Floor ``input`` element-wise.

        :param ctx: autograd context (nothing is stored on it)
        :param input: tensor to floor
        :return: ``torch.floor(input)``
        """
        return torch.floor(input)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the upstream gradient unchanged.

        :param ctx: autograd context (unused)
        :param grad_output: gradient with respect to the output of ``forward``
        :return: ``grad_output`` itself, used as the gradient for ``input``
        """
        # The gradient is the same object whether or not the input needs one,
        # so no branch on ctx.needs_input_grad is required.
        return grad_output
def quantize_tensor(x: Tensor, qi: int, qf: int, enable: int = 0, use_floor: bool = False) -> Tensor:
    """
    Snap ``x`` onto a fixed-point grid while staying in floating point.

    The input is scaled by ``2 ** qf``, converted to integers with round() or
    floor(), clipped to ``+/-(2 ** (qi + qf - 1) - 1)`` and scaled back down.

    :param x: input tensor
    :param qi: number of integer bits before the binary point
    :param qf: number of fraction bits after the binary point
    :param enable: if 0, ``x`` is returned untouched; any other value quantizes
    :param use_floor: use floor() instead of round() for the integer conversion
    :return: tensor quantized to fixed-point precision
    """
    if enable == 0:
        return x

    scale = torch.tensor(float(2. ** qf), dtype=torch.float32)
    limit = torch.tensor(float(2. ** (qi + qf - 1) - 1), dtype=torch.float32)

    # round() is round-half-to-even; both classes forward gradients unchanged.
    to_integer = GradPreserveFloor if use_floor else GradPreserveRound
    scaled = to_integer.apply(x * scale)

    # Symmetric clipping to [-limit, limit]; this is deliberately not the
    # asymmetric saturation range [-limit, limit - 1].
    clipped = torch.min(torch.max(scaled, -limit), limit)
    return torch.div(clipped, scale)

In [18]:
# Show 10 decimal places so the small quotient below prints in full.
torch.set_printoptions(precision=10)
a = torch.tensor(1.0)
# Quantize the tensor `a`, not a bare Python int: quantize_tensor declares
# `x: Tensor`. The result is numerically the same as quantizing 1.
b = quantize_tensor(a, 8, 8, enable=1, use_floor=False)
# Plain floating-point division for comparison.
value = torch.div(a, 128.0)
print(value)

tensor(0.0078125000)


In [15]:
# Same quotient as the torch cell, using plain Python numbers:
# true division of two ints yields a float.
a, b = 1, 128
c = a / b
print(c)

0.0078125
