In [2]:
import time
import sys
import numpy as np
import platform
import tqdm
from typing import Tuple, List, Union, Any
import pynq_dpu
import pynq


class EvalLoader:
    def __init__(self, 
                 batch_size: int = 1, 
                 npz_path: str = 'eval_data.npz') -> None:
        data = np.load(npz_path)
        self.data = data['data'].astype(np.float32) / 255
        self.targets = data['targets']
        self.batch_size = batch_size
    
    def __getitem__(self, i):
        if i >= len(self):
            raise StopIteration

        beg = min(i * self.batch_size, self.data.shape[0])
        end = min(beg + self.batch_size, self.data.shape[0])

        return self.data[beg:end, ...], self.targets[beg:end]
    
    def __len__(self):
        return self.data.shape[0] // self.batch_size


class TimeMeasurement:
    def __init__(self, context_name: str, frames: int) -> None:
        self.context_name: str = context_name
        self.frames: int = frames
        self.begin: float = None
        self.end: float = None

    def __enter__(self):
        self.begin = time.time()
        return self

    def __exit__(self, *args):
        self.end = time.time()

    @property
    def time(self) -> float:
        if self.begin is None or self.end is None:
            raise RuntimeError()
        return int(self.end - self.begin)

    @property
    def fps(self):
        return self.frames / self.time

    def __str__(self) -> str:
        t = self.time
        h = t // 60
        min = (t - h*60) // 60
        s = int(t - h*60 - min*60)
        ms = int((t - np.floor(t))*1000)

        return f"Execution time: {h}:{min}:{s}:{ms}, processed {self.frames} frames, throughput: {self.fps} fps."

    def __repr__(self) -> str:
        t = self.time
        h = t // 60
        min = (t - h*60) // 60
        s = np.floor(t - h*60 - min*60)
        ms = np.floor((t - np.floor(t))*1000)

        return f'TimeMeasurement(context="{self.context_name}","{h}:{min}:{s}:{ms}", frames={self.frames}, throughput={self.fps})'


1. Define Accuracy metric calculation (method __call__).

In [None]:
class AccuracyMetic:
    
    def __init__(self) -> None:
        pass

    def __call__(self, y_pred: np.ndarray, y_ref: np.ndarray) -> float:
        """
        :param y_pred: array of shape (batch_size, num_of_classes) type float
        :param y_ref: array with shape (batch_size,) and type int
        :return: scalar as accuracy metric for batch
        """
        # find predicted class for each element of batch for y_pred
        y_pred_class = ...
        # compare idx of predicted class with reference output class idx
        cmp = ...
        # sum proper predictions number and divide it by batch size
        score  = ...
        ...
        return score


*Additional task: 

Define CrossEntropy loss function:

In [None]:
class CrossEntropyLoss:
    def __init__(self) -> None:
        pass
        
    def __call__(self, 
                 y_pred: np.ndarray, 
                 y_ref: np.ndarray
                 ) -> Any:
        ...
        return 0.0

2. Instantiate:
- `EvalLoader` as `loader`
- `AccuracyMetric` as `metric`
- `CrossEntropyLoss` (you can allow it to return 0.0) as `criterion`

In [None]:
loader = ...
metric = ...
criterion = ...
tm = TimeMeasurement("Evaluation on KV260", loader.batch_size * len(loader))

3. Define softmax function:

P.S. You can look at the PyTorch documentation of softmax function. 

In [None]:
def softmax(x: np.ndarray, axis=1):
    ...
    return x

Define conversion methods:
4. `input_float_to_int8(x)` - conversion from floating point number of range [0.0;1.0] 
to signed 8 bit signed type (np.int8):
- divide x by fixed point precision
- apply floor function
- limit values to range [-128; 127]
- convert to type np.int8 

5. `output_int8_to_float(y)` - conversion from int8 value as fixed point to floating point value:
- multiply y by precision
- convert type to np.float32

In [7]:
class NetworkDPU:
    
    def __init__(self, xmodel_path: str = 'MiniResnet_VAI.xmodel', dpu_path: str = 'dpu.bit'):
        # loading hardware architecture
        self.ov: pynq_dpu.DpuOverlay = pynq_dpu.DpuOverlay(dpu_path, download=True)
        # loading network architecture
        self.ov.load_model(xmodel_path)
        self.dpu = self.ov.runner
        
        # get in/out tensors descriptions
        inputTensors = self.dpu.get_input_tensors()
        outputTensors = self.dpu.get_output_tensors()
        
        # get list of shapes
        shapeIn = np.array([it.dims for it in inputTensors])
        shapeOut = np.array([ot.dims for ot in outputTensors])
        self.shapeIn = shapeIn
        self.shapeOut = shapeOut
        # create buffers for accelerator
        self.buff_in = [np.zeros(sh, np.int8, order='C') for sh in shapeIn]
        self.buff_out = [np.zeros(sh, np.int8, order='C') for sh in shapeOut]
        # get fixed point representation format (signed) 
        self.input_repr = [(it.get_attr('bit_width'), it.get_attr('fix_point')) for it in inputTensors]
        self.output_repr = [(ot.get_attr('bit_width'), ot.get_attr('fix_point')) for ot in outputTensors]
    
    def input_float_to_int8(self, x: np.ndarray) -> np.ndarray:
        BIT_WIDTH, PRECISION_BITS = self.input_repr[0]
        ...
        return x
    
    def output_int8_to_float(self, y: np.ndarray):
        BIT_WIDTH, PRECISION_BITS = self.output_repr[0]
        ...
        return y
    
    def process(self, x: np.ndarray):
        x = self.input_float_to_int8(x)
        
        # fill input buffer
        self.buff_in[0] = x
        # start DPU thread
        job_id = self.dpu.execute_async(self.buff_in, self.buff_out)
        # wait for thread end to join it
        self.dpu.wait(job_id)
        # read from output buffer
        y = self.buff_out[0]
        
        y = self.output_int8_to_float(y)
        
        y = softmax(y)
        return y
    
    def __call__(self, x: np.ndarray) -> Any:
        return self.process(x)
    

vart::Runner@0x22d68b30


6. Instantiate network:
- use path to file `*.xmodel` as xmodel_path 
- use path to file `*.bit` as dpu_path 

Notes:
- files `*.bit`, `*.xclbin` and `*.hwh` must be located in the same directory 

and have the same base name

In [None]:
net = NetworkDPU(xmodel_path=..., 
                 dpu_path=...)

In [8]:
def evaluation(model: NetworkDPU,
               data_loader: EvalLoader,
               criterion: CrossEntropyLoss,
               metric: AccuracyMetic,
               ) -> Tuple[float, float]:
    """
    Eval pass generator data through the model.
    
    :param model: network
    :param data_generator: data loader
    :param criterion: criterion / loss two arg function
    :param metric: metric object - two arg function
    :return: loss_value, metric_value
    """
    print(f"Running on platform: {platform.platform()}, "
          f"machine: {platform.machine()}, "
          f"python_version: {platform.python_version()}, "
          f"processor: {platform.processor()}, "
          f"system: {platform.system()}, "
          )
    total_loss: float = 0.0
    total_accuracy: float = 0.0
    samples_num: int = 0
    
    for i, (X, y_ref) in tqdm.tqdm(enumerate(data_loader),):
        y_pred = model(X)
        
        # calculate loss
        loss = criterion(y_pred, y_ref)
        
        # calculate accuracy
        accuracy = metric(y_pred, y_ref)

        total_loss += loss * y_pred.shape[0]
        total_accuracy += accuracy * y_pred.shape[0]
        samples_num += y_pred.shape[0]

    if samples_num == 0:
        return 0.0, 0.0

    return total_loss / samples_num, total_accuracy / samples_num


7. Run evaluation function with proper arguments:

In [9]:
with tm:
    loss, acc = evaluation(...)
    
print(str(tm))
print("Loss: ", loss)
print("Accuracy: ", acc)


Running on platform: Linux-5.4.0-1017-xilinx-zynqmp-aarch64-with-glibc2.29, machine: aarch64, python_version: 3.8.10, processor: aarch64, system: Linux, 


10000it [00:06, 1588.21it/s]

Execution time: 0:0:6:0, processed 10000 frames, throughput: 1666.6666666666667 fps.
Loss:  0.0
Accuracy:  0.9842



