In [1]:
from nntool.api import NNGraph
from nntool.api.utils import quantization_options, model_settings
import numpy as np
import random
import cv2
import os
from scipy.spatial import distance
from numpy.linalg import norm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib widget
import logging
logging.basicConfig(level=logging.ERROR)

## Load Net and Print Stats

In [2]:
# Import the ONNX model as an NNTool graph. load_quantization=False is passed,
# presumably so that no quantization info is read from the model file.
G = NNGraph.load_graph("face_id.onnx", load_quantization=False)
#G.draw(filepath="draw", view=True)

# Size figures reported by the graph itself.
max_activ_size, total_params = G.total_memory_usage
ops = G.total_ops

# Build the report first, then emit it with a single print.
summary_lines = [
    f"{G.name}:",
    f"\tMax Active Size:\t{max_activ_size} elements",
    f"\tTotal # Parameters:\t{total_params} elements",
    f"\tTotal # Operations:\t{ops / 1e6:.2f} MOps",
]
print("\n".join(summary_lines))

face_id:
	Max Active Size:	257216 elements
	Total # Parameters:	520216 elements
	Total # Operations:	33.54 MOps


In [3]:
# Graph preparation ahead of quantization: adjust the tensor order first, then
# apply the two fusion passes in the order listed.
G.adjust_order()
for fusion_set in ('scaled_match_group', 'expression_matcher'):
    G.fusions(fusion_set)

## Quantize Net

In [4]:
from tqdm import tqdm

# Root folder of the calibration images (searched recursively). The location
# is machine specific, so it can be overridden through the
# FACE_ID_CALIBRATION_DIR environment variable; the default is the path that
# was previously hard-coded here.
folder_in = os.environ.get(
    "FACE_ID_CALIBRATION_DIR",
    "/home/francesco/works/machine_learning/face_id/DATASETS/CASIA-WebFace_cropped/",
)

# Seed the sampler so the calibration subset is the same on every run.
random.seed(10)

# os.walk lists directory entries in an arbitrary, filesystem-dependent order.
# Sorting the paths makes the seeded random draw pick the same files on every
# machine and run, which the seed alone does not guarantee.
CALIBRATION_IMGS = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(folder_in)
    for file in files
)

if not CALIBRATION_IMGS:
    # os.walk yields nothing for a missing or empty folder; fail here with a
    # clear message rather than later inside random.choices.
    raise FileNotFoundError(f"No calibration images found under '{folder_in}'")

def representative_dataset(num_samples=100):
    """Yield calibration images for statistics collection.

    ``num_samples`` paths are drawn with replacement from ``CALIBRATION_IMGS``
    using the global ``random`` state, so the selection depends on the seed
    set before the generator is first iterated.

    Args:
        num_samples: number of paths to draw (default 100, the value that was
            previously hard-coded).

    Yields:
        float32 arrays: the image as read by OpenCV, divided by 256 and with
        the channel axis moved first (HWC -> CHW).

    Files that OpenCV cannot decode are reported and skipped, so fewer than
    ``num_samples`` arrays may be produced.
    """
    for image in tqdm(random.choices(CALIBRATION_IMGS, k=num_samples)):
        raw = cv2.imread(image)
        if raw is None:
            # cv2.imread signals an unreadable/non-image file by returning
            # None instead of raising; skip it rather than crash on .astype.
            tqdm.write(f"Skipping unreadable calibration image: {image}")
            continue
        img = raw.astype(np.float32) / 256
        # Transpose (not reshape) so pixel/channel correspondence is preserved.
        yield img.transpose(2, 0, 1)



# Names of the nodes that are excluded from the 8-bit scheme and quantized
# with the FLOAT/bfloat16 scheme instead.
float_nodes = [
    '_gdc_gdc_0_Conv_fusion_qin0',
    '_gdc_gdc_0_Conv_fusion',
    '_linearconv_Conv_qin0',
    '_linearconv_Conv',
    '_linearconv_Conv_reshape',
    '_Reshape_2',
    'output_1',
]

# Gather statistics from the calibration images.
stats = G.collect_statistics(representative_dataset())

# Per-node overrides: select specific nodes and move them to a different
# quantization scheme - TOTAL FLEXIBILITY.
bfloat16_overrides = {
    n: quantization_options(scheme="FLOAT", float_type="bfloat16")
    for n in float_nodes
}

# Graph-wide default: 8 bit, NE16 enabled, HWC tensor order.
# (previously tried extras: force_input_size=16, force_output_size=16)
G.quantize(
    statistics=stats,
    graph_options=quantization_options(bits=8, use_ne16=True, hwc=True),
    node_options=bfloat16_overrides,
)

# Alternative (disabled): quantize the whole graph with the float16 scheme.
# G.quantize(
#     statistics=stats,
#     graph_options={
#         "scheme": "FLOAT",
#         "float_type": "float16"
#     })

100%|██████████| 100/100 [00:14<00:00,  6.83it/s]


## Eval Quantization

In [13]:
def _load_face(path):
    """Read an image with OpenCV and return it as float32 divided by 256.

    The array keeps the layout returned by ``cv2.imread`` (no transpose).

    Args:
        path: image file to read.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``path`` (``cv2.imread``
            returns ``None`` in that case instead of raising).
    """
    raw = cv2.imread(path)
    if raw is None:
        raise FileNotFoundError(f"Cannot read image: {path}")
    return raw.astype(np.float32) / 256


# The CHW transpose is intentionally NOT applied here: the graph was quantized
# with hwc=True, so it presumably expects the HWC layout OpenCV returns.
francesco_1 = _load_face("../cropped_faces/francesco_1.png_face_crop.ppm")
francesco_2 = _load_face("../cropped_faces/francesco_2.png_face_crop.png")

# Run each image through the float graph and through the quantized graph
# (quantized run is dequantized so both results are comparable floats).
float_execution_0 = G.execute([francesco_1], quantize=False)
quant_execution_0 = G.execute([francesco_1], quantize=True, dequantize=True)

float_execution_1 = G.execute([francesco_2], quantize=False)
quant_execution_1 = G.execute([francesco_2], quantize=True, dequantize=True)

def cos_sim(a,b):
    """Return the cosine *distance* between ``a`` and ``b`` as a percentage.

    Despite its name, this is ``100 * (1 - cosine_similarity)``: 0 for vectors
    pointing the same way, 100 for orthogonal vectors, 200 for opposite ones.

    Args:
        a, b: 1-D vectors of equal length.

    Returns:
        The distance in percent, rounded to two decimals. The result is NaN if
        either vector has zero norm.
    """
    cosine = np.dot(a, b) / (norm(a) * norm(b))
    # Scale first, round last: 100 * round(x, 4) reintroduces binary float
    # noise into the result (e.g. 28.999999999999996 instead of 29.0).
    return np.round(100 * (1 - cosine), 2)

# Optional checks (disabled): cosine distance between the float and quantized
# results for the same image, and between the two images in float / quantized.
# They reshape the last entry of each execution result to a 128-element vector.
# print("Cos distance --> Francesco_0 float , Francesco_0 quant")
# print(cos_sim(np.array(float_execution_0[-1]).reshape(128),np.array(quant_execution_0[-1]).reshape(128)))

# print("Cos distance --> Francesco_0 float , Francesco_1 float")
# print(cos_sim(np.array(float_execution_0[-1]).reshape(128),np.array(float_execution_1[-1]).reshape(128)))
# print("Cos distance --> Francesco_0 quant , Francesco_1 quant")
# print(cos_sim(np.array(quant_execution_0[-1]).reshape(128),np.array(quant_execution_1[-1]).reshape(128)))

# Show the last entry of the quantized execution result for the first image
# (presumably the network output - TODO confirm).
#print(float_execution_0[-1])
print(quant_execution_0[-1])


# Optional check (disabled): per-layer QSNR between float and quantized runs,
# skipping constant nodes.
# from nntool.graph.types import ConstantInputNode
# qsnrs_by_layer = G.qsnrs(float_execution_0, quant_execution_0)
# print("QSNR layer by layer (skipping constant layers like weights and biases):")
# print(f"{'Layer Name':>30} (  #): {'QSNR':4}")
# for i, q in enumerate(qsnrs_by_layer):
#     if isinstance(G[i], ConstantInputNode):
#         continue
#     #print(f"{G[i].name[0:20]:>30} ({i:3}): {q:4}")
#     print(f"{G[i].name:>30} ({i:3}): {q:4}")
    
# #G.draw(fusions=True,filepath='graph',quant_labels=True,nodes=G.nodes())
# G.qshow()


ValueError: input_1 received input of shape (3, 112, 112) but expecting [112, 112, 3]

In [6]:
# np.max(float_execution_0[109][0]),np.min(float_execution_0[109][0])
# G.qsnrs(float_execution_0,quant_execution_0)[109]

# fig, ax = plt.subplots()
# ax.plot(float_execution_0[109][0].flatten())
# ax.plot(quant_execution_0[109][0].flatten())
# fig.show()

## For Debug

# G.quantization[G[109].name].cache
# float_execution_0[109][0].shape

# from nntool.api.utils import qsnr
# G.quantization[G[109].name].in_qs[1].scale[20]=0
# for i in range(0,48):
#     print(i,qsnr(float_execution_0[109][0][:,:,i],quant_execution_0[109][0][:,:,i]))
# #
# G.quantization[G[94].name].in_qs[1].scale[13]
# G.quantization[G[94].name].in_qs[1].scale[42]


/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/francesco/snap/code/common/.cache/gio-modules/libgiolibproxy.so


## Execute on Target

In [12]:
# The target works on quantized values only, so the reference run quantizes
# the input and keeps every result un-dequantized (NO DEQUANTIZE).
int_execution = G.execute(
    [francesco_1],
    quantize=True,
    dequantize=False,
)

[array([[[ 42,  51,  46],
        [ 36,  45,  44],
        [ 54,  66,  66],
        ...,
        [ 64,  73,  68],
        [ 75,  84,  79],
        [104, 117, 107]],

       [[ 36,  45,  42],
        [ 36,  45,  44],
        [ 42,  50,  52],
        ...,
        [ 64,  74,  66],
        [ 83,  94,  88],
        [ 82,  95,  86]],

       [[ 33,  37,  38],
        [ 36,  45,  44],
        [ 42,  51,  50],
        ...,
        [ 75,  85,  77],
        [ 82,  95,  88],
        [ 82,  95,  85]],

       ...,

       [[ 21,  27,  23],
        [ 17,  21,  22],
        [ 17,  21,  22],
        ...,
        [142, 162, 151],
        [194, 228, 204],
        [225, 255, 234]],

       [[ 17,  21,  20],
        [ 21,  27,  25],
        [ 14,  23,  18],
        ...,
        [ 70,  80,  81],
        [ 74,  85,  81],
        [146, 168, 156]],

       [[ 19,  28,  27],
        [ 19,  28,  23],
        [ 16,  22,  18],
        ...,
        [110, 129, 127],
        [ 63,  73,  75],
        [ 60,  69,  68]

In [8]:
# Autotiler option on node 0 of the graph: let the Autotiler allocate the
# network input itself, so that the space can be reused after the first layer
# and more L2 is left for the rest of the network.
input_node = G[0]
input_node.at_options.allocate = 1
# Display the resulting options as the cell output.
input_node.at_options

{'ALLOCATE': 1}

In [9]:
# Memory sizes and tensor directory handed to the model generator.
target_settings = model_settings(
    l1_size=128000,
    l2_size=512000,
    tensor_directory="./tensors",
)

# Generate the project in "test_run" and execute it (dont_run=False) with the
# quantized input taken from the NNTool reference run.
res = G.execute_on_target(
    directory="test_run",
    pmsis_os='freertos',
    platform="board",
    cmake=True,
    settings=target_settings,
    input_tensors=int_execution[0],
    output_tensors=0,
    write_out_to_file=True,
    print_output=True,
    at_log=True,
    at_loglevel=1,
    dont_run=False,
)

# Print the tail of the Autotiler log (entries -29 up to, but excluding, the last).
for log_line in res.at_log[-29:-1]:
    print(log_line)

# Alternative (disabled): only generate the Autotiler model, without running it.
# res = G.gen_at_model(
#     settings=model_settings(l1_size=128000,l2_size=512000,tensor_directory="./tensors"),
#     directory="test_model",
#     at_loglevel=1
# )




Script started, output log file is '/tmp/tmpmcqnigzx/log.txt'.
-- [[36mCustom BSP[m] Custom BSP unused.
-- Found Python: /home/francesco/libraries/anaconda3/envs/ml_train/bin/python3.9 (found version "3.9.16") found components: Interpreter 
-- [[1;36mPython[m] Interpreter found here /home/francesco/libraries/anaconda3/envs/ml_train/bin/python3.9
-- [[1;32mKconfig[m] KCONFIG_CONFIG environnement variable has not been exported.
-- [[1;32mKconfig[m] Using default value sdk: "sdk.config" and app: "sdk.config".
-- [[1;36mNNTOOL[m] Using nntool in /home/francesco/works/release/gitlab_sdk/tools/nntool/scripts/nntool
-- [[1;32mSFU[m] Using SFU in 
-- [[1;33mDT[m] Creating devicetree folder
-- [[1;33mDT[m] Formating configuration file
-- [[1;33mDT[m] Selected options : [36mgap9_v2_wlcsp;gap9mod_v1_0_b;gap9evk_v1_3[m
-- [[1;33mDT[m] Compiling [36mgap9_v2_wlcsp.dts[m
-- [[1;33mDT[m] Compiling [36mgap9mod_v1_0_b.dts[m
-- [[1;33mDT[m] Compiling [36mgap9evk_v1_3.dts[m

KeyboardInterrupt: 

In [None]:
print("Out Name, QSNR NNTool vs Target run")
for outn in G.output_nodes():
    node_name = outn.name

    # Decode the binary dump written by the target run, using the dtype of the
    # output node's first input quantization record.
    target_dtype = G.quantization[node_name].in_qs[0].dtype
    out_target = np.fromfile(f"test_run/{node_name.capitalize()}.bin", target_dtype)

    # Matching NNTool result, looked up by the node's step index.
    out_nntool = int_execution[G[node_name].step_idx]
    nntool_flat = out_nntool[0].flatten()

    print(f"{node_name}, {G.qsnrs([out_target], [nntool_flat])}")
    print(nntool_flat)
    print(out_target)

Out Name, QSNR NNTool vs Target run
output_1, (27,)
[-0,275391 0,042969 -0,089844 0,052734 -0,312500 0,166992 -0,255859
 0,523438 -0,421875 0,128906 -0,035156 0,175781 0,119141 0,160156 0,265625
 -0,184570 0,053711 -0,371094 -0,460938 -0,267578 0,554688 0,402344
 -0,005859 0,207031 0,007812 -0,156250 -0,550781 -0,632812 -0,100586
 -0,144531 -0,011719 -0,269531 0,546875 0,117188 0,328125 -0,013672
 -0,523438 0,273438 0,601562 -0,085938 0,337891 -0,021484 -0,410156
 0,425781 0,077148 -0,562500 -0,195312 0,453125 0,001953 -0,300781
 -0,162109 -0,240234 -0,082031 0,240234 -0,207031 -0,289062 -0,070312
 0,302734 0,339844 0,056641 0,531250 -0,257812 -0,539062 -0,519531
 0,117188 0,226562 -0,925781 -0,003906 0,937500 -0,220703 0,027344
 0,277344 -0,531250 -0,511719 -0,337891 -0,320312 0,339844 -0,127930
 -0,136719 0,384766 0,060547 -0,130859 0,085938 0,185547 -0,423828
 -0,143555 -0,029297 -0,146484 0,542969 -0,386719 -0,353516 0,105469
 0,384766 -0,613281 -0,050781 0,062500 0,250000 0,232422