In [None]:
from nntool.api import NNGraph
from nntool.api.utils import quantization_options, model_settings
from nntool.api.utils import qsnr
from nntool.quantization.qtype import QType
import numpy as np
import random, os
from PIL import Image
from scipy.spatial import distance
from numpy.linalg import norm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib widget
import logging
logging.basicConfig(level=logging.ERROR)

## Load Net and Print Stats

In [None]:
# Import the ONNX model; load_quantization=False means no stored quantization
# is loaded together with the graph.
G = NNGraph.load_graph("shufflenet.onnx", load_quantization=False)
# Uncomment to render the freshly imported graph:
#G.draw(filepath="draw", view=True)

# Collect the size/complexity figures first, then print them as one report.
max_activ_size, total_params = G.total_memory_usage
ops = G.total_ops
report = [
    f"{G.name}:",
    f"\tMax Active Size:\t{max_activ_size} elements",
    f"\tTotal # Parameters:\t{total_params} elements",
    f"\tTotal # Operations:\t{ops / 1e6:.2f} MOps",
]
print("\n".join(report))

In [None]:
# Prepare the imported graph before quantization: run adjust_order() first,
# then the 'scaled_match_group' and 'expression_matcher' fusion passes, in
# exactly this order.
G.adjust_order()
G.fusions('scaled_match_group')
G.fusions('expression_matcher')

## Quantize Net

In [None]:
from tqdm import tqdm

# Root folder that is scanned recursively for calibration images.
folder_in = "../quant_dataset"

# Seed Python's RNG so that random sampling of the calibration set (see the
# commented-out random.choices line in representative_dataset) is repeatable.
random.seed(10)

# Full path of every file found anywhere below folder_in.
CALIBRATION_IMGS = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(folder_in)
    for file in files
]

def representative_dataset():
    """Yield the calibration images one at a time as float32 arrays.

    Every path in ``CALIBRATION_IMGS`` is opened with PIL, converted to
    ``np.float32``, scaled by 1/256 and transposed with ``(2, 0, 1)`` (the
    last axis is moved to the front) before being yielded. Progress is
    reported through ``tqdm``.

    Yields:
        np.ndarray: the preprocessed image.
    """
    #for image in tqdm(random.choices(CALIBRATION_IMGS, k=100)):
    for image in tqdm(CALIBRATION_IMGS):
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array, instead of leaving it open until
        # the Image object is garbage collected.
        with Image.open(image) as pil_img:
            img = np.array(pil_img).astype(np.float32)
        img = img / 256
        img = img.transpose(2, 0, 1)
        #img=img.reshape(3,112,112)
        yield img


# Nodes that get a floating-point scheme instead of the graph-wide 8-bit one.
float_nodes = [
    '_gdc_gdc_0_Conv_fusion_qin0',
    '_gdc_gdc_0_Conv_fusion',
    '_linearconv_Conv_qin0',
    '_linearconv_Conv',
    '_linearconv_Conv_reshape',
    '_Reshape_2',
    'output_1',
]

# Feed the calibration images through the graph to gather the statistics
# that G.quantize() consumes.
stats = G.collect_statistics(representative_dataset())

# Per-node overrides: one fresh bfloat16 FLOAT option set for each listed node.
nodeqdict = {}
for node_name in float_nodes:
    nodeqdict[node_name] = quantization_options(scheme="FLOAT", float_type="bfloat16")

#nodeqdict.update({'input_1':quantization_options(bits=8,use_ne16=True,hwc=True,force_input_size=8,force_output_size=16)})

# Graph-wide default: 8 bits with use_ne16 and hwc enabled.
graph_wide_options = quantization_options(bits=8, use_ne16=True, hwc=True)

G.quantize(
    statistics=stats,
    graph_options=graph_wide_options,
    # Select specific nodes and move to different quantization Scheme - TOTAL FLEXIBILITY
    node_options=nodeqdict,
)

# Alternative: quantize the whole graph to float16.
# G.quantize(
#     statistics=stats,
#     graph_options={
#         "scheme": "FLOAT",
#         "float_type": "float16"
#     })

## Eval Quantization

In [None]:
# Load two face crops and scale their pixel values by 1/256, the same scaling
# that representative_dataset applies to the calibration images.
francesco_1 = np.array(Image.open("../cropped_faces/francesco_1.png_face_crop.ppm"))
francesco_1 = (francesco_1.astype(np.float32)) / 256
#The NN is set as HWC so no need for transpose
#francesco_1 = francesco_1.transpose(2, 0, 1)

francesco_2 = np.array(Image.open("../cropped_faces/francesco_2.png_face_crop.ppm"))
# Scale the second image itself. This line used to read from francesco_1,
# which discarded the image just loaded and divided the first image by 256 a
# second time, so the comparisons between the two faces actually compared
# francesco_1 with a rescaled copy of itself.
francesco_2 = (francesco_2.astype(np.float32)) / 256
#francesco_2 = francesco_2.transpose(2, 0, 1)

# Run each face through the graph twice: once with quantize=False and once
# with quantize=True, dequantize=True. The four results are compared below.
float_execution_0 = G.execute([francesco_1], quantize=False)
quant_execution_0 = G.execute([francesco_1], quantize=True, dequantize=True)

float_execution_1 = G.execute([francesco_2], quantize=False)
quant_execution_1 = G.execute([francesco_2], quantize=True, dequantize=True)

def cos_sim(a,b):
    """Return the cosine *distance* between two 1-D vectors, rounded to 4 decimals.

    Despite its name, the function returns ``1 - cosine_similarity``: 0 for
    vectors pointing in the same direction, 1 for orthogonal vectors and 2 for
    opposite ones.

    Args:
        a: first vector (anything accepted by ``np.dot`` / ``norm``).
        b: second vector, same length as ``a``.

    Returns:
        The cosine distance rounded to four decimal places.
    """
    similarity = np.dot(a, b) / (norm(a) * norm(b))
    return round(1 - similarity, 4)

# Each entry: (title, first execution result, second execution result).
# The last element of every result is reshaped to a 128-element vector before
# the cosine distance is computed.
comparisons = [
    ("Cos distance --> Francesco_0 float , Francesco_0 quant", float_execution_0, quant_execution_0),
    ("Cos distance --> Francesco_0 float , Francesco_1 float", float_execution_0, float_execution_1),
    ("Cos distance --> Francesco_0 quant , Francesco_1 quant", quant_execution_0, quant_execution_1),
]
for title, exec_a, exec_b in comparisons:
    print(title)
    print(cos_sim(np.array(exec_a[-1]).reshape(128), np.array(exec_b[-1]).reshape(128)))

# Raw final outputs of the float and the quantized run on the first face.
for execution in (float_execution_0, quant_execution_0):
    print(execution[-1])

# Print the per-layer QSNR of the float run versus the quantized run.
from nntool.graph.types import ConstantInputNode
qsnrs_by_layer = G.qsnrs(float_execution_0, quant_execution_0)
print("QSNR layer by layer (skipping constant layers like weights and biases):")
print(f"{'Layer Name':>30} (  #): {'QSNR':4}")
for i, q in enumerate(qsnrs_by_layer):
    node = G[i]
    if not isinstance(node, ConstantInputNode):
        # Variant with the name truncated to 20 characters:
        #print(f"{node.name[0:20]:>30} ({i:3}): {q:4}")
        print(f"{node.name:>30} ({i:3}): {q:4}")

# Draw the graph to 'graph' with fusions and quantization labels enabled.
G.draw(fusions=True,filepath='graph',quant_labels=True,nodes=G.nodes())
# G.qshow()


In [None]:
# np.max(float_execution_0[118][0]),np.min(float_execution_0[118][0])
# G.qsnrs(float_execution_0,quant_execution_0)[118]

# fig, ax = plt.subplots()
# ax.plot(float_execution_0[118][0].flatten())
# ax.plot(quant_execution_0[118][0].flatten())
# fig.show()

## For Debug

# G.quantization[G[118].name].cache
# float_execution_0[118][0].shape

# print(i,qsnr(float_execution_0[118][0],quant_execution_0[118][0]))

# print(float_execution_0[118][0],quant_execution_0[118][0])

# print(G.quantization[G[118].name].in_qs[1].scale)
# #print(G.quantization[G[118].name].cache)
# G.indexed_in_edges(G[118])


# QType.from_array_sq(G["_stage3_stage3_3_branch2_branch2_3_Conv_weights"].value,dtype=np.uint8,quantized_dimension=0).scale

# np.save("Overflow.npy",G["_stage3_stage3_3_branch2_branch2_3_Conv_weights"].value)

# G.quantization[G[118].name].in_qs[1].scale[20]=0
# for i in range(0,48):
#     print(i,qsnr(float_execution_0[118][0][:,:,i],quant_execution_0[118][0][:,:,i]))
# #
# G.quantization[G[94].name].in_qs[1].scale[13]
# G.quantization[G[94].name].in_qs[1].scale[42]


## Execute on Target

In [None]:
# The target works on fully integer values only, so the quantized results are
# kept as they are (dequantize=False).
int_execution = G.execute([francesco_1], quantize=True, dequantize=False)

In [None]:
# Autotiler option: have the autotiler allocate the network input itself so
# that the space can be reused after the first layer, leaving more L2 for the
# rest of the network.
first_node = G[0]
first_node.at_options.allocate = 1
# Display the resulting options as the cell output.
first_node.at_options


In [None]:
# Memory budget and tensor location handed to the code generator.
target_settings = model_settings(l1_size=128000,l2_size=512000,tensor_directory="./tensors")

# Generate the project in test_run and run it on the board (dont_run=False),
# feeding the quantized input from the NNTool integer execution.
res = G.execute_on_target(
    directory="test_run",
    platform="board",
    pmsis_os='freertos',
    input_tensors=int_execution[0],
    output_tensors=0,
    settings=target_settings,
    cmake=True,
    at_log=True,
    at_loglevel=1,
    write_out_to_file=True,
    print_output=True,
    dont_run=False,
)
# Print the tail of the log; the slice [-29:-1] leaves out the very last line.
for log_line in res.at_log[-29:-1]:
    print(log_line)

# Alternative: only generate the autotiler model without running it.
# res = G.gen_at_model(
#     settings=model_settings(l1_size=128000,l2_size=512000,tensor_directory="./tensors"),
#     directory="test_model",
#     at_loglevel=1
# )




In [None]:
print("Out Name, QSNR NNTool vs Target run")
for outn in G.output_nodes():
    # Binary dump named after the capitalized node name; presumably written
    # into test_run/ by the execute_on_target run (write_out_to_file=True).
    target_file = f"test_run/{outn.name.capitalize()}.bin"
    # Read it back with the dtype of the node's first input quantization record.
    target_dtype = G.quantization[outn.name].in_qs[0].dtype
    out_target = np.fromfile(target_file, target_dtype)
    # Matching result from the NNTool integer execution, looked up by step index.
    out_nntool = int_execution[G[outn.name].step_idx]
    target_vs_nntool = G.qsnrs([out_target], [out_nntool[0].flatten()])
    print(f"{outn.name}, {target_vs_nntool}")
    #print(out_nntool[0].flatten())
    #print(out_target)

## Project Generation

In [None]:
# Memory budget and tensor location for the generated project.
project_settings = model_settings(l1_size=128000,l2_size=512000,tensor_directory="./tensors")

# Write a standalone project for the board into test_prj, using the quantized
# input from the NNTool integer execution as input tensor.
G.gen_project(
    directory="test_prj",
    platform="board",
    input_tensors=int_execution[0],
    output_tensors=0,
    settings=project_settings,
    cmake=True,
    at_loglevel=1,
)