In [1]:
!pip3 install brevitas -q

In [1]:

import sys
import torch
sys.path.append('..')
# from .. 
import metrics, preprocessing, utils, training
import numpy as np

# Thread limits for the project data generators.
# NOTE(review): the exact meaning of these class attributes lives in `preprocessing` - confirm.
preprocessing.BaseGenerator.MAX_NUMBER_OF_THREADS = 2
preprocessing.YoloDataGenerator.NUMBER_OF_THREADS = 1

# Switch in the project `metrics` module (presumably a compatibility mode for older torch - confirm).
metrics.CONSTANTS.OLD_TORCH = True
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Dataset root and saved fold split; both paths are relative to the notebook's working directory.
dataset_local_path = '../../DATASETS/Merged_dataset'
folds = training.load_folds('../folds_state_path_bbox.pkl')
# Batch size handed to the data generators built later in the notebook.
batch_size = 16


In [38]:
class DWConv2d(torch.nn.Module):
    """Depthwise 3x3 convolution with optional BatchNorm and ReLU.

    Every input channel is convolved on its own (``groups=in_ch``) and yields
    ``intermediate_channels`` output channels, so the block outputs
    ``in_ch * intermediate_channels`` channels at unchanged spatial size
    (3x3 kernel, padding 1).

    :param in_ch: number of input channels
    :param intermediate_channels: output channels produced per input channel
    :param bias: add a learnable bias to the convolution
    :param use_bn: apply ``BatchNorm2d`` after the convolution
    :param use_relu: apply an in-place ``ReLU`` after the (optional) BatchNorm
    :param device: accepted for signature compatibility only; it is not used
        here, the module is moved by the caller
    """

    def __init__(self,in_ch,intermediate_channels=1, bias=False, use_bn=True, use_relu=False, device=None):
        super().__init__()
        self.conv = torch.nn.Conv2d(in_ch,in_ch*intermediate_channels,3,padding=1, groups=in_ch, bias=bias)

        self.bn = torch.nn.BatchNorm2d(in_ch*intermediate_channels) if use_bn else None
        self.relu = torch.nn.ReLU(True) if use_relu else None

    def reorder(self, order:torch.tensor):
        """Split the conv into one depthwise conv per channel multiplier.

        The single grouped convolution is replaced by ``out_channels // in_channels``
        depthwise convolutions (one output per input channel each) whose outputs
        are concatenated in ``forward``. The weights are permuted so that the
        layer expects its input channels in the order given by ``order``:
        input position ``k`` must carry the channel that originally sat at
        position ``order[k]``. BatchNorm parameters and running statistics are
        permuted to follow the new output layout.

        :param order: 1-D index tensor of length ``in_channels``
        :return: 1-D index tensor; entry ``c`` is the original output channel
            that is now produced at output position ``c``
        :raises RuntimeError: if the layer has already been reordered
        """
        if isinstance(self.conv, list):
            raise RuntimeError("DWConv2d.reorder() has already been applied to this layer")

        conv = self.conv
        ch_in = conv.in_channels
        ch_out = conv.out_channels
        mul = ch_out // ch_in
        has_bias = conv.bias is not None

        convs = []
        indeces = []
        for i in range(mul):
            # original output channels i, mul+i, 2*mul+i, ... permuted by `order`
            ind = torch.arange(0,ch_in)*mul+i
            ind = ind[order]

            L = torch.nn.Conv2d(ch_in,ch_in*1,3,padding=1, groups=ch_in, bias=has_bias)
            # A fresh Conv2d is created on the default device/dtype; move it next to
            # the weights it replaces, otherwise forward() fails on CUDA/double models.
            L = L.to(device=conv.weight.device, dtype=conv.weight.dtype)

            with torch.no_grad():
                L.weight.copy_(conv.weight[ind,...])
                if has_bias:
                    L.bias.copy_(conv.bias[ind,...])

            convs.append(L)
            indeces.append(ind)
            # registered under "0", "1", ... so .to()/state_dict() still see the layers
            self.add_module(str(i),L)

        # self.conv becomes a plain list; forward() dispatches on that
        del self.conv
        self.conv = convs

        indeces = torch.cat(indeces)
        if self.bn is not None:
            with torch.no_grad():
                # advanced indexing copies first, so the in-place writes cannot alias
                self.bn.weight[...] = self.bn.weight[indeces,...]
                self.bn.bias[...] = self.bn.bias[indeces,...]
                self.bn.running_mean[...] = self.bn.running_mean[indeces,...]
                self.bn.running_var[...] = self.bn.running_var[indeces,...]

        return indeces

    def forward(self, x):
        """Apply the convolution(s), then the optional BatchNorm and ReLU."""
        if isinstance(self.conv, list):
            # reordered layout: run every per-multiplier conv and stack the channels
            y = [L(x) for L in self.conv]
            x = torch.cat(y,dim=1) if len(y) > 1 else y[0]
        else:
            x = self.conv(x)

        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)

        return x


class PWConv2d(torch.nn.Module):
    """Pointwise (1x1) convolution with optional BatchNorm, ReLU and 2x2 max-pool.

    The optional stages run in the fixed order conv -> bn -> relu -> max-pool.

    :param in_ch: number of input channels
    :param out_ch: number of output channels
    :param bias: add a learnable bias to the convolution
    :param use_bn: apply ``BatchNorm2d`` after the convolution
    :param use_relu: apply an in-place ``ReLU``
    :param use_mp: apply ``MaxPool2d(2, 2)`` as the last stage
    :param device: accepted for signature compatibility only; not used here
    """

    def __init__(self,in_ch, out_ch, bias=False, use_bn=True, use_relu=False, use_mp=False, device=None):
        super().__init__()
        self.conv = torch.nn.Conv2d(in_ch, out_ch, 1, padding=0, bias=bias)
        self.bn = torch.nn.BatchNorm2d(out_ch) if use_bn else None
        self.relu = torch.nn.ReLU(True) if use_relu else None
        self.mp = torch.nn.MaxPool2d(2, 2) if use_mp else None

    def reorder(self, order:torch.tensor):
        """Permute the input-channel axis of the conv weight by ``order``.

        After the call, input position ``k`` is multiplied by the weights that
        previously belonged to input channel ``order[k]``. Output channels are
        not touched.

        :param order: 1-D index tensor over the input channels
        :return: identity index tensor ``0 .. out_channels-1``
        """
        kernel = self.conv.weight
        with torch.no_grad():
            # indexing with a tensor yields a copy, so the in-place write is safe
            kernel.copy_(kernel[:, order, ...])

        return torch.arange(self.conv.out_channels)

    def forward(self, x):
        """Run the convolution followed by every enabled post-processing stage."""
        out = self.conv(x)
        for stage in (self.bn, self.relu, self.mp):
            if stage is not None:
                out = stage(out)
        return out


class AnchorMul(torch.nn.Module):
    """Scale the last ``2 * num_of_anchors`` channels by a learnable factor.

    The module owns one parameter, ``anchors`` (shape ``(1, 2*noa, 1, 1)``,
    initialised uniformly in ``[-1, 1]``). ``forward`` multiplies the trailing
    ``2*noa`` channels of the input by ``exp(anchors)`` and leaves the leading
    channels untouched.

    :param num_of_anchors: number of anchors (two scaled channels per anchor)
    :param device: device the module is moved to after construction
    """

    def __init__(self, num_of_anchors, device=torch.device('cpu')):
        super().__init__()
        self.noa = num_of_anchors
        log_scale = torch.Tensor(1, 2 * num_of_anchors, 1, 1)
        log_scale.uniform_(-1, 1)
        self.register_parameter('anchors', torch.nn.Parameter(data=log_scale, requires_grad=True))

        self.to(device)

    def forward(self, x):
        """Return ``x`` with its last ``2*noa`` channels multiplied by ``exp(anchors)``."""
        tail = 2 * self.noa
        untouched = x[:, :-tail, :, :]
        scaled = x[:, -tail:, :, :] * self.anchors.exp()
        return torch.cat((untouched, scaled), dim=1)

# float LN7
# Stack of depthwise 3x3 (DWConv2d) and pointwise 1x1 (PWConv2d) blocks.
# Four PWConv2d blocks use use_mp=True (MaxPool2d(2,2)), so the spatial size is
# divided by 16 overall; the last layer emits 5*3 = 15 channels.
net = torch.nn.Sequential(
            DWConv2d(3, intermediate_channels=2, bias=False, use_bn=True, use_relu=True, device=device),
            PWConv2d(6,8, bias=True, use_bn=True, use_relu=False, use_mp=True, device=device),
            DWConv2d(8, bias=True, use_bn=True, use_relu=False, device=device),
            DWConv2d(8, intermediate_channels=2, bias=False, use_bn=True, use_relu=True, device=device),
            PWConv2d(16,32, bias=True, use_bn=True, use_relu=False, use_mp=True, device=device),
            DWConv2d(32, bias=True, use_bn=True, use_relu=False, device=device),
            DWConv2d(32, intermediate_channels=2, bias=False, use_bn=True, use_relu=True, device=device),
            PWConv2d(64,64, bias=False, use_bn=True, use_relu=False, use_mp=True, device=device),
            DWConv2d(64, bias=True, use_bn=True, use_relu=False, device=device),
            DWConv2d(64, intermediate_channels=2, bias=True, use_bn=True, use_relu=True, device=device),
            PWConv2d(128,128, bias=False, use_bn=True, use_relu=True, use_mp=True, device=device),
            DWConv2d(128, bias=True, use_bn=True, use_relu=False, device=device),
            DWConv2d(128, intermediate_channels=2, bias=True, use_bn=True, use_relu=True, device=device),
            PWConv2d(256,256, bias=True, use_bn=True, use_relu=True, device=device),
            DWConv2d(256, bias=True, use_bn=True, use_relu=False, device=device),
            DWConv2d(256, intermediate_channels=2, bias=True, use_bn=True, use_relu=True, device=device),
            PWConv2d(512,256, bias=True, use_bn=True, use_relu=True, device=device),
            DWConv2d(256, intermediate_channels=1, bias=False, use_bn=True, use_relu=True, device=device),
            PWConv2d(256,5*3, bias=True, use_bn=False, use_relu=False, device=device)
).to(device)
# Learnable scaling for the last 2*3 output channels; applied outside of `net`.
anchor_mul = AnchorMul(3,device).to(device)

# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
sd = torch.load('weights_float_gciou.pt',map_location=device)
# The checkpoint's own key names are never used below: `k` is overwritten before
# it is read, and the tensors in `v` are matched to the modules purely by position.
k = list(sd.keys())
v = list(sd.values())
# load anchor mul
# The LAST tensor of the checkpoint is taken as the AnchorMul parameter.
am_sd = {list(anchor_mul.state_dict().keys())[0]:v[-1]}
anchor_mul.load_state_dict(am_sd)
# load LN7 weights
# All remaining tensors are paired, in order, with the keys of net.state_dict().
# zip() stops at the shorter sequence, so surplus checkpoint tensors would be
# dropped silently; missing ones make load_state_dict() raise.
k = list(net.state_dict().keys()) # net keys
net_sd = {k:v for k,v in zip(k,v[:-1])}
net.load_state_dict(net_sd)

# Inference mode: BatchNorm uses its running statistics from here on.
net = net.eval()
anchor_mul = anchor_mul.eval()

# Print every state-dict entry with its shape (note: rebinds the globals `k` and `v`).
for k,v in net.state_dict().items():
    print(k,v.shape)

# Start from the identity permutation of the 3 input channels.
order = torch.arange(0,3)

# `renet` is the same object as `net` (no copy), so the loop rewrites `net` in place.
# Each layer receives the output-channel order of the previous layer and returns its own.
renet = net
for n,m in renet.named_children():
#     print(n,"in_order", order)
    order = m.reorder(order)
    print(n,"out_order", order)
    

0.conv.weight torch.Size([6, 1, 3, 3])
0.bn.weight torch.Size([6])
0.bn.bias torch.Size([6])
0.bn.running_mean torch.Size([6])
0.bn.running_var torch.Size([6])
0.bn.num_batches_tracked torch.Size([])
1.conv.weight torch.Size([8, 6, 1, 1])
1.conv.bias torch.Size([8])
1.bn.weight torch.Size([8])
1.bn.bias torch.Size([8])
1.bn.running_mean torch.Size([8])
1.bn.running_var torch.Size([8])
1.bn.num_batches_tracked torch.Size([])
2.conv.weight torch.Size([8, 1, 3, 3])
2.conv.bias torch.Size([8])
2.bn.weight torch.Size([8])
2.bn.bias torch.Size([8])
2.bn.running_mean torch.Size([8])
2.bn.running_var torch.Size([8])
2.bn.num_batches_tracked torch.Size([])
3.conv.weight torch.Size([16, 1, 3, 3])
3.bn.weight torch.Size([16])
3.bn.bias torch.Size([16])
3.bn.running_mean torch.Size([16])
3.bn.running_var torch.Size([16])
3.bn.num_batches_tracked torch.Size([])
4.conv.weight torch.Size([32, 16, 1, 1])
4.conv.bias torch.Size([32])
4.bn.weight torch.Size([32])
4.bn.bias torch.Size([32])
4.bn.running_

In [39]:
# Input image shape; used below as (H, W, C): the example tensor is built as (N, 3, 112, 208).
image_shape = (112, 208, 3)

# Project helpers handed to the data generators further down.
# NOTE(review): the meaning of the two boolean flags of to_anchors_for_iou_loss
# is defined in `preprocessing` - confirm before changing them.
after_load = preprocessing.numpy_to_torch_iou_params(device)
to_anchors_single = lambda *x: preprocessing.to_anchors_for_iou_loss(*x,False,False)
# to_anchors_multi = lambda *x: preprocessing.to_anchors_for_iou_loss(*x,True,True)
to_anchors_multi = lambda *x: preprocessing.to_anchors_for_iou_loss(*x,True,False)
# to_anchors_single = to_anchors_multi

# Three anchors given as pairs, reshaped to (3, 2).
anchors = [22,33,
            5,10,
            15,5
          ]
anchors = np.array(anchors, np.float32).reshape((-1,2))
# Rescale the anchors to the network input size.
# NOTE(review): column 0 is scaled by image_shape[0]/340 (112, the height) and
# column 1 by image_shape[1]/640 (208, the width) - confirm that this matches the
# column order of the anchor pairs and that 340 is the intended reference size.
anchors *= np.array([[image_shape[0]/340, image_shape[1]/640]])

# pass example tensor
torch.cuda.empty_cache()
tensor = torch.rand((8,3,)+image_shape[:2]).to(device)
print("Input shape =",tensor.shape)
with torch.no_grad():
    result = net(tensor)

print("Result shape =",result.shape)

# get yolo parameters
# output_sizes = net.output_sizes(input_size=image_shape[:2][::-1])[-1,:]
# Spatial size of the network output, reversed to (W, H) order.
output_sizes = np.array(result.shape[2:][::-1])
# The probe tensors are no longer needed.
del tensor
del result

print("Anchors: ")
print(anchors) 
print("Output sizes: ")
print(output_sizes) 

# CREATE GENERATORS
def numpy_to_tensor(X,y,device=device):
    """Convert a batch to tensors by delegating to ``utils.data_to_tensor_v3``.

    ``device`` defaults to the notebook-level ``device`` that exists when this
    function is defined (the default is bound at definition time).

    :param X: input batch, forwarded unchanged
    :param y: target batch, forwarded unchanged
    :param device: device forwarded to the project helper
    :return: whatever ``utils.data_to_tensor_v3(X, y, device)`` returns
    """
    return utils.data_to_tensor_v3(X,y,device)
# Input (W, H) integer-divided by twice the output grid size.
# NOTE(review): not referenced by any cell visible in this notebook - confirm it is still needed.
grid_WH2 = image_shape[:2][::-1] // (2*output_sizes)


def _make_generator(name):
    """Build a ``YoloDataGenerator`` with the settings shared by all evaluation sets.

    The generator starts with an empty sample list; the caller assigns
    ``images_labes`` afterwards.

    :param name: generator name forwarded to ``YoloDataGenerator``
    :return: the configured generator (no augmentation, multi-anchor targets)
    """
    return preprocessing.YoloDataGenerator(
                            dataset_local_path,
                            input_shape=image_shape,
                            anchors=anchors,
                            images_labes=[],
                            batch_size=batch_size,
                            name=name,
                            augmentator=None,
                            output_size=output_sizes,
                            after_load=after_load,
                            # bbox_to_anchors=to_anchors_single,
                            bbox_to_anchors=to_anchors_multi,
                            )


val_generator = _make_generator('ValGenerator')
test_generator = _make_generator('TestGenerator')

# Fold 0 supplies the validation samples; the test samples come with the folds object.
_, val_set = folds.__getitem__(0, train_folds=4)
val_generator.images_labes = val_set
test_generator.images_labes = folds.test_set

# decorator -> apply anchor mul before metric calculation 
metric_iou = metrics.SingleObjectIOUsBasedMetrics(anchors, image_shape, device)
def mean_iou(y_pred, y_ref, metric_iou=metric_iou, anchor_mul=anchor_mul):
    """Score predictions with ``metric_iou`` after passing them through ``anchor_mul``.

    ``metric_iou`` and ``anchor_mul`` default to the notebook-level objects that
    exist when this function is defined (defaults are bound at definition time).

    :param y_pred: raw network output; transformed by ``anchor_mul`` first
    :param y_ref: reference, passed unchanged to ``metric_iou``
    :return: whatever ``metric_iou(y_pred, y_ref)`` returns
    """
    y_pred = anchor_mul(y_pred)
    return metric_iou(y_pred, y_ref)



Input shape = torch.Size([8, 3, 112, 208])
Result shape = torch.Size([8, 15, 7, 13])
Anchors: 
[[ 7.247059  10.725    ]
 [ 1.6470588  3.25     ]
 [ 4.9411764  1.625    ]]
Output sizes: 
[13  7]


In [42]:
import torch
import torch.nn as nn


def evaluate(model,
             dataloader,
             evaluator, fl_model=net
             ):
    """Run ``model`` over ``dataloader`` and report the sample-weighted mean score.

    Progress is printed after every batch and a final summary line is printed
    at the end. The reference ``fl_model`` is run and scored on the same
    batches, but its score is not accumulated; it is evaluated only for the
    side effects of ``evaluator`` (e.g. debug prints for comparison).

    :param model: callable mapping an input batch to predictions
    :param dataloader: object supporting ``len()`` and integer indexing; each
        item holds the input batch at index 0 and the reference at index 1
    :param evaluator: fcn/obj like: fcn(y_pred, y_ref) -> float
    :param fl_model: reference (float) model; pass ``None`` to skip the
        reference pass. Defaults to the notebook-level ``net``.
    :return: mean of the evaluator results weighted by batch size
        (``0.0`` when the dataloader is empty)
    """
    score = 0.0
    seen = 0
    total = len(dataloader)

    with torch.no_grad():
        for i in range(total):
            XY = dataloader[i]
            X = XY[0]
            Y = XY[1]
            n = X.shape[0]
            y_pred = model(X)
            y_fl = fl_model(X) if fl_model is not None else None
            # finish the "\r" progress line so evaluator output starts on a fresh line
            print()
            ev = evaluator(y_pred, Y)
            if y_fl is not None:
                evaluator(y_fl, Y)
            # running mean weighted by the number of samples in each batch
            score = (score*seen + n*ev) / (seen + n)
            seen += n
            print("\rEvaluation {}/{}. Score = {}".format(i,total, score),end='')

        print("\rEvaluation {}/{}. Score = {}".format(total,total, score),end='\n')

    return score


def quantize(float_model:torch.nn.Module, 
             input_shape:tuple,
             quant_dir:str, 
             quant_mode:str, 
             device:torch.device,
             dataloader,
             evaluator):
    """
    Quantize ``float_model`` with the Vitis AI PyTorch quantizer and evaluate it.

    Run it twice: first with ``quant_mode='calib'`` (exports the quantization
    config), then with ``quant_mode='test'`` (exports the xmodel).

    :param float_model: float model with loaded weights
    :param input_shape: shape of input as (H, W, CH); the dummy input given to
        the quantizer is built as (1, CH, H, W)
    :param quant_dir: path to directory with quantized model components
    :param quant_mode: quant_mode in ['calib', 'test'] 
    :param device: device the model is moved to and the quantizer runs on
    :param dataloader: dataloader passed to ``evaluate`` - for 'calib' must be batch_size == 1
        NOTE(review): the Vitis AI examples require batch size 1 when exporting
        the xmodel ('test' mode) - confirm which mode this constraint is for.
    :param evaluator: fcn/obj like: fcn(y_pred, y_ref) -> float 
    :return: the value returned by ``evaluate`` for the quantized model, or
        ``None`` when the quantizer could not be created
    """
    # available in docker or after packaging 
    # vitis-AI-tools/..../pytorch../pytorch_nndct
    # and installing the package
    from pytorch_nndct.apis import torch_quantizer
    # model to device
    model = float_model.to(device)

    # dummy input in NCHW layout: channels are the last entry of input_shape
    input_shape = tuple(input_shape)
    rand_in = torch.randn((1,)+input_shape[-1:]+input_shape[:2])
    print("get qunatizer start")
    try:
        quantizer = torch_quantizer(
            quant_mode, model, rand_in, output_dir=quant_dir, device=device)
    except Exception as e:
        # best effort: report the problem and give up instead of raising
        print("exception:")
        print(e)
        return None
    print("get qunatizer end")
        
    print("get quantized model start")
    quantized_model = quantizer.quant_model
    print("get quantized model end")

    # evaluate
    print("testing st")
    score = evaluate(quantized_model, dataloader, evaluator)
    print("testing end")

    # export config
    if quant_mode == 'calib':
        print("export config")
        quantizer.export_quant_config()
        print("export config end")
    # export model
    if quant_mode == 'test':
        print("export xmodel")
        quantizer.export_xmodel(deploy_check=False, output_dir=quant_dir)
        print("export xmodel end")

    return score

"""
Needed to run of quantize.
first with quant_mode = 'calib'
second with quant_mode = 'test'
"""


"\nNeeded to run of quantize.\nfirst with quant_mode = 'calib'\nsecond with quant_mode = 'test'\n"

In [41]:
# Evaluate float model on test dataset
# evaluate(net,test_generator,evaluator=mean_iou)
# Recorded result: Evaluation 6123/6124. Score = 0.6773354439616692
# Evaluate float model on val dataset
evaluate(net,val_generator,evaluator=mean_iou)
# Recorded result (partial run, 279 of 1839 batches): Evaluation 279/1839. Score = 0.6722009609852523


tensor([[ 81.6806,  32.6368,  99.1937,  43.4976],
        [ 87.4011,  24.5306,  96.6941,  31.8980],
        [ 52.0959,  44.3676,  91.8857,  65.5496],
        [ 43.8944,  56.9842,  60.0490,  91.2053],
        [127.0770,  48.5583, 131.6846,  59.3338],
        [115.8754,  77.9913, 123.0046,  88.4065],
        [131.0462,  33.9482, 145.3719,  53.1600],
        [ 85.9318,  45.4107,  93.3410,  54.6256],
        [115.3234,   0.3449, 121.9520,   7.7133],
        [ 74.6851,  64.6351,  96.4188,  86.0490],
        [ 63.5876,  41.2104,  91.3656,  58.4560],
        [ 80.8527,  42.8977, 113.9336,  61.0240],
        [ 98.1013,  69.2490, 106.1156,  78.4725],
        [ 76.2605,  37.5155, 104.2839,  52.5087],
        [ 88.8100,  57.5367,  91.7047,  68.3968],
        [ 95.0712,  10.5306, 102.7643,  32.9582]]) -> tensor([[ 78.4613,  31.6114,  95.9406,  41.3151],
        [ 90.7985,  27.9480,  97.4122,  34.1932],
        [ 51.0604,  44.5457,  92.5243,  66.5793],
        [ 43.2100,  57.4933,  59.6000,  92.58

Evaluation 2/1839. Score = 0.6294100483258566
tensor([[ 62.3934,  46.6253, 100.5177,  67.2725],
        [108.2171,  70.6605, 118.3806,  86.0494],
        [107.8400,  50.1967, 122.1890,  73.9589],
        [ 38.2476,  27.6271,  66.4015,  53.9771],
        [ 52.2678,  44.6894,  80.9984,  60.0702],
        [122.7068,  10.7013, 150.3920,  50.8790],
        [ 98.3978,  97.4827, 115.0941, 112.6899],
        [109.8851,  78.3337, 134.2498, 110.2799],
        [ 48.7326,  59.7427,  69.6863,  77.5438],
        [ 64.3553,  55.7510,  79.5584,  63.6748],
        [ 58.2560,  46.0951,  99.3724,  67.5925],
        [ 81.0107,  42.7253,  97.1835,  51.8389],
        [ -0.3532,  77.2175,  18.9984,  98.0364],
        [ 69.6695,  46.7665,  99.0805,  65.4219],
        [ 80.9134,  24.1599,  99.9449,  63.6909],
        [ 40.0450,  40.2753,  83.4527,  95.4444]]) -> tensor([[ 64.0234,  47.5067, 102.8659,  68.0773],
        [107.6434,  70.5756, 118.6738,  86.2804],
        [107.6481,  50.0998, 125.7120,  74.8238],


Evaluation 5/1839. Score = 0.6603614588578542
tensor([[7.1826e+01, 4.4580e+01, 9.0298e+01, 6.3399e+01],
        [5.3886e+01, 4.5869e+01, 8.4176e+01, 6.2084e+01],
        [1.0602e+02, 9.1163e+00, 1.2494e+02, 3.6784e+01],
        [8.0977e+01, 3.2489e+01, 1.1384e+02, 4.8418e+01],
        [5.4530e+01, 3.4229e+01, 7.6839e+01, 4.7038e+01],
        [9.7242e+01, 5.5957e+01, 1.0683e+02, 6.4241e+01],
        [4.2715e-02, 4.2160e+01, 2.1095e+01, 6.4325e+01],
        [3.9997e+01, 5.8097e+01, 4.6445e+01, 8.1803e+01],
        [7.9990e+01, 3.6757e+01, 8.8260e+01, 4.9384e+01],
        [5.8799e+01, 5.2944e+01, 6.2618e+01, 6.2446e+01],
        [6.3593e+01, 3.9459e+01, 7.9490e+01, 5.0431e+01],
        [3.6124e+01, 4.1875e+01, 7.6052e+01, 6.4910e+01],
        [6.9843e+01, 4.7069e+01, 9.3644e+01, 5.9518e+01],
        [8.1452e+01, 4.1563e+01, 1.0041e+02, 5.2132e+01],
        [6.8510e+01, 4.4551e+01, 8.2696e+01, 4.9110e+01],
        [5.0731e+01, 6.2725e+01, 6.4878e+01, 8.4697e+01]]) -> tensor([[ 68.4469,  45

Evaluation 8/1839. Score = 0.658987992339664
tensor([[ 53.0887,  35.4823,  99.0026,  96.7406],
        [ 97.3823,  37.4959, 118.1282,  54.3504],
        [ 92.4035,  46.3831, 121.9105,  62.5095],
        [ 88.8730,  56.5134,  96.3300,  69.8535],
        [ 78.4299,  55.8172,  84.7743,  68.7593],
        [110.7223,  23.8526, 119.2623,  44.1663],
        [114.8026,  67.6038, 119.4931,  81.0812],
        [107.8043,  20.7361, 118.2099,  41.6455],
        [110.4375,  21.8115, 116.0114,  38.8865],
        [ 73.2994,  44.6295,  90.0321,  62.6071],
        [ 99.9495,  35.1706, 109.4680,  66.4701],
        [102.3450,  34.9184, 112.4918,  49.6926],
        [ 96.7851,  51.0657, 131.8183,  83.4001],
        [ 96.3392,  50.2148, 100.0167,  63.3161],
        [ 46.3887,  58.8454,  58.7577,  95.7636],
        [ 52.6911,  46.3737,  80.6834,  61.6902]]) -> tensor([[ 52.1591,  33.4444,  96.9773,  92.9444],
        [ 54.9437,  39.2000,  63.7906,  47.1333],
        [ 94.6243,  45.6199, 126.1564,  62.1180],
 

Evaluation 11/1839. Score = 0.676693374911944
tensor([[ 33.3246,  50.9319,  42.4301,  79.2837],
        [ 61.3982,  28.3292,  82.7675,  46.7174],
        [ 57.4447,  38.4744,  84.0580,  52.5019],
        [ 74.5035,  43.4067,  99.7385,  59.8128],
        [ 36.4868,  35.8495,  62.2638,  56.8921],
        [ 67.3818,  77.7166,  74.7641,  84.7868],
        [ 87.6640,  65.8174,  94.3433,  72.9428],
        [ 69.8764,  28.2016,  86.8127,  58.9050],
        [ 77.5745,  43.0543,  99.4738,  74.3114],
        [ 87.6338,  37.0308, 108.9128, 114.0535],
        [ 37.9020,  42.2784,  68.8714, 115.5354],
        [153.6636,  35.3252, 171.5528,  67.8371],
        [105.6192,  66.4168, 110.8316,  72.1975],
        [ 27.2900,   1.2968,  65.1315,  25.6501],
        [ 56.6676,  23.0942,  77.0408,  54.7923],
        [ 72.0170,  39.9977,  94.1452,  52.0303]]) -> tensor([[ 65.8985,  47.9484,  76.1808,  80.5093],
        [ 44.9767,  34.6320,  52.0390,  40.5697],
        [ 56.3639,  38.9419,  82.5227,  52.7240],


Evaluation 14/1839. Score = 0.664717948436737
tensor([[ 66.0191,  16.0085,  83.2145,  27.4439],
        [ 98.5353,  49.6955, 110.2153,  69.0333],
        [ 83.9217,  74.7672,  88.5420,  86.8918],
        [ 54.2242,  36.1754,  98.0809,  67.6112],
        [ 82.9002,  32.4503,  87.7197,  46.7483],
        [ 83.7713,  32.3536,  88.4205,  46.8347],
        [105.4377,  53.1241, 112.0067,  69.8103],
        [ 87.4807,  56.9007,  94.9119,  68.1726],
        [ 56.9754,  45.0277,  74.6393,  55.0797],
        [ 66.4340,  50.9500,  96.2100,  69.2516],
        [ 95.2512,  37.0423, 106.9266,  64.1194],
        [ 75.7971,  31.3949,  93.3722,  50.7457],
        [ 99.5366,  49.4725, 110.8457,  69.0446],
        [ 57.0553,  51.4934, 112.8847,  92.5672],
        [ 76.9399,  37.2983,  91.3110,  77.1025],
        [ 99.3474,  16.7621, 110.6163,  21.2383]]) -> tensor([[ 92.3484,  15.2444,  98.8781,  18.5111],
        [ 97.8458,  53.6869, 111.1096,  68.5690],
        [ 84.1086,  74.5111,  88.7727,  86.6444],


Evaluation 17/1839. Score = 0.6680255399809943
tensor([[ 92.0653,  31.8294, 102.4660,  51.9653],
        [115.2532,  41.1635, 127.6672,  73.7492],
        [ 82.9463,  37.0035,  95.3097,  76.4190],
        [ 66.3280,  40.3693,  92.2102,  53.6815],
        [ 44.0937,  39.7172,  67.9265,  64.6173],
        [132.2274,  50.6119, 141.6245,  64.7187],
        [ 58.6359,  10.4817,  71.5193,  48.8848],
        [ 78.7117,  30.2843,  93.2971,  52.5774],
        [ 82.4348,  55.1552,  97.5844,  96.7673],
        [ 99.6716,  34.7908, 103.7473,  44.8570],
        [100.2644,  20.5109, 105.3407,  36.6384],
        [ 96.5653,  48.8785, 111.8783,  70.5416],
        [ 75.0339,  95.0427,  79.0815, 105.8290],
        [ 51.2740,  60.7060,  65.9086,  90.5867],
        [ 50.4339,  35.4597,  63.0996,  40.6215],
        [ 81.5530,  58.0865,  91.6620,  67.2241]]) -> tensor([[ 91.8820,  34.2222, 102.9203,  60.3556],
        [  0.9991,  34.5784,  15.8233,  84.4449],
        [ 86.1591,  37.7222,  94.2727,  75.0556],

KeyboardInterrupt: 

In [43]:
# Use only subset of val set
# set whole dataset
val_generator.images_labes = val_set
# shuffle samples
val_generator.on_epoch_end()
# get subset (first 200 entries after the shuffle) of samples
subset = val_generator.images_labes[:200]
val_generator.images_labes = subset
# process only one image per forward
val_generator.batch_size = 1

In [44]:
# Quantize model - calib
# First of the two required passes: evaluates the quantized model on the
# validation subset and exports the quantization config into quant_dir.
quantize(net, 
         image_shape,
         quant_dir='quant_dir',
         quant_mode='calib',
         device=device,
         dataloader=val_generator,
         evaluator=mean_iou)

get qunatizer start

[0;32m[NNDCT_NOTE]: Quantization calibration process start up...[0m

[0;32m[NNDCT_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[NNDCT_NOTE]: =>Parsing Sequential...[0m

[0;32m[NNDCT_NOTE]: =>Doing weights equalization...[0m

[0;32m[NNDCT_NOTE]: =>Quantizable module is generated.(quant_dir/Sequential.py)[0m
get qunatizer end
get quantized model start

[0;32m[NNDCT_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st

tensor([[82.9830, 15.8512, 96.9357, 21.9864]]) -> tensor([[82.9783, 15.9273, 97.8150, 21.8291]])
tensor([[82.8921, 16.1012, 97.2037, 22.2388]]) -> tensor([[82.9783, 15.9273, 97.8150, 21.8291]])
Evaluation 0/200. Score = 0.9149556756019592
tensor([[149.3537,  49.2442, 158.5651,  57.0221]]) -> tensor([[70.2796, 53.5049, 77.5149, 60.6735]])
tensor([[149.9590,  50.6862, 159.0705,  57.9027]]) -> tensor([[70.2796, 53.5049, 77.5149, 60.6735]])
Evaluation 1/200. Score = 0.4574778378009796
tensor([[143.4172,  52.0475, 156.6641

Evaluation 31/200. Score = 0.6443446213379503
tensor([[69.0401, 39.8741, 96.7975, 54.2032]]) -> tensor([[69.6296, 40.4418, 97.1946, 54.5580]])
tensor([[69.2421, 39.9270, 96.5429, 54.4005]]) -> tensor([[69.6296, 40.4418, 97.1946, 54.5580]])
Evaluation 32/200. Score = 0.6524006932070762
tensor([[ 52.8498,  82.8939,  63.0689, 107.1833]]) -> tensor([[ 53.1703,  86.1778,  62.9648, 112.1556]])
tensor([[ 52.5444,  84.2916,  62.0794, 106.3166]]) -> tensor([[ 53.1703,  86.1778,  62.9648, 112.1556]])
Evaluation 33/200. Score = 0.6539194838089103
tensor([[147.9645,  70.3830, 154.0458,  79.3507]]) -> tensor([[147.4777,  70.4511, 154.4905,  79.6885]])
tensor([[147.4376,  70.3821, 154.9057,  79.3159]]) -> tensor([[147.4777,  70.4511, 154.4905,  79.6885]])
Evaluation 34/200. Score = 0.6595099270343782
tensor([[60.4306, 39.8256, 92.4430, 58.0972]]) -> tensor([[60.7175, 39.5071, 92.3288, 57.7220]])
tensor([[60.7662, 39.8534, 92.3512, 57.6074]]) -> tensor([[60.7175, 39.5071, 92.3288, 57.7220]])
Evaluati

Evaluation 64/200. Score = 0.6279428587510035
tensor([[101.5210,  33.8711, 117.7414,  66.8664]]) -> tensor([[111.0577,  35.9545, 119.3077,  64.2727]])
tensor([[102.6047,  35.6941, 117.0436,  65.1602]]) -> tensor([[111.0577,  35.9545, 119.3077,  64.2727]])
Evaluation 65/200. Score = 0.6238349910938379
tensor([[ 99.6319,  59.5388, 106.3784,  69.3992]]) -> tensor([[100.4328,  59.8889, 107.1180,  69.2222]])
tensor([[ 99.9548,  59.4238, 107.0767,  69.3762]]) -> tensor([[100.4328,  59.8889, 107.1180,  69.2222]])
Evaluation 66/200. Score = 0.626196550789164
tensor([[62.7448, 66.6997, 65.4693, 73.3816]]) -> tensor([[62.8094, 67.2000, 65.2969, 73.4222]])
tensor([[62.8671, 66.2092, 65.4529, 72.8567]]) -> tensor([[62.8094, 67.2000, 65.2969, 73.4222]])
Evaluation 67/200. Score = 0.6297977514126721
tensor([[ 85.1109,  39.1980, 117.1553,  59.2295]]) -> tensor([[ 87.1188,  39.2501, 115.4133,  59.0452]])
tensor([[ 85.2030,  38.9899, 115.2731,  59.4933]]) -> tensor([[ 87.1188,  39.2501, 115.4133,  59.0

Evaluation 97/200. Score = 0.6548901006391709
tensor([[ 5.7573, 40.4240, 30.9803, 49.7384]]) -> tensor([[ 6.5591, 44.2359, 29.8041, 51.5855]])
tensor([[ 5.3670, 40.2759, 31.1953, 49.6879]]) -> tensor([[ 6.5591, 44.2359, 29.8041, 51.5855]])
Evaluation 98/200. Score = 0.6533263290284502
tensor([[93.5566, 41.1948, 99.6379, 58.6197]]) -> tensor([[93.2812, 39.6667, 98.5672, 54.4444]])
tensor([[94.0054, 41.4621, 99.8569, 59.5204]]) -> tensor([[93.2812, 39.6667, 98.5672, 54.4444]])
Evaluation 99/200. Score = 0.6527408552879933
tensor([[ 96.0289,  78.0528, 109.9815,  90.5533]]) -> tensor([[ 96.4943,  78.5556, 110.2539,  90.4856]])
tensor([[ 95.7743,  78.1721, 110.0405,  90.1701]]) -> tensor([[ 96.4943,  78.5556, 110.2539,  90.4856]])
Evaluation 100/200. Score = 0.6553080531152998
tensor([[80.2821, 85.6971, 89.9842, 96.0366]]) -> tensor([[80.9640, 85.8584, 90.6186, 95.5860]])
tensor([[80.0116, 85.3609, 90.1945, 95.7581]]) -> tensor([[80.9640, 85.8584, 90.6186, 95.5860]])
Evaluation 101/200. Sco

Evaluation 130/200. Score = 0.6490474272179879
tensor([[105.7359,  51.8601, 126.8702,  78.5673]]) -> tensor([[106.1852,  49.7778, 126.3961,  77.3111]])
tensor([[105.7579,  51.9523, 126.6114,  78.4775]]) -> tensor([[106.1852,  49.7778, 126.3961,  77.3111]])
Evaluation 131/200. Score = 0.6505953784102475
tensor([[19.0641, 21.4923, 40.1984, 62.4264]]) -> tensor([[21.1437, 20.2222, 41.8211, 63.6222]])
tensor([[19.1447, 22.3568, 39.9666, 62.5186]]) -> tensor([[21.1437, 20.2222, 41.8211, 63.6222]])
Evaluation 132/200. Score = 0.6517275179100847
tensor([[131.8853,  38.4762, 142.1043,  50.3974]]) -> tensor([[131.3864,  41.0667, 144.4318,  51.8000]])
tensor([[131.7295,  38.1886, 142.8624,  50.5969]]) -> tensor([[131.3864,  41.0667, 144.4318,  51.8000]])
Evaluation 133/200. Score = 0.6513254435067483
tensor([[101.5233,  43.7543, 106.4767,  58.0833]]) -> tensor([[ 96.8570,  37.6444, 101.0547,  52.7333]])
tensor([[100.5850,  43.4590, 105.0086,  58.5113]]) -> tensor([[ 96.8570,  37.6444, 101.0547, 

Evaluation 163/200. Score = 0.6394853635510805
tensor([[ 89.3253,  43.6670, 107.4122,  53.5274]]) -> tensor([[ 88.5945,  43.5265, 105.5153,  52.3040]])
tensor([[ 89.0305,  42.7438, 107.5047,  52.4190]]) -> tensor([[ 88.5945,  43.5265, 105.5153,  52.3040]])
Evaluation 164/200. Score = 0.6402395338701988
tensor([[107.3771,  77.2055, 118.1405,  86.6090]]) -> tensor([[106.9231,  77.3899, 117.3893,  86.5339]])
tensor([[107.3266,  77.2851, 117.9401,  86.7433]]) -> tensor([[106.9231,  77.3899, 117.3893,  86.5339]])
Evaluation 165/200. Score = 0.6416819548011332
tensor([[104.5853,  26.5119, 133.4250,  63.6505]]) -> tensor([[105.2312,  26.1333, 128.9781,  60.2000]])
tensor([[104.3795,  26.7010, 134.0557,  63.9728]]) -> tensor([[105.2312,  26.1333, 128.9781,  60.2000]])
Evaluation 166/200. Score = 0.6423194151417727
tensor([[56.8131, 32.5950, 59.1056, 38.5314]]) -> tensor([[56.5906, 32.9778, 59.0781, 38.7333]])
tensor([[56.5739, 32.8158, 58.8984, 38.7847]]) -> tensor([[56.5906, 32.9778, 59.0781,

Evaluation 196/200. Score = 0.6460848385592222
tensor([[88.6262, 20.2795, 96.2474, 41.2930]]) -> tensor([[88.6172, 21.3111, 96.0797, 40.9111]])
tensor([[88.5852, 20.3146, 96.2938, 41.4454]]) -> tensor([[88.6172, 21.3111, 96.0797, 40.9111]])
Evaluation 197/200. Score = 0.6474515285438179
tensor([[ 48.2821,  71.8555,  57.9842, 104.1445]]) -> tensor([[ 48.7626,  70.4037,  57.9489, 103.0073]])
tensor([[ 48.0938,  72.4258,  57.4088, 103.9020]]) -> tensor([[ 48.7626,  70.4037,  57.9489, 103.0073]])
Evaluation 198/200. Score = 0.648633422412507
tensor([[34.5309, 54.4775, 39.7353, 71.0950]]) -> tensor([[35.3500, 54.0750, 40.6000, 70.0000]])
tensor([[35.1683, 54.7555, 40.2265, 71.1529]]) -> tensor([[35.3500, 54.0750, 40.6000, 70.0000]])
Evaluation 200/200. Score = 0.6489181493205249
testing end
export config

[0;32m[NNDCT_NOTE]: =>Exporting quant config.(quant_dir/quant_info.json)[0m
export config end


In [45]:
# Quantization, 'test' pass: call `quantize` with quant_mode='test', handing it
# the validation generator as the dataloader and `mean_iou` as the evaluator.
# Artifacts are read from / written to the 'quant_dir' directory.
quantize(
    net,
    image_shape,
    quant_dir='quant_dir',
    quant_mode='test',
    device=device,
    dataloader=val_generator,
    evaluator=mean_iou,
)


get qunatizer start

[0;32m[NNDCT_NOTE]: Quantization test process start up...[0m

[0;32m[NNDCT_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[NNDCT_NOTE]: =>Parsing Sequential...[0m

[0;32m[NNDCT_NOTE]: =>Doing weights equalization...[0m

[0;32m[NNDCT_NOTE]: =>Quantizable module is generated.(quant_dir/Sequential.py)[0m
get qunatizer end
get quantized model start

[0;32m[NNDCT_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st

tensor([[82.6114, 16.4956, 97.3073, 22.6308]]) -> tensor([[82.9783, 15.9273, 97.8150, 21.8291]])
tensor([[82.8921, 16.1012, 97.2037, 22.2388]]) -> tensor([[82.9783, 15.9273, 97.8150, 21.8291]])
Evaluation 0/200. Score = 0.7816399335861206
tensor([[149.5866,  50.3319, 158.3321,  57.7494]]) -> tensor([[70.2796, 53.5049, 77.5149, 60.6735]])
tensor([[149.9590,  50.6862, 159.0705,  57.9027]]) -> tensor([[70.2796, 53.5049, 77.5149, 60.6735]])
Evaluation 1/200. Score = 0.3908199667930603
tensor([[142.6812,  51.5860, 155.5851,  72.5

Evaluation 33/200. Score = 0.6514160869752659
tensor([[147.9645,  69.6833, 154.0458,  78.2354]]) -> tensor([[147.4777,  70.4511, 154.4905,  79.6885]])
tensor([[147.4992,  70.3897, 154.9021,  79.2726]]) -> tensor([[147.4777,  70.4511, 154.4905,  79.6885]])
Evaluation 34/200. Score = 0.6531232110091617
tensor([[61.0590, 40.0826, 93.1034, 57.4350]]) -> tensor([[60.7175, 39.5071, 92.3288, 57.7220]])
tensor([[60.7700, 39.9509, 92.0332, 57.4952]]) -> tensor([[60.7175, 39.5071, 92.3288, 57.7220]])
Evaluation 35/200. Score = 0.660651485953066
tensor([[ 93.8182,  41.2812, 121.5756,  63.3249]]) -> tensor([[ 96.0797,  41.8444, 116.1352,  60.2000]])
tensor([[ 94.0602,  41.6275, 121.3790,  63.5427]]) -> tensor([[ 96.0797,  41.8444, 116.1352,  60.2000]])
Evaluation 36/200. Score = 0.6594174625100315
tensor([[70.0836, 44.1185, 92.3438, 56.6190]]) -> tensor([[69.5411, 44.4103, 91.1647, 57.5027]])
tensor([[70.0762, 44.0951, 92.2612, 56.5711]]) -> tensor([[69.5411, 44.4103, 91.1647, 57.5027]])
Evaluatio

Evaluation 69/200. Score = 0.6278874618666511
tensor([[89.4870, 40.3983, 98.6985, 65.8679]]) -> tensor([[89.7894, 42.0700, 98.4405, 66.2262]])
tensor([[89.0037, 40.5938, 98.5672, 67.3689]]) -> tensor([[89.7894, 42.0700, 98.4405, 66.2262]])
Evaluation 70/200. Score = 0.631357386918135
tensor([[60.9972, 52.3910, 79.0841, 89.6193]]) -> tensor([[62.5800, 53.7600, 79.3425, 88.4800]])
tensor([[61.0980, 52.6421, 78.6893, 88.0441]]) -> tensor([[62.5800, 53.7600, 79.3425, 88.4800]])
Evaluation 71/200. Score = 0.6343412788377867
tensor([[68.3891, 63.5458, 77.6005, 70.2918]]) -> tensor([[58.9227, 58.9556, 67.4734, 62.6889]])
tensor([[67.4746, 63.4515, 77.4098, 70.1139]]) -> tensor([[58.9227, 58.9556, 67.4734, 62.6889]])
Evaluation 72/200. Score = 0.6256530016991795
tensor([[112.9311,  62.5792, 123.1501,  75.6871]]) -> tensor([[112.5594,  67.2000, 122.5094,  82.4445]])
tensor([[112.3012,  62.8043, 122.8449,  76.0473]]) -> tensor([[112.5594,  67.2000, 122.5094,  82.4445]])
Evaluation 73/200. Score 

Evaluation 102/200. Score = 0.6554187872980126
tensor([[ 71.4814,  35.9056, 100.5999,  63.9089]]) -> tensor([[71.2902, 36.1511, 99.5933, 65.0736]])
tensor([[70.9221, 35.8461, 99.9844, 64.0972]]) -> tensor([[71.2902, 36.1511, 99.5933, 65.0736]])
Evaluation 103/200. Score = 0.657932132591738
tensor([[ 83.0485,  37.1825, 103.1138,  48.5513]]) -> tensor([[84.0257, 58.9368, 99.8505, 65.8271]])
tensor([[ 83.4608,  37.6866, 103.2377,  48.7964]]) -> tensor([[84.0257, 58.9368, 99.8505, 65.8271]])
Evaluation 104/200. Score = 0.6516661122813405
tensor([[51.3058, 40.0659, 90.7046, 73.9238]]) -> tensor([[51.6444, 42.3111, 92.0889, 78.7111]])
tensor([[51.2949, 40.2127, 91.4062, 73.5537]]) -> tensor([[51.6444, 42.3111, 92.0889, 78.7111]])
Evaluation 105/200. Score = 0.6529800999786223
tensor([[88.7525, 55.2203, 92.9812, 68.9652]]) -> tensor([[90.9492, 58.4889, 94.0586, 71.5556]])
tensor([[87.8906, 55.1512, 91.9173, 69.2991]]) -> tensor([[90.9492, 58.4889, 94.0586, 71.5556]])
Evaluation 106/200. Score

Evaluation 136/200. Score = 0.6445040403926141
tensor([[129.5480,  79.5301, 142.4520,  92.5512]]) -> tensor([[129.3148,  80.0862, 142.7365,  92.8845]])
tensor([[129.4337,  80.0585, 142.9533,  92.9615]]) -> tensor([[129.3148,  80.0862, 142.7365,  92.8845]])
Evaluation 137/200. Score = 0.646396183146915
tensor([[92.2833, 35.0342, 95.9022, 46.9554]]) -> tensor([[92.6594, 35.7778, 96.3906, 47.7556]])
tensor([[92.4112, 35.5795, 96.0964, 47.0133]]) -> tensor([[92.6594, 35.7778, 96.3906, 47.7556]])
Evaluation 138/200. Score = 0.6471007317518702
tensor([[ 98.0185,  28.4718, 111.9712,  49.5385]]) -> tensor([[100.2773,  28.0000, 112.5594,  50.0889]])
tensor([[ 99.2526,  28.6691, 112.6200,  49.4695]]) -> tensor([[100.2773,  28.0000, 112.5594,  50.0889]])
Evaluation 139/200. Score = 0.6480907024229964
tensor([[71.2977, 31.0832, 97.3084, 57.7904]]) -> tensor([[74.5673, 33.4091, 95.8269, 56.6364]])
tensor([[72.6063, 32.1882, 96.2226, 57.8035]]) -> tensor([[74.5673, 33.4091, 95.8269, 56.6364]])
Evalu

tensor([[72.6389, 33.3692, 94.4875, 58.8163]]) -> tensor([[73.1031, 35.0000, 94.9875, 59.9667]])
tensor([[72.1840, 33.1491, 93.9966, 58.7159]]) -> tensor([[73.1031, 35.0000, 94.9875, 59.9667]])
Evaluation 171/200. Score = 0.644848331349855
tensor([[48.0703, 60.9478, 60.0110, 71.7897]]) -> tensor([[50.7531, 57.4000, 64.2562, 68.6000]])
tensor([[49.0809, 57.8747, 63.9969, 67.9493]]) -> tensor([[50.7531, 57.4000, 64.2562, 68.6000]])
Evaluation 172/200. Score = 0.6432458522950043
tensor([[105.3390,  34.1544, 112.8234,  51.5793]]) -> tensor([[110.6937,  36.8667, 115.6687,  52.2667]])
tensor([[105.1861,  33.9989, 112.0645,  51.2634]]) -> tensor([[110.6937,  36.8667, 115.6687,  52.2667]])
Evaluation 173/200. Score = 0.6409267192856084
tensor([[57.3738, 56.0659, 73.6773, 89.9238]]) -> tensor([[56.6200, 55.6267, 73.3825, 90.7200]])
tensor([[57.7223, 56.4539, 73.2111, 89.1682]]) -> tensor([[56.6200, 55.6267, 73.3825, 90.7200]])
Evaluation 174/200. Score = 0.6424653862799669
tensor([[ 93.8072,  3

In [47]:
# Run the Vitis AI XIR compiler on quant_dir/Sequential_int.xmodel for the
# target described in arch.json. The compiled network is named LN7_VAI and the
# results (compiled xmodel, meta.json, md5sum.txt) are written to ./build.
!vai_c_xir --xmodel quant_dir/Sequential_int.xmodel --arch arch.json --net_name LN7_VAI --output_dir  build

**************************************************
* VITIS_AI Compilation - Xilinx Inc.
**************************************************
[UNILOG][INFO] The compiler log will be dumped at "/tmp/vitis-ai-user/log/xcompiler-20220123-143045-46573"
[UNILOG][INFO] Compile mode: dpu
[UNILOG][INFO] Debug mode: function
[UNILOG][INFO] Target architecture: DPUCZDX8G_CUSTOMIZED
[UNILOG][INFO] Graph name: Sequential, with op num: 206
[UNILOG][INFO] Begin to compile...
[UNILOG][INFO] Total device subgraph number 15, DPU subgraph number 7
[UNILOG][INFO] Compile done.
[UNILOG][INFO] The meta json is saved to "/workspace/TRAIN/Vitis_AI/build/meta.json"
[UNILOG][INFO] The compiled xmodel is saved to "/workspace/TRAIN/Vitis_AI/build/LN7_VAI.xmodel"
[UNILOG][INFO] The compiled xmodel's md5sum is 4bbd58e58af4657ba30c6331d70fae7f, and been saved to "/workspace/TRAIN/Vitis_AI/build/md5sum.txt"
