# Setting Up SDK Artifacts

In [1]:
import os
os.environ['SNPE_ROOT']="/opt/qcom/aistack/qairt/2.25.0.240728" #set up your snpe path here.
os.environ['RAW_FILE_FOLDER']="raw"
os.environ['DLC32']="models/detr_resnet101_fp32.dlc"  # Use the path to your non-quantized dlc
os.environ['DLC8']="models/detr_resnet101_w8a8.dlc"              # Use the path to your Quantized dlc
os.environ['TARGET_INPUT_LIST']="list.txt"  # Use the name of the input file
os.environ['ONDEVICE_FOLDER']="detr"
os.environ['DEVICE_HOST']="localhost"
os.environ['DEVICE_ID']="503bd507" #fill your device-id. Use command "adb devices" to get devices names. example :"e18d5d0"
os.environ['SNPE_TARGET_ARCH']="aarch64-android"
os.environ['SNPE_TARGET_STL']="libc++_shared.so"
os.environ['SNPE_TARGET_DSPARCH']="hexagon-v75" 

In [2]:
import os
path = "/media/code/qnn/qidk/Solutions/VisionSolution1-ObjectDetection-DETR"
os.chdir(path)
print(os.getcwd())

/home/liuqi/code/qnn/qidk/Solutions/VisionSolution1-ObjectDetection-DETR


## Generate model

In [3]:
import torch
import os
import shutil
import torch.nn as nn
model = torch.hub.load('facebookresearch/detr', 'detr_resnet101', pretrained=True)
model.eval()
dummy_input=torch.randn(1, 3, 800, 1066)
output = model(dummy_input)
print(output['pred_logits'].shape)

class ModifiedModel(nn.Module):
    def __init__(self):
        super(ModifiedModel,self).__init__()
        self.model = model
        self.model.eval()
    def forward(self,pixel_values):
        output = self.model(pixel_values)
        output['pred_logits'] = output['pred_logits'].softmax(-1)[0,:,:-1]
        return output
customModel = ModifiedModel()
customModel.eval()
dummy_input=torch.randn(1, 3, 800, 1066)
output = customModel(dummy_input)
print(output['pred_logits'].shape)

Using cache found in /home/liuqi/.cache/torch/hub/facebookresearch_detr_main
  dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)


torch.Size([1, 100, 92])
torch.Size([100, 91])


In [4]:
os.makedirs('models', exist_ok=True)

In [5]:
dummy_input=torch.randn(1, 3, 800, 1066)

torch.onnx.export(customModel, dummy_input, "models/detr_resnet101.onnx", opset_version=11
                  , verbose=False)

  max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64)
  for img in tensor_list:


In [6]:
%%bash
source $SNPE_ROOT/bin/envsetup.sh
snpe-onnx-to-dlc --input_network models/detr_resnet101.onnx --output_path models/detr_resnet101_fp32.dlc
snpe-dlc-info -i models/detr_resnet101_fp32.dlc > models/detr_resnet101_fp32.txt

[INFO] AISW SDK environment set
[INFO] QNN_SDK_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728
[INFO] SNPE_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728


2025-02-27 12:12:51,550 - 235 - INFO - Simplified model validation is successful
2025-02-27 12:13:02,194 - 235 - INFO - INFO_INITIALIZATION_SUCCESS: 
2025-02-27 12:13:02,747 - 235 - INFO - INFO_CONVERSION_SUCCESS: Conversion completed successfully
2025-02-27 12:13:03,253 - 235 - INFO - INFO_WRITE_SUCCESS: 


## import libraries

In [7]:
import math
import os
from PIL import Image
import requests
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import torch
import shutil
import numpy as np
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
torch.set_grad_enabled(False);
import os
import cv2
import numpy as np
from numpy import asarray
from PIL import Image
import glob
import torch.nn.functional as nnf
import subprocess
!pip3 install ipywidgets

Looking in indexes: https://mirrors.aliyun.com/pypi/simple


## Getting the Dataset and Preparation

In [8]:
# User can download dataset of their choice for accuracy validation. 
# User needs to follow the pre/post processing steps prescribed in dataset (or) given below. 
# You can use coco val2017 or part of it.

### Pre-Processing Steps of DETR Model

In [9]:
# standard PyTorch mean-std input image normalization
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)

def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b

def plot_results(pil_img, prob, boxes,Image_count):
    fig=plt.figure(figsize=(8,8))
    ax1=fig.add_subplot(2,2,3)
    ax1.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=1))
        cl = p.argmax()
        text = f'{CLASSES[cl]}: {p[cl]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=10,
                bbox=dict(alpha=0.5))
    plt.savefig(str(Image_count)+".jpg")
    if Image_count%2==0:
        shutil.move(str(Image_count)+".jpg","output/CPU")
    else:
        shutil.move(str(Image_count)+".jpg","output/DSP")
    plt.show()

### Steps to create raw images

In [11]:
#黑白图会转失败，少转一些也没关系。
name="raw"
os.system('mkdir ' + name)
def detect(imgfile,i):
    # print('imgfile:', imgfile)
    #getting the actual image
    origimg = Image.open(imgfile)
    #Transforming the image
    img = transform(origimg).unsqueeze(0)

    img= nnf.interpolate(img, size=(800, 1066), mode='bicubic', align_corners=False)
    
    img_to_save=img.numpy().transpose(0,2,3,1).astype(np.float32)
    
    img_to_save.tofile("raw/"+filenames[i].split(".")[0]+".raw")
    
filenames = os.listdir("val2017") ## change val2017 to the folder name where you have your dataset images.
for i in range(0,len(filenames)):
    if "jpg" in filenames[i].lower():
        detect("val2017/"+filenames[i],i)

mkdir: cannot create directory ‘raw’: File exists


In [12]:
%%bash
find ./raw -name *.raw > list.txt

### Getting the Quantized Model

In [13]:
%%bash
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-quantize --input_dlc models/detr_resnet101_fp32.dlc --input_list list.txt  --output_dlc models/detr_resnet101_w8a8.dlc 

[INFO] AISW SDK environment set
[INFO] QNN_SDK_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728
[INFO] SNPE_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728


[INFO] InitializeStderr: DebugLog initialized.
[INFO] Processed command-line arguments
[INFO] Quantized parameters


     0.5ms [  INFO ] Inferences will run in sync mode
     1.2ms [  INFO ] Initializing logging in the backend. Callback: [0x557288fb2b60], Log Level: [3]
     1.3ms [  INFO ] No BackendExtensions lib provided;initializing NetRunBackend Interface
     0.6ms [  INFO ] [QNN_CPU] CpuBackend creation start
     0.6ms [  INFO ] [QNN_CPU] CpuBackend creation end
     0.8ms [  INFO ] [QNN_CPU] QnnContext create start
     0.8ms [  INFO ] [QNN_CPU] QnnContext create end
     2.7ms [  INFO ] Entering QuantizeRuntimeApp flow
     1.5ms [  INFO ] [QNN_CPU] CpuGraph creation start
     1.7ms [  INFO ] [QNN_CPU] CpuGraph creation end
     1.7ms [  INFO ] [QNN_CPU] QnnGraph create end
   130.7ms [  INFO ] [QNN_CPU] QnnGraph finalize start
   211.8ms [  INFO ] [QNN_CPU] QnnGraph finalize end
   218.0ms [  INFO ] [QNN_CPU] QnnGraph execute start
  4113.5ms [  INFO ] [QNN_CPU] QnnGraph execute end
  4119.0ms [  INFO ] cleaning up resources for input tensors
  4119.1ms [  INFO ] cleaning up resources fo

[INFO] Generated activations
[INFO] Saved quantized dlc to: models/detr_resnet101_w8a8.dlc
[INFO] DebugLog shutting down.


nsors
333881.3ms [  INFO ] cleaning up resources for output tensors
333892.6ms [  INFO ] [QNN_CPU] QnnGraph execute start
337777.3ms [  INFO ] [QNN_CPU] QnnGraph execute end
337776.5ms [  INFO ] cleaning up resources for input tensors
337776.5ms [  INFO ] cleaning up resources for output tensors
337786.3ms [  INFO ] [QNN_CPU] QnnGraph execute start
341769.5ms [  INFO ] [QNN_CPU] QnnGraph execute end
341769.7ms [  INFO ] cleaning up resources for input tensors
341769.7ms [  INFO ] cleaning up resources for output tensors
341780.2ms [  INFO ] [QNN_CPU] QnnGraph execute start
345900.5ms [  INFO ] [QNN_CPU] QnnGraph execute end
345899.9ms [  INFO ] cleaning up resources for input tensors
345899.9ms [  INFO ] cleaning up resources for output tensors
345910.0ms [  INFO ] [QNN_CPU] QnnGraph execute start
349483.5ms [  INFO ] [QNN_CPU] QnnGraph execute end
349483.5ms [  INFO ] cleaning up resources for input tensors
349483.5ms [  INFO ] cleaning up resources for output tensors
349494.6ms [  IN

- For snpe-dlc-graph-prepare fix value of htp_soc.
- Based on the device you will be running set value of <b>--htp_socs. Example sm8650 or sm8550</b>

In [15]:
%%bash
source $SNPE_ROOT/bin/envsetup.sh
snpe-dlc-graph-prepare --input_dlc models/detr_resnet101_w8a8.dlc --htp_socs=sm8650 --set_output_tensors=5848,5856 --output_dlc=models/detr_resnet101_w8a8_gp.dlc

[INFO] AISW SDK environment set
[INFO] QNN_SDK_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728
[INFO] SNPE_ROOT: /opt/qcom/aistack/qairt/2.25.0.240728


[INFO] InitializeStderr: DebugLog initialized.
[INFO] SNPE HTP Offline Prepare: Attempting to create cache for SM8650
[USER_INFO] Target device backend record identifier: HTP_V75_SM8650_8MB
[USER_INFO] No cache record in the DLC matches the target device (HTP_V75_SM8650_8MB). Creating a new record
[USER_INFO] Checking unsigned PD session
[INFO] Attempting to open dynamically linked lib: libHtpPrepare.so
[INFO] dlopen libHtpPrepare.so SUCCESS handle 0x562dc14333a0
[INFO] Found Interface Provider (v2.18)
[USER_INFO] Platform option not set
[USER_INFO] Created ctx=0x1 for Graph Id=0 backend=HTP SNPE Id=0x562dc11426f8
[USER_INFO] Offline Prepare VTCM size(MB) selected = 8
[USER_INFO] Offline Prepare Optimization Level passed = 2
[USER_INFO] Backend Mgr ~Dtor called for backend HTP
[USER_INFO] Cleaning up Context handle=0x1 for Graph Id=0 backend=HTP SNPE Id=0x562dc11426f8
[USER_INFO] Done Cleaning up Context handle=0x1 for Graph Id=0 backend=HTP SNPE Id=0x562dc11426f8
[USER_INFO] BackendTe

**Optional Code blocks**
## Creating Bin and Lib Folder On Device

<b>- Below blocks are completely optional. 
- You have the model already prepared.
- Run below code blocks only if you want to try out model by pushing it device.</b>

In [16]:
%%bash
#source throughput.sh >>dump.txt
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/dsp/lib"
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"

* daemon not running; starting now at tcp:localhost:5037
* daemon started successfully
adb: device '503bd507' not found
adb: device '503bd507' not found


CalledProcessError: Command 'b'#source throughput.sh >>dump.txt\nexport DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"\n$DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib" && $DEVICE_SHELL shell "mkdir -p /data/local/tmp/snpeexample/dsp/lib"\n$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"\n'' returned non-zero exit status 1.

# Pusing All Bin and Lib Files on to Device
* use hexagon-v75 for sm8650
* use hexagon-v73 for sm8550

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/$SNPE_TARGET_STL /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
$DEVICE_SHELL push $SNPE_ROOT/lib/$SNPE_TARGET_ARCH/*.so /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
$DEVICE_SHELL push $SNPE_ROOT/lib/hexagon-v75/unsigned/*.so /data/local/tmp/snpeexample/dsp/lib
$DEVICE_SHELL push $SNPE_ROOT/bin/$SNPE_TARGET_ARCH/snpe-net-run /data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin

# Pushing Artifacts onto Device

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell "mkdir -p /data/local/tmp/$ONDEVICE_FOLDER"

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL push $RAW_FILE_FOLDER /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $TARGET_INPUT_LIST /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $DLC32 /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push models/detr_resnet101_w8a8_gp.dlc /data/local/tmp/$ONDEVICE_FOLDER
$DEVICE_SHELL push $DLC8 /data/local/tmp/$ONDEVICE_FOLDER

# Inferencing 8-bit DLC onto DSP Runtime

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell
export SNPE_TARGET_ARCH=aarch64-android
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
export PATH=$PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
export ADSP_LIBRARY_PATH="/data/local/tmp/snpeexample/dsp/lib;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp"
export OUTPUT_FOLDER=OUTPUT_8b_DSP
export DLC8=detr_resnet101_w8a8_gp.dlc
export ONDEVICE_FOLDER="detr"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
chmod -R 777 * &&
snpe-net-run --container $DLC8 --input_list list.txt  --set_unconsumed_as_output --output_dir=OUTPUT_8b_DSP --use_dsp

# Inferencing 32-bit DLC onto CPU Runtime

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL shell
export SNPE_TARGET_ARCH=aarch64-android
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/lib
export PATH=$PATH:/data/local/tmp/snpeexample/$SNPE_TARGET_ARCH/bin
export OUTPUT_FOLDER=OUTPUT_32b_CPU
export DLC32=detr_resnet101_fp32.dlc
export ONDEVICE_FOLDER="detr"
cd /data/local/tmp/$ONDEVICE_FOLDER &&
snpe-net-run --container $DLC32 --input_list list.txt  --output_dir=OUTPUT_32b_CPU --set_unconsumed_as_output

# Pulling output folder generated on different Precision and Cores

In [None]:
%%bash
export DEVICE_SHELL="adb -H $DEVICE_HOST -s $DEVICE_ID"
$DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/OUTPUT_8b_DSP OUTPUT_8b_DSP
$DEVICE_SHELL pull /data/local/tmp/$ONDEVICE_FOLDER/OUTPUT_32b_CPU OUTPUT_32b_CPU

## Post Processing the Inferenced data

In [None]:
# Sample list of classes
CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

# colors for visualization
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]

In [None]:
import matplotlib.pyplot as plt
Image_Paths=[]

with open('list.txt', 'r') as f:
    for line in f:
        Image_Paths.append(line.strip().split("/")[-1].split(".")[0])


count=Image_count=0
if os.path.exists("output")==False:
    os.mkdir("output")
if os.path.exists("output/CPU")==False:
    os.mkdir("output/CPU")
if os.path.exists("output/DSP")==False:
    os.mkdir("output/DSP")
for image in Image_Paths:
    image_path = 'val2017/'+image+".jpg"
    im = Image.open(image_path)
    file1 = 'OUTPUT_32b_CPU/Result_' + str(count) + '/5867.raw'
    file2 = 'OUTPUT_32b_CPU/Result_' + str(count) + '/5860.raw'
    file3 = 'OUTPUT_8b_DSP/Result_' + str(count) + '/5867.raw'
    file4 = 'OUTPUT_8b_DSP/Result_' + str(count) + '/5860.raw'
    a=np.fromfile(file1,np.float32)
    a=a.reshape(100,91)
    tensor_a = torch.from_numpy(a)
    b=np.fromfile(file2,np.float32)
    b=b.reshape(1,100,4)
    tensor_b = torch.from_numpy(b)

    c=np.fromfile(file3,np.float32)
    c=c.reshape(100,91)
    tensor_c = torch.from_numpy(c)
    d=np.fromfile(file4,np.float32)
    d=d.reshape(1,100,4)
    tensor_d = torch.from_numpy(d)


    
    probas = tensor_a
    keep = probas.max(-1).values > 0.9
    bboxes_scaled = rescale_bboxes(tensor_b[0, keep], im.size)
    print("CPU FP32 Inference Result")
    plot_results(im, probas[keep], bboxes_scaled,Image_count)
    Image_count=Image_count+1

    probas = tensor_c
    keep = probas.max(-1).values > 0.9
    bboxes_scaled = rescale_bboxes(tensor_d[0, keep], im.size)
    print("DSP INT8 Inference Result")
    plot_results(im, probas[keep], bboxes_scaled,Image_count)
    Image_count=Image_count+1
    count=count+1