# DPU example: Resnet50
----

## 1. Prepare the overlay
We will download the overlay onto the board. 

In [1]:
from pynq_dpu import DpuOverlay
overlay = DpuOverlay("dpu.bit")

## 2. Utility functions

In this section, we will prepare a few functions for later use.

In [2]:
import os
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
overlay.load_model("kv260_tipu12.xmodel")

Let's first define a few useful preprocessing functions. These functions
will make sure the DPU can take input images with arbitrary sizes.

In [4]:
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

MEANS = [_B_MEAN,_G_MEAN,_R_MEAN]

def resize_shortest_edge(image, size):
    H, W = image.shape[:2]
    if H >= W:
        nW = size
        nH = int(float(H)/W * size)
    else:
        nH = size
        nW = int(float(W)/H * size)
    return cv2.resize(image,(nW,nH))

def mean_image_subtraction(image, means):
    B, G, R = cv2.split(image)
    B = B - means[0]
    G = G - means[1]
    R = R - means[2]
    image = cv2.merge([R, G, B])
    return image

def BGR2RGB(image):
    B, G, R = cv2.split(image)
    image = cv2.merge([R, G, B])
    return image

def central_crop(image, crop_height, crop_width):
    image_height = image.shape[0]
    image_width = image.shape[1]
    offset_height = (image_height - crop_height) // 2
    offset_width = (image_width - crop_width) // 2
    return image[offset_height:offset_height + crop_height, offset_width:
                 offset_width + crop_width, :]

def normalize(image):
    image=image/256.0
    image=image-0.5
    image=image*2
    return image

def preprocess_fn(image, image_path, crop_height = 224, crop_width = 224):
    if image is None:
        raise ValueError(f"Error reading image at {image_path}")
    image = resize_shortest_edge(image, 256)
    #image = mean_image_subtraction(image, MEANS)
    image = central_crop(image, crop_height, crop_width)
    image = BGR2RGB(image)
    image = normalize(image)
    return image

We will also define a few functions to calculate softmax and provide 
the output class after running a DPU task.

In [5]:
def calculate_softmax(data):
    e_x = np.exp(data - np.max(data))
    return e_x / e_x.sum()

def predict_label(softmax):
    with open("class_to_order.txt", "r") as f:
        lines = f.readlines()
        lines = [item.strip() for item in lines]
        class_arg = np.argmax(softmax)
        class_name = lines[class_arg]
    return class_name, class_arg

Keep in mind that our original images are 640x480 so we need to preprocess them
later to make sure it fits our model.

## 3. Use VART
Now we should be able to use VART to do image classification.

In [6]:
dpu = overlay.runner

inputTensors = dpu.get_input_tensors()
outputTensors = dpu.get_output_tensors()

shapeIn = tuple(inputTensors[0].dims)
shapeOut = tuple(outputTensors[0].dims)
outputSize = int(outputTensors[0].get_data_size() / shapeIn[0])

softmax = np.empty(outputSize)

We can define a few buffers to store input and output data. They will be reused
during multiple runs.

In [7]:
output_data = [np.empty(shapeOut, dtype=np.float32, order="C")]
input_data = [np.empty(shapeIn, dtype=np.float32, order="C")]
image = input_data[0]

In [8]:
def count_total_images(dataset_folder):
    total = 0
    for class_folder in os.listdir(dataset_folder):
        class_path = os.path.join(dataset_folder, class_folder)
        if os.path.isdir(class_path):
            total += len(os.listdir(class_path))
    return total

def is_image_file(file_path):
    valid_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
    ext = os.path.splitext(file_path)[-1].lower()
    return ext in valid_extensions

Remember that we have a list of `original_images`. 
We can now define a new function `run()` which takes the image index as 
the input, and calculate the softmax as the classification result.
With the argument `display` set to `True`, the original image as well as the
predicted label can be rendered.

It is obvious that the range of `image_index` should be [0, `total_images`-1].

In [13]:
import threading
from queue import Queue
import time

# Define a queue for image paths to be processed
image_queue = Queue()

correct = 0  # Global variable for correct predictions
labels = []
labels_in_correct_order = []
predictions_list = []
index = 0

# Lock for synchronization
lock = threading.Lock()

# Function to perform inference on images
def inference_worker():
    global correct
    global index
    while True:
        image_path = image_queue.get()
        if image_path is None:
            break
        if not is_image_file(image_path):
            #print(f"Skipping non-image file: {image_path}")
            continue
            
        with lock:
            current_index = index
            index += 1
        
        preprocessed_image = preprocess_fn(cv2.imread(image_path), image_path)
        image[0,...] = preprocessed_image.reshape(shapeIn[1:])
        
        job_id = dpu.execute_async(input_data, output_data)
        dpu.wait(job_id)
        
        temp = [j.reshape(1, outputSize) for j in output_data]
        softmax = calculate_softmax(temp[0][0])
        predicted_label = predict_label(softmax)
        
        with lock:
            predictions_list.append(predicted_label[0])
            labels_in_correct_order.append(labels[current_index])
            if predicted_label[0] == labels[current_index]:
                correct += 1
        
        image_queue.task_done()

# Number of threads
num_threads = 4  # Adjust based on system resources and performance

# Start threads
threads = []
for _ in range(num_threads):
    t = threading.Thread(target=inference_worker)
    t.start()
    threads.append(t)

image_folder = "test_tipu12/test"    

# Queue up image paths for processing
for class_folder in os.listdir(image_folder):
    class_path = os.path.join(image_folder, class_folder)
    if not os.path.isdir(class_path):
        continue
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)
        if not is_image_file(image_path):
            continue
        image_queue.put(image_path)
        labels.append(class_folder)

start_time = time.time()

# Wait for all threads to complete processing
image_queue.join()

# Stop threads by sending None to the queue
for _ in range(num_threads):
    image_queue.put(None)

# Join threads
for t in threads:
    t.join()

# Calculate FPS
total_images = count_total_images(image_folder)
total_time = time.time() - start_time
fps = total_images / total_time

print(f"Total time for {total_images} images: {total_time:.2f} seconds")
print(f"FPS: {fps:.2f}")
print(f"Correct predictions: {correct}")
print(f"Accuracy: {correct/len(labels)}")

Total time for 3232 images: 56.31 seconds
FPS: 57.40
Correct predictions: 2332
Accuracy: 0.7215346534653465


In [14]:
from sklearn.metrics import confusion_matrix

def confusion_matrix_stats(conf_mat):
    # Counting true positives, false positives, true negatives, and false negatives
    true_positives = np.diag(conf_mat)
    false_positives = np.sum(conf_mat, axis=1) - true_positives
    false_negatives = np.sum(conf_mat, axis=0) - true_positives
    true_negatives = np.sum(conf_mat) - true_positives - false_positives - false_negatives
    # F1 Score
    f1_score = np.where((true_positives + false_positives + false_negatives) > 0, 2 * true_positives / (2 * true_positives + false_positives + false_negatives), 0)
    return f1_score, true_positives, false_positives, false_negatives, true_negatives

labels.reverse()
conf_mat = confusion_matrix(labels_in_correct_order, predictions_list)

f1_score, true_positives, false_positives, false_negatives, true_negatives = confusion_matrix_stats(conf_mat)

f1_score_mean = np.mean(f1_score[:])

print(f"f1_score_mean: {f1_score_mean}")

f1_score_mean: 0.7191153930350014


 We will need to remove references to `vart.Runner` and let Python garbage-collect
the unused graph objects. This will make sure we can run other notebooks without
any issue.

In [22]:
del overlay
del dpu

----

Copyright (C) 2021 Xilinx, Inc

SPDX-License-Identifier: Apache-2.0 License

----

----