# Performance testing

### Without threading

In [5]:
import cv2
import torch

# testing
from time import time

# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device, '\n')

# Load Model (yolov5s)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Set confidence treshold
model.conf = 0.1

# Only detect bottles
model.classes = [39]

# test image
frame = cv2.imread("images/bottle.jpg")

# set amount of detections 
DETECTIONS = 1000

# start timer
start = time()

# --------------------- START DETECTING ------------------------ #
for _ in range(DETECTIONS):
	# detect
	results = model(frame)
	if results.pandas().xyxy[0].empty: continue # turn this off to see the true performance (should gain you around a 20% performance improvement)

# stop timer
end = time()
print(f"{DETECTIONS} detections took {end - start} seconds")
print(f"FPS: {DETECTIONS/(end - start)}")

Using cache found in /home/koen/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-4-20 torch 1.11.0+cu102 CUDA:0 (NVIDIA GeForce GTX 1070 Ti, 8116MiB)

Fusing layers... 


Using device: cuda 



YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


1000 detections took 9.848912477493286 seconds
FPS: 101.53405284950982


### With threading

In [6]:
import cv2
import torch

# testing
from time import time
from concurrent.futures import ThreadPoolExecutor

# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device, '\n')

# Load Model (yolov5s)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Set confidence treshold
model.conf = 0.1

# Only detect bottles
model.classes = [39]

# test image
frame = cv2.imread("images/bottle.jpg")

# set amount of detections 
DETECTIONS = 1000

# worker function
def task(detections):
	for d in detections:
		if d.pandas().xyxy[0].empty: continue

detections = []

# init thread pool
executor = ThreadPoolExecutor(1)

# start timer
start = time()

# --------------------- START DETECTING ------------------------ #
for _ in range(DETECTIONS):
	# detect
	results = model(frame)
	# after five detections offload to worker threads
	if len(results) == 100:
		executor.map(task, detections)
		detections.clear()
	# append data to offload
	detections.append(results)

# stop timer
end = time()
print(f"{DETECTIONS} detections took {end - start} seconds")
print(f"FPS: {DETECTIONS/(end - start)}")

Using cache found in /home/koen/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-4-20 torch 1.11.0+cu102 CUDA:0 (NVIDIA GeForce GTX 1070 Ti, 8116MiB)

Fusing layers... 


Using device: cuda 



YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


1000 detections took 8.339622020721436 seconds
FPS: 119.90951118831319


# Real world performance testing

### Without threading

In [3]:
import cv2
import torch
from copy import copy

# testing
from time import time

# our modules
from detection import Detection

# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device, '\n')

# Load Model (yolov5s)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Set confidence treshold
model.conf = 0.1

# Only detect bottles
model.classes = [39]

# test image
frame = cv2.imread("images/bottle.jpg")

# set amount of detections 
DETECTIONS = 1000

# start timer
start = time()

# --------------------- START DETECTING ------------------------ #
for _ in range(DETECTIONS):
	base_img = copy(frame) # necessary
	# detect
	results = model(frame)

	# Reformat results to pandas dataframe
	df = results.pandas().xyxy[0]

	# If empty continue with next frame
	# else send detection data
	if (df.empty): continue

	lat, lon = 0, 0
	Detection(df, lat, lon, base_img).send("http://localhost/api/test", "test")

# stop timer
end = time()
print(f"{DETECTIONS} detections took {end - start} seconds")
print(f"FPS: {DETECTIONS/(end - start)}")

Using cache found in /home/koen/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-4-20 torch 1.11.0+cu102 CUDA:0 (NVIDIA GeForce GTX 1070 Ti, 8116MiB)

Fusing layers... 


Using device: cuda 



YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


1000 detections took 15.7927565574646 seconds
FPS: 63.32016810120082


### With threading

In [4]:
import cv2
import torch
from copy import copy

# testing
from time import time
from concurrent.futures import ThreadPoolExecutor

# our modules
from detection import Detection

# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device, '\n')

# Load Model (yolov5s)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Set confidence treshold
model.conf = 0.1

# Only detect bottles
model.classes = [39]

# test image
frame = cv2.imread("images/bottle.jpg")

# set amount of detections 
DETECTIONS = 1000

# worker function
def task(detections):
	for d in detections:
		df = d[0].pandas().xyxy[0]
		if df.empty: continue
		Detection(df, d[1], d[2], d[3]).send("http://localhost/api/test", "test")

detections = []

# init thread pool
executor = ThreadPoolExecutor(1)

# start timer
start = time()

# --------------------- START DETECTING ------------------------ #
for _ in range(DETECTIONS):
	base_img = copy(frame) # necessary
	# detect
	results = model(frame)
	# after five detections offload to worker threads
	if len(results) == 100:
		executor.map(task, detections)
		detections.clear()
	# get location
	lat, lon = 0, 0
	# append data to offload
	detections.append([results, lat, lon, base_img])

# stop timer
end = time()
print(f"{DETECTIONS} detections took {end - start} seconds")
print(f"FPS: {DETECTIONS/(end - start)}")

Using cache found in /home/koen/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-4-20 torch 1.11.0+cu102 CUDA:0 (NVIDIA GeForce GTX 1070 Ti, 8116MiB)

Fusing layers... 


Using device: cuda 



YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


1000 detections took 8.367568016052246 seconds
FPS: 119.50903752220616
