# Low power person detection on UAVs

This is the notebook which holds the complete pipeline for our project in TinyML!

Our goal is to deploy optimized person detection models on edge devices with regard to the trade-offs between power consumption, inference speed and accuracy.

We load, retrain, optimize, benchmark and deploy these models in this Jupyter notebook, which is a compressed version of the source code in this repository.


In [None]:
from ultralytics import YOLO
import tensorflow as tf
import tensorflow_hub as hub
import torch
import os


## We start with loading different models 
At the start of the project we used models from EfficientDet, FOMO, YOLO and MobileNet-SSD. After comparing them we decided to move forward with the YOLO model only; the later code is therefore written only for the YOLO architecture.

For EfficientDet and Mobilenet-ssd we use tensorflow-hub to get the models, while FOMO is only available via manual download from Edge-Impulse. The usage of YOLO is greatly simplified by using the ultralytics library for YOLO, which handles download and provides a training framework

In [None]:
# load_model.py

def load_yolo(model_name: str, model_name_ext: str):
    """Fetch YOLO weights through Ultralytics and export them as a SavedModel.

    Args:
        model_name: Short model identifier. Currently unused by the body.
        model_name_ext: Weights file name handed to ``YOLO(...)``,
            e.g. ``"yolo11n.pt"``.

    Returns:
        Whatever ``YOLO.export(format="saved_model")`` returns (presumably the
        path of the exported SavedModel -- confirm against the Ultralytics
        documentation).
    """
    # The directory is created for later pipeline stages; the export call
    # below chooses its own output location.
    os.makedirs("models", exist_ok=True)

    return YOLO(model_name_ext).export(format="saved_model")


def load_mobilenet_ssd(model_name: str, model_url: str = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2"):
    """Download MobileNet SSD from TensorFlow Hub and store it as a SavedModel.

    Args:
        model_name: Prefix of the output directory name.
        model_url: TensorFlow Hub handle to load.

    Returns:
        The directory the model was saved to, ``"<model_name>_saved_model"``,
        relative to the current working directory.
    """
    export_dir = f"{model_name}_saved_model"

    # NOTE(review): "models" is created here, but the SavedModel is written to
    # ``export_dir`` in the working directory, not inside "models".
    os.makedirs("models", exist_ok=True)

    hub_module = hub.load(model_url)
    tf.saved_model.save(hub_module, export_dir)
    return export_dir


def load_efficientdet(model_name: str, model_url: str = "https://tfhub.dev/tensorflow/efficientdet/d0/1"):
    """Download EfficientDet from TensorFlow Hub and store it as a SavedModel.

    Args:
        model_name: Prefix of the output directory name.
        model_url: TensorFlow Hub handle to load.

    Returns:
        The directory the model was saved to, ``"<model_name>_saved_model"``,
        relative to the current working directory.
    """
    # NOTE(review): "models" is created here, but the SavedModel below is
    # written to the working directory, not inside "models".
    os.makedirs("models", exist_ok=True)

    target_dir = f"{model_name}_saved_model"
    tf.saved_model.save(hub.load(model_url), target_dir)
    return target_dir

In [None]:

# Fetch the pretrained YOLO checkpoint and export it via load_yolo().
model_name = "yolo11n"
model_name_ext = f"{model_name}.pt"  # weights file name understood by Ultralytics
yolo_saved_model_path = load_yolo(model_name, model_name_ext)




We have now downloaded yolo11n.pt and will continue by using one of two domain-specific datasets to retrain, and thereby fine-tune, the model for deploying person detection on UAVs.

In [None]:
#train.py

# Report the PyTorch build and whether a CUDA device is visible before training.
_cuda_available = torch.cuda.is_available()
print(torch.__version__)
print(torch.version.cuda)
print("PyTorch CUDA: ", _cuda_available)
print(torch.cuda.get_device_name(0) if _cuda_available else "No GPU")

# Training configuration consumed by train() / export() further down.
model_name, dataset_name = "yolo11n", "visdrone"
data_path = "train/visdrone.yaml"
image_size, epochs = 640, 100

def train(model_name: str, dataset_name: str, data_path: str, image_size: int, epochs: int,
          batch: int = 16, device=None):
    """Fine-tune a pretrained YOLO model on a custom dataset.

    Args:
        model_name: Model identifier without extension; ``"<model_name>.pt"``
            is loaded as the pretrained checkpoint.
        dataset_name: Dataset label, used only to build the run name.
        data_path: Path to the dataset YAML passed to Ultralytics as ``data``.
        image_size: Training image size passed as ``imgsz``.
        epochs: Number of training epochs.
        batch: Batch size passed to Ultralytics (defaults to the previously
            hard-coded 16).
        device: Device specifier forwarded to Ultralytics. When ``None``, GPU 0
            is used if CUDA is available and ``"cpu"`` otherwise, so the
            function no longer fails on machines without a GPU.

    Returns:
        Whatever ``YOLO.train`` returns.
    """
    if device is None:
        # The original hard-coded device=0, which breaks on CPU-only machines.
        device = 0 if torch.cuda.is_available() else "cpu"

    # Run directory name; the driver code rebuilds the same string to locate
    # the trained weights, so the format must stay in sync.
    run_name = f"{model_name}_{dataset_name}_{image_size}p_{epochs}ep"

    # Load a pretrained model
    model = YOLO(f"{model_name}.pt")

    # Train the model using a custom dataset
    return model.train(
        data=data_path,
        device=device,
        epochs=epochs,
        imgsz=image_size,
        batch=batch,
        plots=True,
        project="../models",
        name=run_name,
    )

def export(data_path: str, best_model_path: str, image_size: int):
    """Export trained weights to ONNX and to three TFLite variants.

    The exports run in this order: ONNX, TFLite with default settings, TFLite
    with ``half=True``, and TFLite with ``int8=True``.

    Args:
        data_path: Dataset YAML forwarded as ``data`` to the int8 export only
            (presumably used for calibration -- confirm against the
            Ultralytics export documentation).
        best_model_path: Path to the trained weights to load.
        image_size: Value passed as ``imgsz`` to every TFLite export.
    """
    best_model = YOLO(best_model_path)

    tflite_base = {"format": "tflite", "imgsz": image_size}
    export_jobs = (
        {"format": "onnx"},
        dict(tflite_base),
        {**tflite_base, "half": True},
        {**tflite_base, "int8": True, "data": data_path},
    )

    for job in export_jobs:
        best_model.export(**job)

from multiprocessing import freeze_support
freeze_support()

print("Starting training ", model_name, " on dataset ", dataset_name,
      " for ", epochs, " epochs with imgsz ", image_size, "p.")

results = train(model_name, dataset_name, data_path, image_size, epochs)
print(results)

# Expected location of the best weights when the run directory name was free.
best_model_path = f"../models/{model_name}_{dataset_name}_{image_size}p_{epochs}ep/weights/best.pt"

# When a run directory of the same name already exists (e.g. this cell was
# executed before), Ultralytics may write to a suffixed directory instead, and
# the path above would then point at weights from an earlier run. Prefer the
# directory reported by the training result when it is available.
# NOTE(review): `save_dir` on the returned object is version-dependent, hence
# the guarded lookup and the fallback to the path above.
_run_dir = getattr(results, "save_dir", None)
if _run_dir is not None:
    _candidate = os.path.join(str(_run_dir), "weights", "best.pt")
    if os.path.isfile(_candidate):
        best_model_path = _candidate

export(data_path, best_model_path, image_size)





### Optimization


After retraining on the domain-specific dataset, we optimize the model for edge deployment and produce six TFLite variants per model:


- float32: default, experimental


- float16 weights-only: default, experimental


- dynamic range int8 (int8 weights, float32 I/O): default, experimental




Key settings:


- We use `tf.lite.Optimize.DEFAULT` for all conversions. The older `OPTIMIZE_FOR_SIZE` and `OPTIMIZE_FOR_LATENCY` are deprecated and behave the same as `DEFAULT`.


- Experimental variants add `tf.lite.Optimize.EXPERIMENTAL_SPARSITY`, which leverages pruned (sparse) weights if the SavedModel was trained with pruning. Note: conversion does not perform pruning itself.


- For float16, we convert weights to fp16 but keep inference input/output types as float32 to match the SavedModel signature.


- We restrict to TFLite built-in ops (`TFLITE_BUILTINS`) and attempt the experimental converter for broader support.

In [None]:


def optimize_model(
    model_name: str,
    model_path: str,
    output_dir: str = "models/optimized_models",
):
    """Generate the six TFLite variants of a SavedModel.

    For each of float32, float16 and dynamic-range int8, one "default" and one
    "experimental" (``use_sparsity=True``) conversion is requested from
    ``_convert``. Conversions run in the order listed below.

    Args:
        model_name: Prefix for the generated ``.tflite`` file names.
        model_path: SavedModel directory handed to the TFLite converter.
        output_dir: Directory that receives the ``.tflite`` files; created if
            it does not exist.

    Returns:
        A dict mapping each variant suffix (e.g. ``"float16_default"``) to the
        value returned by ``_convert`` for it: the written file path, or
        ``None`` when that conversion was skipped. Previously the results were
        discarded and the function returned ``None``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # (suffix, quant_type, use_sparsity)
    variants = (
        # f32 ( baseline )
        ("float32_default", None, False),
        ("float32_experimental", None, True),
        # Float16 (but IO f32)
        ("float16_default", tf.float16, False),
        ("float16_experimental", tf.float16, True),
        # Dynamic range int8 (weights int8, float32 IO)
        ("dynamic_int8_default", "dynamic", False),
        ("dynamic_int8_experimental", "dynamic", True),
    )

    return {
        suffix: _convert(
            model_name,
            model_path,
            output_dir,
            suffix,
            tf.lite.Optimize.DEFAULT,
            quant_type,
            use_sparsity=use_sparsity,
        )
        for suffix, quant_type, use_sparsity in variants
    }

  



def _convert(model_name: str, model_path: str, output_dir: str, suffix: str, optimization, quant_type, use_sparsity: bool = False):
    """Convert a SavedModel to one TFLite variant and write it to disk.

    Args:
        model_name: Prefix of the output file name.
        model_path: SavedModel directory to convert.
        output_dir: Directory for the ``.tflite`` file; created if missing.
        suffix: Variant label; the file is ``<model_name>_<suffix>.tflite``.
        optimization: ``tf.lite.Optimize`` flag used for the quantized
            variants. Falls back to ``Optimize.DEFAULT`` when falsy. Ignored
            for ``quant_type=None`` and for ``"dynamic"`` (see below).
        quant_type: ``None`` for a plain float32 model, ``tf.float16`` for
            float16 weights, ``"dynamic"`` for dynamic-range quantization.
        use_sparsity: When True, ``Optimize.EXPERIMENTAL_SPARSITY`` is added so
            that sparse weights (if present) are encoded more efficiently.
            This does not perform pruning.

    Returns:
        The path of the written file, or ``None`` if the converter raised
        ``ValueError`` (the variant is skipped with a printed message).
    """
    converter = tf.lite.TFLiteConverter.from_saved_model(model_path)

    optimizations = []
    if quant_type == "dynamic":
        # Dynamic range quantization always uses DEFAULT.
        optimizations.append(tf.lite.Optimize.DEFAULT)
    elif quant_type is not None:
        optimizations.append(optimization if optimization else tf.lite.Optimize.DEFAULT)
    # quant_type is None: deliberately no DEFAULT. Per the TFLite
    # post-training quantization guide, Optimize.DEFAULT without a float16
    # target or a representative dataset performs dynamic-range quantization,
    # which would turn the "float32" variants into quantized models.

    if use_sparsity and tf.lite.Optimize.EXPERIMENTAL_SPARSITY not in optimizations:
        optimizations.append(tf.lite.Optimize.EXPERIMENTAL_SPARSITY)

    converter.optimizations = optimizations

    # restrict to tensorflow builtin ops
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]

    # float16 quantization: store weights as fp16
    if quant_type == tf.float16:
        converter.target_spec.supported_types = [tf.float16]

    # try with experimental converter
    converter.experimental_new_converter = True

    try:
        tflite_model = converter.convert()
    except ValueError as e:
        print(f"Skipping {suffix} for {model_name}: {e}")
        return None

    # Make the helper usable on its own, not only via optimize_model().
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{model_name}_{suffix}.tflite")

    with open(output_path, 'wb') as f:
        f.write(tflite_model)

    print(f"{suffix}: {output_path} ({len(tflite_model) / (1024*1024):.2f} MB)")

    return output_path




In [None]:
# The TFLite converter used by optimize_model() loads a SavedModel directory,
# whereas best_model_path points at the PyTorch ".pt" checkpoint.
# NOTE(review): the Ultralytics TFLite export is expected to leave a
# "<weights stem>_saved_model" directory next to the weights -- confirm it
# exists for the installed Ultralytics version before running this cell.
saved_model_dir = os.path.splitext(best_model_path)[0] + "_saved_model"
optimize_model(model_name, saved_model_dir)

Choosing an optimized model: ( SOME MODEL )
We can now run inference on example pictures to see where the strengths and weaknesses of our detection lie.

In [None]:
# inference

To gain actual knowledge, we run different benchmarks on our models to compare them performance- and power-wise.

In [None]:
from model import Model
import os
import cv2
import time
from memory_profiler import memory_usage
import numpy as np
import psutil
from tqdm import tqdm

# Endpoints of the linear power model used in benchmark(): the estimate is
# P_idle at 0 % CPU utilisation and P_max at 100 %, interpolated linearly.
# The result is reported under the key 'energy_consumption (W)', so the unit
# is presumably watts.
# NOTE(review): confirm where these figures come from and for which device.
P_idle = 2.7
P_max = 7.0

def get_model_type(model_path: str) -> str:
    """Infer the model family from a model path.

    The path is searched (case-sensitively) for the substrings ``"yolo"`` and
    ``"fomo"``, in that order; the first one found is returned.

    Returns:
        ``'yolo'`` or ``'fomo'``, or ``None`` when neither substring occurs.
    """
    for family in ("yolo", "fomo"):
        if family in model_path:
            return family
    return None

def get_CPU_temp():
    """Read the current temperature reported by ``vcgencmd measure_temp``.

    The first output line is expected to look like ``temp=64.2'C``; the
    ``temp=`` prefix and ``'C`` suffix are removed and the rest is parsed.

    Returns:
        The temperature as a float.

    Raises:
        ValueError: If the command produced no parsable output (for example
            when ``vcgencmd`` is not available on this machine).
    """
    # Close the pipe deterministically instead of leaking it on every call.
    with os.popen('vcgencmd measure_temp') as pipe:
        res = pipe.readline()
    # strip() makes the parse independent of the exact line ending.
    return float(res.replace("temp=", "").replace("'C", "").strip())

def benchmark(model_path: str, images_path: str, amount_of_images):
    """Benchmark a model on images from a directory and return mean metrics.

    For every image, one inference is timed, CPU utilisation over that
    inference is sampled, a second inference is run under ``memory_usage`` to
    profile memory, and the CPU temperature is read. Power is estimated from
    CPU utilisation with the linear ``P_idle`` / ``P_max`` model.

    Args:
        model_path: Path to the model file; also used to infer the model type.
        images_path: Directory containing the benchmark images.
        amount_of_images: Maximum number of directory entries to use
            (``None`` uses all of them).

    Returns:
        A dict with the mean of each metric over all processed images, keyed
        by ``'inference_time (ms)'``, ``'memory_usage (MiB)'``,
        ``'cpu_temperature (C)'``, ``'cpu_usage (%)'`` and
        ``'energy_consumption (W)'``. The values are NaN if no image could be
        processed.
    """
    model_type = get_model_type(model_path)
    # NOTE(review): Model is project-defined; this assumes its inference()
    # accepts (img, postprocess) -- confirm against model.py.
    model = Model(model_type=model_type, path=model_path)

    time_values = []
    memory_values = []
    temp_values = []
    cpu_usage_values = []
    energy_values = []

    # os.listdir returns entries in arbitrary order; sort so the same subset
    # of images is benchmarked on every run.
    image_names = sorted(os.listdir(images_path))[:amount_of_images]

    for image in tqdm(image_names):
        image_path = os.path.join(images_path, image)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for entries it cannot decode
            # (sub-directories, non-image files); skip them.
            continue

        # cpu_percent() reports utilisation since its previous call, and its
        # very first result is meaningless. Reset the window immediately
        # before the timed inference so the sample covers that inference only,
        # not the memory-profiling run or the temperature read.
        psutil.cpu_percent()
        start_time = time.perf_counter()  # monotonic, high-resolution clock
        model.inference(img, postprocess=False)
        end_time = time.perf_counter()
        cpu_usage = psutil.cpu_percent()

        # Second, untimed inference run under the memory profiler.
        mem_usage = memory_usage((model.inference, (img, False, )))

        time_values.append((end_time - start_time) * 1000)
        cpu_usage_values.append(cpu_usage)
        temp_values.append(get_CPU_temp())
        memory_values.append(np.mean(mem_usage))
        energy_values.append(P_idle + (P_max - P_idle) * (cpu_usage / 100))

    return {
        'inference_time (ms)': np.mean(time_values),
        'memory_usage (MiB)': np.mean(memory_values),
        'cpu_temperature (C)': np.mean(temp_values),
        'cpu_usage (%)': np.mean(cpu_usage_values),
        'energy_consumption (W)': np.mean(energy_values)
    }

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:23<00:00,  2.16it/s]


{'inference_time (ms)': 132.99864292144775,
 'memory_usage (MiB)': 910.1628125,
 'cpu_temperature (C)': 64.192,
 'cpu_usage (%)': 82.64,
 'energy_consumption (W)': 3.5660000000000003}