In [None]:
import cv2
import os
import time

from pycoral.adapters.common import input_size
from pycoral.adapters.detect import get_objects
from pycoral.utils.dataset import read_label_file
from pycoral.utils.edgetpu import make_interpreter
from pycoral.utils.edgetpu import run_inference

# Locations of the model weights and the label file (both live in one directory)
default_path = 'models'  # directory holding the model weights and labels
model_name = 'helov1_efficientdet0_edgetpu.tflite'
label_name = 'ewasp_label.txt'

# Build the paths with os.path.join instead of manual "/" concatenation so the
# separator handling is left to the standard library.
model_path = os.path.join(default_path, model_name)
label_path = os.path.join(default_path, label_name)

# Video the model is tested against
video_path = "data/20250409_1046MST.mp4"

# Detection settings
SCORE_THRESH = 0.1  # minimum score passed to get_objects()
NUM_CLASSES = 1     # despite the name, this caps how many detections are kept per frame

In [None]:
# [FUNCTION] Load a range of frames from a video file
def load_video(path, start=0, end=-1):
    """Read frames ``start``..``end`` of a video and return them as a list.

    Args:
        path: Path of the video file; passed straight to ``cv2.VideoCapture``.
        start: Index of the first frame to return (0-based, inclusive).
        end: Index of the last frame to return (inclusive). A negative value
            (default ``-1``) means "keep reading until the video ends".

    Returns:
        list: One 2-D array per selected frame, holding only channel 0 of the
        decoded frame (used by this notebook as the grayscale image). The list
        is empty when the requested range lies outside the video.

    Raises:
        ValueError: If the video cannot be opened.
    """
    frames = []
    cap = cv2.VideoCapture(path)

    try:
        # Catch edge case of video failure
        if not cap.isOpened():
            raise ValueError("Error opening video!")

        # Get video parameters (only used for the summary printed below)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # A negative end means "no upper bound": rely on read() failing at the
        # end of the stream instead of trusting the reported frame count.
        read_to_end = end < 0

        index = 0
        # Stop as soon as the last requested frame was read; there is no need
        # to decode the remainder of the file.
        while read_to_end or index <= end:
            ret, frame = cap.read()
            if not ret:
                break
            # start is inclusive, so frame `start` itself is kept
            if index >= start:
                frames.append(frame[:, :, 0].copy())  # only save the first channel
            index += 1
    finally:
        # Always free the capture handle, even if opening or decoding failed
        cap.release()

    # Fancy print details
    print("===== Video Details ======")
    if fps > 0:
        print(f"Video Length: {round((frame_count/fps)/60, 2)} min")
    else:
        # Avoid dividing by zero when the FPS is reported as 0
        print("Video Length: unknown (FPS not reported)")
    print(f"FPS: {round(fps, 2)}")
    print(f"Frame Count: {frame_count}")
    print(f"Frame Width: {frame_width}")
    print(f"Frame Height: {frame_height}")
    if frames:
        print(f"Frame Shape: {frames[0].shape}")
    else:
        print("Frame Shape: n/a (no frames in the requested range)")
    print()

    return frames

In [None]:
# [FUNCTION] Modify image to label objs and score
def append_objs_to_img(cv2_im, inference_size, objs, labels, score_thresh=0.1):
    """Draw a box and a "<score>% <label>" caption for each detection.

    Args:
        cv2_im: Image to annotate. Only its first two shape entries are read,
            as (height, width), so both 2-D and multi-channel images work.
        inference_size: Indexable pair; element 0 is divided into the image
            width and element 1 into the image height to get the scale that
            maps model coordinates back onto the image.
        objs: Iterable of detections exposing ``score``, ``id`` and a ``bbox``
            with ``scale(sx, sy)`` and ``xmin``/``ymin``/``xmax``/``ymax``.
        labels: Mapping from detection id to display name; the raw id is shown
            when no name is registered.
        score_thresh: Detections with a score at or below this value are
            skipped. Defaults to 0.1.

    Returns:
        The image returned by the last drawing call, or ``cv2_im`` unchanged
        when nothing was drawn.
        NOTE(review): the drawing calls presumably also modify ``cv2_im`` in
        place - pass a copy if the original must stay untouched.
    """
    # shape[:2] keeps this working for (H, W) and (H, W, C) images alike
    height, width = cv2_im.shape[:2]
    scale_x = width / inference_size[0]
    scale_y = height / inference_size[1]

    for obj in objs:
        if obj.score <= score_thresh:
            continue

        # Map the box from model-input coordinates to image coordinates
        bbox = obj.bbox.scale(scale_x, scale_y)
        x0, y0 = int(bbox.xmin), int(bbox.ymin)
        x1, y1 = int(bbox.xmax), int(bbox.ymax)

        percent = int(100 * obj.score)
        label = '{}% {}'.format(percent, labels.get(obj.id, obj.id))

        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        # Caption is anchored 30 px below the top-left corner of the box
        cv2_im = cv2.putText(cv2_im, label, (x0, y0+30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return cv2_im

In [None]:
# Display Dependencies
import IPython
import IPython.display
from io import BytesIO
import PIL.Image

# [FUNCTION] Prepare .ipynb display
def show_rgb_image_to_display(image_rgb, display):
    """
    Renders an OpenCV image into an existing Jupyter display handle.

    Despite the parameter name, a multi-channel image is treated as BGR
    (OpenCV channel order) and converted to RGB before encoding. A 2-D
    (single-channel) image is encoded as-is, because the BGR->RGB
    conversion does not accept single-channel input.

    Args:
        image_rgb: Image array; either 2-D grayscale or multi-channel BGR.
        display: Object with an ``update(obj)`` method (the notebook passes
            the handle returned by ``IPython.display.display``).
    """
    if image_rgb.ndim == 2:
        image = image_rgb
    else:
        image = cv2.cvtColor(image_rgb, cv2.COLOR_BGR2RGB)

    # Encode to JPEG in memory; the buffer is closed once the bytes are handed over
    with BytesIO() as buffer:
        PIL.Image.fromarray(image).save(buffer, 'jpeg')
        img_display = IPython.display.Image(data=buffer.getvalue())
    display.update(img_display)

In [None]:
### FRAME EXTRACTION BLOCK ###

fps = 60    # hard-coded frame rate used to turn the second offsets below into frame indices
start = 25  # clip start, in seconds
end = 90    # clip end, in seconds; -1 represents end of video

# Keep the -1 sentinel intact: multiplying it by fps would hand load_video a
# different negative number instead of the documented "end of video" value.
end_frame = end * fps if end >= 0 else -1
frames = load_video(video_path, start * fps, end_frame)

# Print extracted footage details
print(f"===== Extracted Frame Details ======")
print(f"Frame Count: {len(frames)}")
print(f"Video Length: {round(len(frames)/fps/60, 2)} min")
if frames:
    print(f"Frame Shape: {frames[0].shape}")
else:
    # Nothing was extracted; make that obvious rather than failing on frames[0]
    print("Frame Shape: n/a (no frames extracted - check start/end)")

In [None]:
# The actual display
# display_id=True lets IPython generate the id (the documented types are str or
# bool) and makes display() return a handle; the inference loop redraws this
# single output through that handle's update().
# NOTE: the name `display` shadows IPython's built-in display() for later cells.
display = IPython.display.display('', display_id=True)

In [None]:
# STEP 1: Load model and labels using pycoral.utils
print('Loading {} with {} labels.'.format(model_path, label_path))
interpreter = make_interpreter(model_path)
interpreter.allocate_tensors()
labels = read_label_file(label_path)
inference_size = input_size(interpreter)

# Loop through each frame and pass through model
# Measure inference time for each pass (milliseconds, one entry per frame)
inf_time = []
for frame in frames:
    # STEP 2: Preprocess image to the size and shape accepted by model.
    # The model needs 3 channels. Frames from load_video are single-channel, so
    # they must be expanded with GRAY2RGB (BGR2RGB rejects 1-channel input).
    if frame.ndim == 2:
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    else:
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rgb_image = cv2.resize(rgb_image, inference_size)

    # STEP 3: Let the model do the work; only run_inference itself is timed.
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    time_cp1 = time.perf_counter()
    run_inference(interpreter, rgb_image.tobytes())
    time_cp2 = time.perf_counter()

    # STEP 4: Get objects detected from the model (keep at most NUM_CLASSES of them)
    objs = get_objects(interpreter, SCORE_THRESH)[:NUM_CLASSES]

    # STEP 5: Label detected objects on a copy, so the cached frames stay clean
    # and re-running this cell does not stack annotations on top of old ones.
    image = append_objs_to_img(frame.copy(), inference_size, objs, labels)

    # STEP 6: Show the image to the display. The display helper converts from
    # BGR, so hand it a 3-channel image.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    show_rgb_image_to_display(image, display)

    # Calculate inference time and add to list
    inf_time.append(round((time_cp2 - time_cp1)*1000, 2))

In [None]:
import matplotlib.pyplot as plt

# Graph inference times as a line chart
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(inf_time, marker='o', linestyle='-')
ax.set_title("Inference Time per Sample")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Inference Time (ms)")
ax.grid(True)
fig.tight_layout()
plt.show()

# Calculate average fps from the mean per-frame inference time (ms).
# Guard against an empty run or a zero mean so this cell cannot divide by zero.
avg = sum(inf_time) / len(inf_time) if inf_time else 0
if avg > 0:
    print(f"Average FPS: {1/(avg/1000)}")
else:
    print("Average FPS: n/a (no inference times recorded)")