# In [1]:
import cv2
import numpy as np
from tkinter import filedialog
import json
import os
from tflite_runtime.interpreter import Interpreter, load_delegate
import threading
from queue import Queue
import time
from sklearn.metrics.pairwise import euclidean_distances
import tkinter as tk

class ObjectDetector:
    """Detect objects in video frames with a YOLO-style TFLite model on an Edge TPU.

    The detector owns the TFLite interpreter, turns frames into the model's
    input tensor, decodes the raw output into pixel boxes, filters them with
    non-maximum suppression and draws the surviving boxes onto the frame.

    The first output tensor is read as ``(1, 4 + num_classes, num_candidates)``:
    rows 0-3 are ``x_center, y_center, width, height`` treated as fractions of
    the frame size, the remaining rows are per-class scores.
    """

    # Edge TPU runtime library name per operating system (see the Coral docs).
    _EDGETPU_LIBRARIES = {
        'Linux': 'libedgetpu.so.1',
        'Darwin': 'libedgetpu.1.dylib',
        'Windows': 'edgetpu.dll',
    }

    def __init__(self, model_path, label_path, threshold=0.3,
                 delegate_library=None, nms_threshold=0.7):
        """Load the model and labels and prepare the interpreter.

        Args:
            model_path: Path to the ``.tflite`` model file.
            label_path: Path to a JSON file mapping class-id strings to names.
            threshold: Minimum class score for a detection to be drawn.
            delegate_library: Edge TPU delegate library to load. ``None``
                picks the library for the current OS and falls back to
                ``'edgetpu.dll'`` (the previously hard-coded value).
            nms_threshold: IoU above which a lower-scored box is suppressed.
        """
        import platform  # local import: only needed to pick the delegate library

        if delegate_library is None:
            delegate_library = self._EDGETPU_LIBRARIES.get(platform.system(), 'edgetpu.dll')

        self.threshold = threshold
        self.nms_threshold = nms_threshold
        self.interpreter = Interpreter(
            model_path=model_path,
            experimental_delegates=[load_delegate(delegate_library)],
        )
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        # Input tensor shape without the batch axis: (height, width, channels).
        self.input_shape = self.input_details[0]['shape'][1:]
        self.class_labels = self.load_class_labels(label_path)

        # Counters behind get_fps().
        self.frame_count = 0
        self.start_time = time.time()

    def load_class_labels(self, label_path):
        """Return the parsed JSON content of ``label_path``."""
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(label_path, 'r', encoding='utf-8') as json_file:
            return json.load(json_file)

    def preprocess_frame(self, frame):
        """Convert a BGR frame into a float32 ``(1, H, W, C)`` tensor scaled to [0, 1].

        The frame is stretched to the model input size (no letterboxing), which
        matches how ``perform_inference`` maps boxes back to the original frame.
        """
        input_h, input_w = int(self.input_shape[0]), int(self.input_shape[1])
        # cv2.resize takes dsize as (width, height), not (height, width).
        resized = cv2.resize(frame, (input_w, input_h))
        # OpenCV decodes video as BGR.
        # NOTE(review): assumes an RGB-trained model (Ultralytics YOLOv8 exports
        # are); drop this conversion if the model was trained on BGR input.
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        normalized = rgb.astype(np.float32) / 255.0
        return np.expand_dims(normalized, 0)

    def nms(self, boxes, scores, threshold):
        """Greedy, class-agnostic non-maximum suppression based on IoU.

        Args:
            boxes: ``(N, 4)`` array-like of ``x1, y1, x2, y2`` pixel corners
                (inclusive, hence the ``+ 1`` when measuring extents).
            scores: ``(N,)`` array-like of confidences.
            threshold: A box is dropped when its IoU with an already kept,
                higher-scored box exceeds this value.

        Returns:
            List of kept indices into ``boxes``, highest score first.
        """
        boxes = np.asarray(boxes, dtype=np.float64)
        scores = np.asarray(scores)
        if boxes.size == 0:
            return []

        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        # Degenerate (inverted) boxes get area 0 instead of a negative area.
        areas = np.maximum(0.0, x2 - x1 + 1) * np.maximum(0.0, y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(int(best))

            inter_w = np.maximum(
                0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
            inter_h = np.maximum(
                0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
            intersection = inter_w * inter_h
            # Intersection over *union*: dividing by the other box's area alone
            # would suppress every small box that lies inside a larger one.
            union = areas[best] + areas[rest] - intersection
            iou = np.divide(intersection, union,
                            out=np.zeros_like(intersection), where=union > 0)

            order = rest[iou <= threshold]
        return keep

    def _decode_detections(self, output, orig_h, orig_w):
        """Turn raw model output into thresholded pixel boxes, scores and class ids.

        Returns:
            ``(boxes, scores, class_ids)`` where ``boxes`` is an ``(M, 4)`` int
            array of ``x1, y1, x2, y2`` clipped to the frame, and the other two
            are ``(M,)`` arrays, for the ``M`` candidates scoring at least
            ``self.threshold``.
        """
        class_scores = output[4:]
        scores = class_scores.max(axis=0)
        # Drop weak candidates before NMS: they can never suppress a stronger
        # box, so the result is unchanged while NMS handles far fewer boxes.
        confident = scores >= self.threshold

        x_center, y_center, width, height = output[:4, confident]
        half_w, half_h = width / 2, height / 2
        boxes = np.stack(
            [
                (x_center - half_w) * orig_w,
                (y_center - half_h) * orig_h,
                (x_center + half_w) * orig_w,
                (y_center + half_h) * orig_h,
            ],
            axis=1,
        ).astype(np.int64)
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, orig_w - 1)
        boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, orig_h - 1)

        class_ids = class_scores[:, confident].argmax(axis=0)
        return boxes, scores[confident], class_ids

    def perform_inference(self, frame, preprocessed_frame, orig_h, orig_w):
        """Run the model on ``preprocessed_frame`` and draw detections on ``frame``.

        ``frame`` is modified in place; ``orig_h``/``orig_w`` are its size in
        pixels. Increments ``self.frame_count`` once per call.
        """
        self.interpreter.set_tensor(self.input_details[0]['index'], preprocessed_frame)
        self.interpreter.invoke()
        output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
        output = np.copy(output_data[0])

        boxes, scores, class_ids = self._decode_detections(output, orig_h, orig_w)

        for idx in self.nms(boxes, scores, self.nms_threshold):
            # Plain ints: some OpenCV builds reject NumPy integer scalars as points.
            x1, y1, x2, y2 = (int(v) for v in boxes[idx])
            class_id = str(class_ids[idx])
            # Fall back to the numeric id when the label file has no entry for it.
            class_name = self.class_labels.get(class_id, class_id)
            # Keep the caption inside the frame for boxes touching the top edge.
            label_y = y1 - 10 if y1 >= 20 else y1 + 15
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
            cv2.putText(frame, f"{class_name}: {scores[idx]:.2f}", (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        self.frame_count += 1

    def get_fps(self):
        """Return processed frames per second since construction (0 if no time elapsed)."""
        elapsed_time = time.time() - self.start_time
        return self.frame_count / elapsed_time if elapsed_time > 0 else 0

    def detect_objects(self, frame):
        """Preprocess ``frame``, run inference and draw the detections on it in place."""
        preprocessed_frame = self.preprocess_frame(frame)
        orig_h, orig_w = frame.shape[:2]
        self.perform_inference(frame, preprocessed_frame, orig_h, orig_w)

    def run_detection(self, cap, output_queue):
        """Read frames from ``cap`` into ``output_queue`` until the stream ends.

        Despite its name this only decodes frames; inference happens in the
        consumer. A final ``None`` is always queued as the end-of-stream marker
        and ``cap`` is always released, even if reading raises, so the consumer
        can never block forever on a dead reader.
        """
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                output_queue.put(frame)
        finally:
            try:
                cap.release()
            finally:
                output_queue.put(None)

class App:
    """Tk front end: pick a video file and watch detections in an OpenCV window.

    A background thread decodes frames into a bounded queue while the Tk
    callback thread runs inference and displays the annotated frames.
    """

    # Raw string: identical value to the former literal, without relying on
    # the invalid "\y" escape sequence (a SyntaxWarning on recent Pythons).
    DEFAULT_MODEL_PATH = r'yolov8m_saved_model\yolov8m_integer_quant.tflite'
    DEFAULT_LABEL_PATH = 'C:/object-detection-coral/label_files/labels_coco.json'

    # Cap on decoded frames waiting for inference. Decoding is usually much
    # faster than inference, so an unbounded queue would buffer the whole video.
    MAX_QUEUED_FRAMES = 32

    class _StoppableCapture:
        """Capture wrapper that lets the consumer ask the reader thread to stop.

        It exposes the three methods the reader loop uses (``isOpened``,
        ``read``, ``release``). Only the reader thread touches the wrapped
        capture; other threads just set the stop flag.
        """

        def __init__(self, cap):
            self._cap = cap
            self._stop_requested = threading.Event()

        def stop(self):
            """Make ``isOpened()`` report False so the reader loop winds down."""
            self._stop_requested.set()

        def isOpened(self):
            return not self._stop_requested.is_set() and self._cap.isOpened()

        def read(self):
            return self._cap.read()

        def release(self):
            self._cap.release()

    def __init__(self, root, model_path=DEFAULT_MODEL_PATH, label_path=DEFAULT_LABEL_PATH):
        """Build the window.

        Args:
            root: The Tk root window.
            model_path: TFLite model handed to ``ObjectDetector``.
            label_path: JSON label file handed to ``ObjectDetector``.
        """
        self.root = root
        self.root.title("Object Detection")
        self.detector = None
        self.model_path = model_path
        self.label_path = label_path

        select_button = tk.Button(root, text="Select Video", command=self.open_video)
        select_button.pack()

    @staticmethod
    def _drain(frame_queue):
        """Discard everything currently queued (caller must be the only consumer)."""
        while not frame_queue.empty():
            frame_queue.get_nowait()

    def open_video(self):
        """Ask for a video file and play it with detections until it ends or 'q' is pressed."""
        file_path = filedialog.askopenfilename()
        if not file_path:
            return

        capture = self._StoppableCapture(cv2.VideoCapture(file_path))
        output_queue = Queue(maxsize=self.MAX_QUEUED_FRAMES)
        detection_thread = None
        try:
            self.detector = ObjectDetector(
                model_path=self.model_path,
                label_path=self.label_path,
            )
            # Daemon: a reader stuck on I/O must never keep the process alive.
            detection_thread = threading.Thread(
                target=self.detector.run_detection,
                args=(capture, output_queue),
                daemon=True,
            )
            detection_thread.start()
            self.process_output(output_queue)
        finally:
            capture.stop()
            if detection_thread is None:
                # The reader never started, so nobody else will close the file.
                capture.release()
            else:
                # The reader may be blocked on a full queue: free slots until it
                # notices the stop request and exits (bounded wait, about 2 s).
                for _ in range(40):
                    if not detection_thread.is_alive():
                        break
                    self._drain(output_queue)
                    detection_thread.join(timeout=0.05)
            cv2.destroyAllWindows()

    def process_output(self, output_queue):
        """Run detection on queued frames and display them.

        Returns when the ``None`` end-of-stream marker is received or the user
        presses 'q' in the OpenCV window. Blocks the calling (Tk) thread while
        the video plays.
        """
        while True:
            frame = output_queue.get()
            if frame is None:
                break
            self.detector.detect_objects(frame)
            cv2.putText(frame, f"FPS: {self.detector.get_fps():.2f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Object Detection', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

def main():
    """Create the Tk root window, attach the App to it and run the Tk event loop."""
    window = tk.Tk()
    application = App(window)  # held for the lifetime of the event loop
    window.mainloop()


if __name__ == "__main__":
    main()
