# In [1]:
import cv2
import tkinter as tk
from tkinter import filedialog, Scale, HORIZONTAL, Button, Label, StringVar, OptionMenu
from PIL import Image, ImageTk
import numpy as np

# Load the YOLO network from its weights/config pair and remember the names of
# its unconnected output layers, which are what net.forward() is asked for.
net = cv2.dnn.readNet("cfg/yolov3.weights", "cfg/yolov3.cfg")
layer_names = net.getUnconnectedOutLayersNames()

# One class label per line; a detection's class id indexes into this list.
with open("cfg/coco.names", "r") as f:
    classes = f.read().splitlines()

# Global variables
# --- video / playback state ---
caps = []  # one cv2.VideoCapture per entry of video_paths (same order)
cap = None  # the capture currently shown; an element of caps
paused = True  # playback starts paused; flipped by toggle_play()
video_loaded = False  # set by configure_video() once a capture is selected
total_frames = 0  # frame count reported by the active capture
fps = 30  # default until configure_video() reads the real rate
current_frame = 0  # index of the next frame play_video() will show
video_paths = []  # paths of the loaded videos (at most three)

# --- widgets, created in load_gui() ---
window = None  # root Tk window
label = None  # tk.Label that displays the annotated frame
scale = None  # horizontal seek slider
play_btn = None  # Play/Pause button
frame_info = None  # "Frame: i/n" text label
video_selector = None  # OptionMenu used to pick the active video
video_var = None  # StringVar behind video_selector; holds the selected path


def load_videos():
    """Ask the user for up to three video files and show the first one.

    Files that OpenCV cannot open are skipped. If nothing was chosen, or none
    of the chosen files could be opened, the currently loaded videos are left
    untouched. Otherwise the previous captures are released, the selector
    menu is rebuilt with the new paths, and the first video becomes active.
    """
    global video_paths, cap
    file_paths = filedialog.askopenfilenames(
        filetypes=[("Video files", "*.mp4 *.avi *.mov *.mkv")]
    )
    if not file_paths:
        return

    # Open the new files first so a bad selection cannot wipe a working state.
    opened_paths, opened_caps = [], []
    for path in file_paths[:3]:
        capture = cv2.VideoCapture(path)
        if capture.isOpened():
            opened_paths.append(path)
            opened_caps.append(capture)
        else:
            capture.release()
    if not opened_caps:
        return

    # Release the decoders of the previous selection before dropping them.
    for old_capture in caps:
        old_capture.release()
    caps[:] = opened_caps
    video_paths = opened_paths

    # The OptionMenu is created with a single empty entry; replace its items
    # with the loaded paths so the user can actually switch between videos.
    if video_selector is not None:
        menu = video_selector["menu"]
        menu.delete(0, "end")
        for path in video_paths:
            menu.add_command(
                label=path, command=lambda chosen=path: video_var.set(chosen)
            )

    # Setting the variable also fires the write trace registered in
    # load_gui(); the explicit calls below are kept so this function does not
    # depend on that trace being installed.
    video_var.set(video_paths[0])
    cap = caps[0]
    configure_video()
    update_frame()


def configure_video():
    """Reset playback state and the seek slider for the active capture.

    Reads the frame count and frame rate from ``cap``, sizes the slider to the
    valid frame indices (0 .. total_frames - 1), rewinds to frame 0 and marks
    a video as loaded. Does nothing when no capture is selected.
    """
    global total_frames, fps, current_frame, video_loaded
    if cap is None:
        return
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Some containers report 0 (or NaN) for the frame rate; play_video()
    # divides by fps, so fall back to the module default of 30.
    reported_fps = cap.get(cv2.CAP_PROP_FPS)
    fps = reported_fps if reported_fps > 0 else 30

    # The last valid frame index is total_frames - 1, not total_frames.
    scale.config(to=max(total_frames - 1, 0))
    current_frame = 0
    # Keep the slider in step with the rewound position.
    scale.set(0)
    video_loaded = True


def switch_video(*args):
    """Make the video selected in ``video_var`` the active one.

    Used as a variable-trace callback, so it accepts and ignores whatever
    positional arguments it is called with. Selections that are not one of
    the loaded paths are ignored.
    """
    global cap
    choice = video_var.get()
    if choice not in video_paths:
        return
    position = video_paths.index(choice)
    cap = caps[position]
    configure_video()
    update_frame()


def toggle_play():
    """Flip between playing and paused; no-op until a video is loaded.

    The button caption always names the action a click would perform next.
    """
    global paused
    if not video_loaded:
        return
    paused = not paused
    if paused:
        play_btn.config(text="Play")
    else:
        play_btn.config(text="Pause")
        play_video()


def _stop_playback():
    """Switch to the paused state and restore the "Play" button caption."""
    global paused
    paused = True
    play_btn.config(text="Play")


def play_video():
    """Show the frame at ``current_frame`` and schedule the next step.

    Runs one step of the playback loop: seek, read, run detection and display,
    advance ``current_frame`` and re-arm itself through ``window.after``.
    Playback stops (and the button goes back to "Play") at the end of the
    video, when a frame cannot be read, or when the capture is not open.
    """
    global current_frame

    # Only one loop may be armed at a time. A quick Pause -> Play can call this
    # directly while an earlier step is still scheduled; cancel that one so
    # the video does not advance twice per tick. Cancelling an id that has
    # already fired is harmless.
    pending = getattr(play_video, "_after_id", None)
    play_video._after_id = None
    if pending is not None:
        window.after_cancel(pending)

    if paused:
        return
    if cap is None or not cap.isOpened():
        _stop_playback()
        return

    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
    ret, frame = cap.read()
    if not ret:
        # Without this the UI would stay in the "playing" state with nothing
        # scheduled, and the user would have to click twice to restart.
        _stop_playback()
        return

    display_frame(frame)
    scale.set(current_frame)
    current_frame += 1
    if current_frame >= total_frames:
        _stop_playback()
        return

    # Guard against a zero/NaN frame rate; ~30 fps is the fallback.
    delay = max(1, int(1000 / fps)) if fps > 0 else 33
    play_video._after_id = window.after(delay, play_video)


def _collect_detections(outputs, width, height, conf_threshold):
    """Convert raw network outputs into pixel boxes, confidences and class ids.

    Each row is read as (center_x, center_y, w, h, <unused>, class scores...),
    with the box values relative to the frame size. Rows whose best class
    score does not exceed ``conf_threshold`` are dropped.
    Assumes every element of ``outputs`` is a 2-D array of such rows.
    """
    boxes, confidences, class_ids = [], [], []
    for out in outputs:
        out = np.asarray(out)
        if out.size == 0:
            continue
        # Pick the best class per row in one vectorised pass, then only touch
        # the few rows that survive the threshold.
        class_scores = out[:, 5:]
        best_ids = class_scores.argmax(axis=1)
        best_scores = class_scores[np.arange(len(out)), best_ids]
        for row in np.flatnonzero(best_scores > conf_threshold):
            detection = out[row]
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Boxes are stored as top-left corner plus size.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(int(best_ids[row]))
    return boxes, confidences, class_ids


def display_frame(frame, conf_threshold=0.5, nms_threshold=0.4):
    """Run detection on ``frame``, draw the results and show it in the GUI.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``; boxes and
            captions are drawn onto it in place.
        conf_threshold: minimum class score for a detection to be kept; also
            passed to non-maximum suppression as its score threshold.
        nms_threshold: overlap threshold passed to non-maximum suppression.

    Side effects: updates the image shown by ``label`` and the text of
    ``frame_info``.
    """
    height, width, _ = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(layer_names)

    boxes, confidences, class_ids = _collect_detections(
        outputs, width, height, conf_threshold
    )

    # NMSBoxes returns an empty tuple (not an array) when nothing survives, and
    # older OpenCV builds return an (N, 1) array; normalise both to a flat
    # integer array so frames without detections do not crash.
    kept = (
        cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
        if boxes
        else ()
    )
    for i in np.asarray(kept, dtype=int).reshape(-1):
        x, y, w, h = boxes[i]
        label_text = f"{classes[class_ids[i]]} {round(confidences[i], 2)}"
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(
            frame,
            label_text,
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
        )

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(rgb)
    imgtk = ImageTk.PhotoImage(image=img)
    # Keep a reference on the widget so the image is not garbage-collected
    # while it is being displayed.
    label.imgtk = imgtk
    label.config(image=imgtk)
    frame_info.config(text=f"Frame: {current_frame}/{total_frames}")


def update_frame():
    """Redraw the frame at ``current_frame`` of the active capture.

    Does nothing before a video is loaded; a frame that cannot be read
    leaves the display as it was.
    """
    if video_loaded:
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        ok, image = cap.read()
        if ok:
            display_frame(image)


def seek_video(val):
    """Slider callback: jump to frame ``val`` and redraw.

    Args:
        val: the slider value as passed by Tk (a numeric string).
    """
    global current_frame
    if not video_loaded:
        return
    # Tolerate float-formatted values such as "12.0".
    target = int(float(val))

    # Scale.set() is a widget command, so Tk also invokes this callback for
    # the programmatic slider updates made by play_video(). By the time that
    # echo arrives play_video() has already advanced current_frame by one;
    # acting on it would step playback back a frame and run detection again
    # on a frame that is already on screen.
    if not paused and target == current_frame - 1:
        return

    current_frame = target
    update_frame()


def load_gui():
    """Build the main window, wire up the callbacks and run the Tk main loop.

    Creates the frame display label, the seek slider, the frame counter, the
    load and play buttons and the video selector, storing each widget in its
    module-level global. Blocks until the window is closed, then releases all
    open video captures.
    """
    global window, label, scale, play_btn, frame_info, video_selector, video_var
    window = tk.Tk()
    window.title("YOLO Object Detection")
    window.configure(bg="#1c1c1c")

    video_var = StringVar()

    label = tk.Label(window, bg="#1c1c1c")
    label.pack()

    # The upper bound is a placeholder; configure_video() resizes the slider
    # once a video is loaded.
    scale = Scale(
        window,
        from_=0,
        to=100,
        orient=HORIZONTAL,
        length=400,
        command=seek_video,
        bg="#2c2c2c",
        fg="white",
        troughcolor="#444",
        highlightthickness=0,
    )
    scale.pack()

    frame_info = Label(window, text="Frame: 0/0", bg="#1c1c1c", fg="white")
    frame_info.pack()

    btn_frame = tk.Frame(window, bg="#1c1c1c")
    btn_frame.pack(pady=10)

    Button(
        btn_frame,
        text="Load Up to 3 Videos",
        command=load_videos,
        bg="#333",
        fg="white",
    ).pack(side=tk.LEFT, padx=5)

    play_btn = Button(
        btn_frame, text="Play", command=toggle_play, bg="#333", fg="white"
    )
    play_btn.pack(side=tk.LEFT, padx=5)

    video_selector = OptionMenu(window, video_var, "")
    video_selector.config(bg="#333", fg="white")
    # trace_add("write", ...) replaces the deprecated trace("w", ...); the
    # callback is invoked the same way whenever the selection changes.
    video_var.trace_add("write", switch_video)
    video_selector.pack(pady=10)

    window.mainloop()

    # The window is closed: free the decoder handles that are still open.
    for capture in caps:
        capture.release()


# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    load_gui()