# In [5]:  (Jupyter notebook cell prompt left over from export)
import cv2
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import numpy as np
import os


# --- Playback state ----------------------------------------------------------
is_playing = False         # True while update_frame() keeps rescheduling itself
cap = None                 # Video capture object; None until a video is opened
video_paths = []           # Paths of the videos found by load_videos()
current_video_index = 0    # Position in video_paths of the selected video
total_frames = 0           # Frame count reported for the open video


# --- YOLO detector state (populated by load_yolo) ----------------------------
classes = []               # Class names read from the names file
output_layers = []         # Names of the network's output layers
net = None                 # The loaded detection network


# --- Tkinter widgets ---------------------------------------------------------
# `root` is created in the __main__ block; the rest are created in setup_gui().
root = None
seek_bar = None
seek_var = None
video_listbox = None
play_btn = None
info_label = None
video_label = None


def load_yolo():
    """Load the YOLO network and the class names it predicts.

    Reads the model from ``cfg/yolov3.weights`` / ``cfg/yolov3.cfg`` and the
    class names from ``cfg/coco.names`` (paths are relative to the current
    working directory) and stores the results in the module-level ``net``,
    ``classes`` and ``output_layers``.

    Raises:
        OSError: If ``cfg/coco.names`` cannot be opened.
    """
    global net, classes, output_layers
    net = cv2.dnn.readNet("cfg/yolov3.weights", "cfg/yolov3.cfg")

    # Iterate the file directly; no need to materialise it with readlines().
    with open("cfg/coco.names", "r", encoding="utf-8") as f:
        classes = [line.strip() for line in f]

    # Ask OpenCV for the output layer names directly. Indexing
    # getLayerNames() with getUnconnectedOutLayers() by hand depends on
    # whether that call returns a flat array or an N x 1 array, which differs
    # between OpenCV releases.
    output_layers = list(net.getUnconnectedOutLayersNames())

def setup_gui():
    """Create and lay out every widget inside the global ``root`` window.

    ``root`` must already exist. The widgets other functions need later
    (video display, status label, play button, video list, seek bar and its
    variable) are stored in module-level globals.
    """
    global root, video_label, info_label, play_btn, video_listbox, seek_var, seek_bar

    root.configure(bg="white")

    # Outer container with a margin around all content.
    container = tk.Frame(root, bg="white")
    container.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

    # Frame display area: takes all remaining space.
    video_label = tk.Label(container, bg="black")
    video_label.pack(fill=tk.BOTH, expand=True)

    # Row of controls underneath the video.
    controls = tk.Frame(container, bg="white")
    controls.pack(fill=tk.X, pady=10)

    # The load button is never referenced again, so it needs no global.
    load_button = tk.Button(controls, text="Load Videos", command=load_videos)
    load_button.pack(side=tk.LEFT, padx=5)

    play_btn = tk.Button(controls, text="Play", command=toggle_play)
    play_btn.pack(side=tk.LEFT, padx=5)

    # Status text shown next to the buttons.
    info_label = tk.Label(controls, text="Ready to load videos", bg="white")
    info_label.pack(side=tk.LEFT, padx=20)

    # Video picker on the right-hand side of the control row.
    video_listbox = tk.Listbox(controls, height=3)
    video_listbox.pack(side=tk.RIGHT, padx=5)
    video_listbox.bind("<<ListboxSelect>>", select_video)

    # Seek bar expressed as a percentage (0-100) of the video length.
    seek_var = tk.DoubleVar()
    seek_bar = ttk.Scale(
        container,
        from_=0,
        to=100,
        orient="horizontal",
        variable=seek_var,
        command=seek_video,
    )
    seek_bar.pack(fill=tk.X, padx=10, pady=5)

def load_videos():
    """Scan the ``videos`` folder and show the video files found in the GUI.

    Replaces the contents of the module-level ``video_paths`` list with the
    paths of all ``.mp4`` / ``.avi`` / ``.mov`` files (extension matched
    case-insensitively) in the folder, in sorted order, then refreshes the
    listbox and the status label to match.
    """
    video_dir = "videos"
    extensions = (".mp4", ".avi", ".mov")

    found = []
    if os.path.isdir(video_dir):
        # sorted(): os.listdir() returns entries in arbitrary order.
        for name in sorted(os.listdir(video_dir)):
            path = os.path.join(video_dir, name)
            # lower(): also accept upper-case extensions such as "CLIP.MP4".
            # isfile(): skip directories that merely have a video-like name.
            if name.lower().endswith(extensions) and os.path.isfile(path):
                found.append(path)

    # Update the shared list in place so it stays the same object.
    video_paths[:] = found

    # Populate listbox
    video_listbox.delete(0, tk.END)
    for path in video_paths:
        video_listbox.insert(tk.END, os.path.basename(path))

    # Update status text
    if video_paths:
        info_label.config(text=f"Loaded {len(video_paths)} videos")
    else:
        info_label.config(text="No videos found in 'videos' folder")

def select_video(event):
    """Open the video the user picked in the listbox.

    Args:
        event: Tkinter event whose ``widget`` provides ``curselection()``.
    """
    global current_video_index
    picked = event.widget.curselection()
    if not picked:
        # Empty selection: nothing to open.
        return
    current_video_index = picked[0]
    initialize_video()

def initialize_video():
    """Open the video at ``current_video_index`` and reset playback state.

    Releases any previously opened capture, opens the selected file, records
    its frame count in ``total_frames`` and puts the UI back into the paused
    state with the seek bar at the start. If the index is out of range or the
    file cannot be opened, ``cap`` is left as ``None`` and the status label
    explains why.
    """
    global cap, total_frames, is_playing

    # Stop playback first so a pending update_frame() callback does nothing.
    is_playing = False
    play_btn.config(text="Play")

    if cap is not None:
        cap.release()
        cap = None
    total_frames = 0

    # The index can be stale if the video list changed after the selection.
    if not 0 <= current_video_index < len(video_paths):
        info_label.config(text="No video selected")
        return

    cap_path = video_paths[current_video_index]
    cap = cv2.VideoCapture(cap_path)
    if not cap.isOpened():
        info_label.config(text=f"Could not open {os.path.basename(cap_path)}")
        cap.release()
        cap = None
        return

    # Treat a negative reported frame count as "unknown" (0).
    total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
    seek_bar.config(to=100)
    # A newly opened video starts at the beginning.
    seek_var.set(0)

def toggle_play():
    """Switch between playing and paused; does nothing without an open video."""
    global is_playing
    if cap and cap.isOpened():
        if is_playing:
            # Pause: the frame loop checks this flag and stops on its own.
            is_playing = False
            play_btn.config(text="Play")
        else:
            # Play: update the button, then start the frame loop.
            is_playing = True
            play_btn.config(text="Pause")
            update_frame()

def update_frame():
    """Read, annotate and display the next frame, then reschedule itself.

    Runs only while ``is_playing`` is true and a capture is open. When no
    more frames can be read, the video is rewound and playback is stopped so
    that the play button and seek bar match the actual state.
    """
    global is_playing
    if not is_playing or not cap or not cap.isOpened():
        return

    ret, frame = cap.read()
    if not ret:
        # No frame available (typically end of video): rewind and go back to
        # the paused state. Without this the loop stops silently while the
        # button still reads "Pause".
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        is_playing = False
        play_btn.config(text="Play")
        seek_var.set(0)
        return

    frame = detect_objects(frame)  # Applying YOLO

    # Convert BGR (OpenCV) to RGB (PIL) and show the frame.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    photo = ImageTk.PhotoImage(image=Image.fromarray(frame_rgb))
    # Keep a reference on the widget so the image is not garbage-collected
    # while it is being displayed.
    video_label.img = photo
    video_label.config(image=photo)

    # Update seek bar; skipped when the frame count is unknown (0) to avoid
    # dividing by zero.
    if total_frames > 0:
        current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        seek_var.set(min(current_frame / total_frames, 1.0) * 100)

    # Schedule the next frame.
    root.after(10, update_frame)

def seek_video(value):
    """Jump to the frame matching the seek bar position.

    Args:
        value: Seek bar position as a percentage (0-100); anything accepted
            by ``float()``.

    Does nothing when no video is open or the frame count is unknown.
    """
    if cap and cap.isOpened() and total_frames > 0:
        fraction = min(max(float(value) / 100, 0.0), 1.0)
        # Clamp to the last valid index: 100 % would otherwise map to
        # `total_frames`, one past the final frame.
        frame_number = min(int(fraction * total_frames), total_frames - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

def detect_objects(frame, conf_threshold=0.5, nms_threshold=0.4):
    """Run YOLO on ``frame`` and draw the detections onto it.

    Args:
        frame: Image array as read from the video capture; annotated in place.
        conf_threshold: Minimum class score for a detection to be kept. Also
            passed to non-maximum suppression as its score threshold.
        nms_threshold: Overlap threshold passed to non-maximum suppression.

    Returns:
        The same ``frame`` object with a white box and a
        ``"<class> <score>"`` label drawn for every detection that survives
        non-maximum suppression.
    """
    height, width = frame.shape[:2]

    # Prepare frame for YOLO input (0.00392 is approximately 1/255).
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    # Each output row is [cx, cy, w, h, <unused here>, class scores...], with
    # the box given relative to the frame size.
    for out in outs:
        scores = out[:, 5:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(len(best_class)), best_class]
        # Pick the confident rows with NumPy so the Python loop only visits
        # those instead of every row.
        for row in np.flatnonzero(best_score > conf_threshold):
            detection = out[row]
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert centre-based coordinates to the top-left corner.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(best_score[row]))
            class_ids.append(int(best_class[row]))

    # Filter overlapping boxes
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # Flatten so the kept indices can be iterated whether NMSBoxes returns an
    # empty tuple, a flat array or an N x 1 array.
    kept = sorted(np.array(indexes, dtype=int).flatten().tolist())

    # Draw plain white boxes and labels
    for i in kept:
        x, y, w, h = boxes[i]
        class_id = class_ids[i]
        # Fall back to the numeric id if the names list is shorter than the
        # model's class count.
        name = classes[class_id] if class_id < len(classes) else str(class_id)
        label = f"{name} {confidences[i]:.2f}"
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 2)
        cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    return frame

if __name__ == "__main__":
    # The window must exist before setup_gui(), which attaches widgets to the
    # global `root`.
    root = tk.Tk()
    root.title("PAU Video Object Detector")
    root.geometry("1000x700")
    load_yolo()
    setup_gui()
    try:
        root.mainloop()
    finally:
        # Release the open video, if any, once the window has been closed.
        if cap is not None:
            cap.release()
