In [None]:
import cv2
import numpy as np
import os

# Load YOLO
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Open video file
video_path = 'videos/vid3.mp4'
cap = cv2.VideoCapture(video_path)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape

    # Preprocess frame
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))

    if len(indexes) > 0:
        for i in indexes:
            i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, f"{label} {confidence}", (x, y + 20), font, 2, (255, 255, 255), 2)

    cv2.imshow("Video", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import os

# Load YOLO
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Global variables
cap = None
video_path = None
frame_count = 0
is_playing = False
seek_scale = None
video_label = None
root = None
current_frame = 0
video_listbox = None
video_files = []

# Load video from selected list

def load_video():
    global cap, frame_count, video_path, is_playing, current_frame
    selection = video_listbox.curselection()
    if not selection:
        return
    index = selection[0]
    path = video_files[index]
    video_path = path
    if cap:
        cap.release()
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    seek_scale.config(to=frame_count)
    current_frame = 0
    is_playing = True
    play_video()

# Detect and draw objects
def detect_objects(frame):
    height, width, _ = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))

    if len(indexes) > 0:
        for i in indexes:
            i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, f"{label} {confidence}", (x, y + 20), font, 2, (255, 255, 255), 2)

    return frame

# Play video frame-by-frame
def play_video():
    global current_frame
    if not cap or not is_playing:
        return

    ret, frame = cap.read()
    if not ret:
        return

    frame = detect_objects(frame)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=img)
    video_label.imgtk = imgtk
    video_label.config(image=imgtk)

    seek_scale.set(current_frame)
    current_frame += 1
    if current_frame < frame_count and is_playing:
        root.after(20, play_video)

# Toggle play/pause
def toggle_play():
    global is_playing
    is_playing = not is_playing
    if is_playing:
        play_video()

# Seek to specific frame
def seek_frame(val):
    global current_frame
    current_frame = int(float(val))
    if cap:
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
    play_video()

# Skip forward
def skip_forward():
    global current_frame
    current_frame = min(current_frame + 30, frame_count - 1)
    play_video()

# Skip backward
def skip_backward():
    global current_frame
    current_frame = max(current_frame - 30, 0)
    play_video()

# GUI setup
root = tk.Tk()
root.title("YOLO Video Object Detection")
root.geometry("800x600")

video_label = tk.Label(root, bg="black")
video_label.pack(fill=tk.BOTH, expand=True)

control_frame = tk.Frame(root)
control_frame.pack(fill=tk.X, padx=10, pady=5)

video_listbox = tk.Listbox(control_frame, height=4)
video_listbox.pack(side=tk.LEFT, padx=5)

load_btn = tk.Button(control_frame, text="Load Videos", command=lambda: populate_video_list("videos"))
load_btn.pack(side=tk.LEFT, padx=5)

play_btn = tk.Button(control_frame, text="Play/Pause", command=toggle_play)
play_btn.pack(side=tk.LEFT, padx=5)

back_btn = tk.Button(control_frame, text="<< Back", command=skip_backward)
back_btn.pack(side=tk.LEFT, padx=5)

forward_btn = tk.Button(control_frame, text=">> Forward", command=skip_forward)
forward_btn.pack(side=tk.LEFT, padx=5)

seek_scale = ttk.Scale(root, from_=0, to=100, orient="horizontal", command=seek_frame)
seek_scale.pack(fill=tk.X, padx=10, pady=5)

# Populate video list from "videos" folder
def populate_video_list(folder):
    global video_files
    video_files.clear()
    video_listbox.delete(0, tk.END)
    if os.path.exists(folder):
        for file in os.listdir(folder):
            if file.endswith((".mp4", ".avi", ".mov")):
                full_path = os.path.join(folder, file)
                video_files.append(full_path)
                video_listbox.insert(tk.END, file)
    video_listbox.bind("<<ListboxSelect>>", lambda event: load_video())

root.mainloop()


In [None]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import os
import threading

# Global variables
video_path = ""
cap = None
is_playing = False
current_frame = 0
total_frames = 0
video_paused = False
update_id = None

# Initialize YOLO
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Create main window
root = tk.Tk()
root.title("PAU Object Detection")
root.geometry("1000x700")

# Video display
video_label = tk.Label(root)
video_label.pack(fill=tk.BOTH, expand=True)

# Control panel
control_frame = tk.Frame(root)
control_frame.pack(fill=tk.X, pady=10)


def toggle_play():
    global is_playing, video_paused
    is_playing = not is_playing
    video_paused = False
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        play_video()

# Buttons
btn_style = {'bg': '#3498db', 'fg': 'white', 'bd': 0, 'padx': 15, 'pady': 8}

def create_button(parent, text, command):
    return tk.Button(parent, text=text, command=command, **btn_style)

load_btn = create_button(control_frame, "Load Video", lambda: load_video())
load_btn.pack(side=tk.LEFT, padx=5)

play_btn = create_button(control_frame, "Play", toggle_play)
play_btn.pack(side=tk.LEFT, padx=5)

prev_btn = create_button(control_frame, "<<", prev_frame)
prev_btn.pack(side=tk.LEFT, padx=5)

next_btn = create_button(control_frame, ">>", next_frame)
next_btn.pack(side=tk.LEFT, padx=5)

# Seek bar
seek_scale = tk.Scale(control_frame, from_=0, to=100, orient=tk.HORIZONTAL, 
                     command=seek_video, length=400)
seek_scale.pack(side=tk.LEFT, padx=10, fill=tk.X, expand=True)

# Status label
status_label = tk.Label(control_frame, text="Ready to load video")
status_label.pack(side=tk.LEFT, padx=10)

def load_video():
    global video_path, cap, total_frames, current_frame
    file_path = filedialog.askopenfilename(filetypes=[("Video files", "*.mp4 *.avi *.mov")])
    if file_path:
        video_path = file_path
        if cap is not None:
            cap.release()
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        current_frame = 0
        seek_scale.config(to=total_frames)
        status_label.config(text=f"Loaded: {os.path.basename(video_path)}")
        update_frame()

def play_video():
    global is_playing, update_id
    if is_playing and cap is not None:
        update_frame()
        update_id = root.after(30, play_video)

def update_frame():
    global current_frame, cap, video_paused
    
    if cap is None or video_paused:
        return
        
    ret, frame = cap.read()
    if not ret:
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        current_frame = 0
        return
        
    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    seek_scale.set(current_frame)
    
    # Object detection in a separate thread for smoother playback
    detection_thread = threading.Thread(target=process_frame, args=(frame.copy(),))
    detection_thread.start()
    
    # Display frame (without waiting for detection to complete)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame)
    img = ImageTk.PhotoImage(image=img)
    video_label.img = img
    video_label.config(image=img)

def process_frame(frame):
    height, width, _ = frame.shape
    
    # YOLO object detection
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)
    
    boxes = []
    confidences = []
    class_ids = []
    
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    
    if len(indexes) > 0:
        for i in indexes:
            i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, f"{label} {confidence}", (x, y + 20), font, 2, (255, 255, 255), 2)
    
    return frame

def seek_video(pos):
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        frame_pos = int(float(pos))
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
        current_frame = frame_pos
        update_frame()

def next_frame():
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        current_frame = min(current_frame + 1, total_frames - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        update_frame()

def prev_frame():
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        current_frame = max(current_frame - 1, 0)
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        update_frame()

def on_closing():
    global cap, update_id
    if update_id:
        root.after_cancel(update_id)
    if cap:
        cap.release()
    root.destroy()

root.protocol("WM_DELETE_WINDOW", on_closing)
root.mainloop()

In [None]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import os
import threading

# Global variables
video_path = ""
cap = None
is_playing = False
current_frame = 0
total_frames = 0
video_paused = False
update_id = None

# Initialize YOLO
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Create main window
root = tk.Tk()
root.title("PAU Object Detection")
root.geometry("1000x700")

# Video display
video_label = tk.Label(root)
video_label.pack(fill=tk.BOTH, expand=True)

# Control panel
control_frame = tk.Frame(root)
control_frame.pack(fill=tk.X, pady=10)

# Button style
btn_style = {'bg': '#3498db', 'fg': 'white', 'bd': 0, 'padx': 15, 'pady': 8}

def create_button(parent, text, command):
    return tk.Button(parent, text=text, command=command, **btn_style)

def load_video():
    global video_path, cap, total_frames, current_frame
    file_path = filedialog.askopenfilename(filetypes=[("Video files", "*.mp4 *.avi *.mov")])
    if file_path:
        video_path = file_path
        if cap is not None:
            cap.release()
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        current_frame = 0
        seek_scale.config(to=total_frames)
        status_label.config(text=f"Loaded: {os.path.basename(video_path)}")
        update_frame()

def toggle_play():
    global is_playing, video_paused
    is_playing = not is_playing
    video_paused = False
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        play_video()

def play_video():
    global is_playing, update_id
    if is_playing and cap is not None:
        update_frame()
        update_id = root.after(30, play_video)

def update_frame():
    global current_frame, cap, video_paused

    if cap is None or video_paused:
        return

    ret, frame = cap.read()
    if not ret:
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        current_frame = 0
        return

    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    seek_scale.set(current_frame)

    # Object detection in a separate thread for smoother playback
    detection_thread = threading.Thread(target=process_frame, args=(frame.copy(),))
    detection_thread.start()

    # Display frame (without waiting for detection to complete)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame)
    img = ImageTk.PhotoImage(image=img)
    video_label.img = img
    video_label.config(image=img)

def process_frame(frame):
    height, width, _ = frame.shape

    # YOLO object detection
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))

    if len(indexes) > 0:
        for i in indexes:
            i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, f"{label} {confidence}", (x, y + 20), font, 2, (255, 255, 255), 2)

    return frame

def seek_video(pos):
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        frame_pos = int(float(pos))
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
        current_frame = frame_pos
        update_frame()

def next_frame():
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        current_frame = min(current_frame + 1, total_frames - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        update_frame()

def prev_frame():
    global current_frame, video_paused
    if cap is not None:
        video_paused = True
        current_frame = max(current_frame - 1, 0)
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        update_frame()

def on_closing():
    global cap, update_id
    if update_id:
        root.after_cancel(update_id)
    if cap:
        cap.release()
    root.destroy()

# Buttons (after function definitions)
load_btn = create_button(control_frame, "Load Video", load_video)
load_btn.pack(side=tk.LEFT, padx=5)

play_btn = create_button(control_frame, "Play", toggle_play)
play_btn.pack(side=tk.LEFT, padx=5)

prev_btn = create_button(control_frame, "<<", prev_frame)
prev_btn.pack(side=tk.LEFT, padx=5)

next_btn = create_button(control_frame, ">>", next_frame)
next_btn.pack(side=tk.LEFT, padx=5)

# Seek bar
seek_scale = tk.Scale(control_frame, from_=0, to=100, orient=tk.HORIZONTAL,
                      command=seek_video, length=400)
seek_scale.pack(side=tk.LEFT, padx=10, fill=tk.X, expand=True)

# Status label
status_label = tk.Label(control_frame, text="Ready to load video")
status_label.pack(side=tk.LEFT, padx=10)

# Handle closing
root.protocol("WM_DELETE_WINDOW", on_closing)

# Start the GUI
root.mainloop()


In [3]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import os
import threading

# Global variables
cap = None
is_playing = False
current_frame = 0
total_frames = 0
video_paused = False
update_id = None

# Initialize YOLO
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Main window
root = tk.Tk()
root.title("PAU Object Detection")
root.geometry("1000x700")

# Video frame
video_label = tk.Label(root)
video_label.pack(fill=tk.BOTH, expand=True)

# Control panel
control_frame = tk.Frame(root)
control_frame.pack(fill=tk.X, pady=10)

# Button style
btn_style = {'bg': '#3498db', 'fg': 'white', 'bd': 0, 'padx': 15, 'pady': 8}

def create_button(parent, text, command):
    return tk.Button(parent, text=text, command=command, **btn_style)

def list_videos():
    folder = 'videos'
    return [f for f in os.listdir(folder) if f.endswith(('.mp4', '.avi', '.mov'))]

def on_video_select(event):
    selection = video_listbox.curselection()
    if selection:
        filename = video_listbox.get(selection[0])
        load_video(os.path.join('videos', filename))

def load_video(path):
    global cap, total_frames, current_frame, is_playing
    if cap:
        cap.release()
    cap = cv2.VideoCapture(path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    current_frame = 0
    is_playing = False
    seek_scale.config(to=total_frames)
    play_btn.config(text="Play")
    status_label.config(text=f"Loaded: {os.path.basename(path)}")
    update_frame()

def toggle_play():
    global is_playing, video_paused
    is_playing = not is_playing
    video_paused = False
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        play_video()

def play_video():
    global update_id
    if is_playing and cap:
        update_frame()
        update_id = root.after(30, play_video)

def update_frame():
    global current_frame, cap, video_paused
    if cap is None or video_paused:
        return

    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
    ret, frame = cap.read()
    if not ret:
        return

    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    seek_scale.set(current_frame)

    # Detection in background thread
    threading.Thread(target=process_frame, args=(frame.copy(),)).start()

    # Show frame
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=img)
    video_label.imgtk = imgtk
    video_label.config(image=imgtk)
    
    try:
        threading.Thread(target=process_frame, args=(frame.copy(),)).start()
    except Exception as e:
        print(f"Error starting processing thread: {e}")

# def process_frame(frame):
#     height, width = frame.shape[:2]
#     blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), swapRB=True, crop=False)
#     net.setInput(blob)
#     outs = net.forward(output_layers)

#     boxes, confidences, class_ids = [], [], []
#     for out in outs:
#         for detection in out:
#             scores = detection[5:]
#             class_id = np.argmax(scores)
#             confidence = scores[class_id]
#             if confidence > 0.5:
#                 center_x, center_y = int(detection[0]*width), int(detection[1]*height)
#                 w, h = int(detection[2]*width), int(detection[3]*height)
#                 x, y = int(center_x - w/2), int(center_y - h/2)
#                 boxes.append([x, y, w, h])
#                 confidences.append(float(confidence))
#                 class_ids.append(class_id)

#     indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
#     font = cv2.FONT_HERSHEY_SIMPLEX
#     colors = np.random.uniform(0, 255, size=(len(classes), 3))

#     for i in indexes.flatten():
#         x, y, w, h = boxes[i]
#         label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
#         color = colors[class_ids[i]]
#         cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
#         cv2.putText(frame, label, (x, y - 10), font, 0.5, (255, 255, 255), 1)

def process_frame(frame):
    height, width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes, confidences, class_ids = [], [], []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Ensure coordinates are valid numbers
                center_x = int(detection[0] * width) if not np.isnan(detection[0]) and not np.isinf(detection[0]) else 0
                center_y = int(detection[1] * height) if not np.isnan(detection[1]) and not np.isinf(detection[1]) else 0
                w = int(detection[2] * width) if not np.isnan(detection[2]) and not np.isinf(detection[2]) else 0
                h = int(detection[3] * height) if not np.isnan(detection[3]) and not np.isinf(detection[3]) else 0
                
                # Ensure w and h are positive
                w, h = abs(w), abs(h)
                x, y = int(center_x - w/2), int(center_y - h/2)
                
                # Ensure coordinates are within frame bounds
                x = max(0, min(x, width - 1))
                y = max(0, min(y, height - 1))
                w = max(1, min(w, width - x))
                h = max(1, min(h, height - y))
                
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_SIMPLEX
    colors = np.random.uniform(0, 255, size=(len(classes), 3))

    # Handle different return types from NMSBoxes
    if len(indexes) > 0:
        if isinstance(indexes, tuple):  # For OpenCV versions that return tuple
            indexes = indexes[0]
        indexes = indexes.flatten()

        for i in indexes:
            x, y, w, h = boxes[i]
            label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y - 10), font, 0.5, (255, 255, 255), 1)

def seek_video(pos):
    global current_frame, video_paused
    if cap:
        video_paused = True
        frame_pos = int(float(pos))
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
        current_frame = frame_pos
        update_frame()

def on_closing():
    global cap, update_id
    if update_id:
        root.after_cancel(update_id)
    if cap:
        cap.release()
    root.destroy()

# Buttons
load_btn = create_button(control_frame, "Refresh Videos", lambda: refresh_video_list())
load_btn.pack(side=tk.LEFT, padx=5)

play_btn = create_button(control_frame, "Play", toggle_play)
play_btn.pack(side=tk.LEFT, padx=5)

# Seek bar
seek_scale = tk.Scale(control_frame, from_=0, to=100, orient=tk.HORIZONTAL,
                      command=seek_video, length=400)
seek_scale.pack(side=tk.LEFT, padx=10, fill=tk.X, expand=True)

# Status label
status_label = tk.Label(control_frame, text="Select a video from the list")
status_label.pack(side=tk.LEFT, padx=10)

# Listbox for videos
video_listbox = tk.Listbox(root, height=6)
video_listbox.pack(fill=tk.X, padx=20, pady=10)
video_listbox.bind("<<ListboxSelect>>", on_video_select)

def refresh_video_list():
    video_listbox.delete(0, tk.END)
    for vid in list_videos():
        video_listbox.insert(tk.END, vid)

refresh_video_list()
root.protocol("WM_DELETE_WINDOW", on_closing)
root.mainloop()

Exception in thread Thread-8 (process_frame):
Traceback (most recent call last):
  File "C:\Users\lovef\anaconda3\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "C:\Users\lovef\anaconda3\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\lovef\AppData\Local\Temp\ipykernel_18756\2752250224.py", line 143, in process_frame
cv2.error: Unknown C++ exception from OpenCV code


In [3]:
pip install --upgrade opencv-python numpy

Collecting opencv-python
  Obtaining dependency information for opencv-python from https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy
  Obtaining dependency information for numpy from https://files.pythonhosted.org/packages/8b/72/10c1d2d82101c468a28adc35de6c77b308f288cfd0b88e1070f15b98e00c/numpy-2.2.4-cp311-cp311-win_amd64.whl.metadata
  Using cached numpy-2.2.4-cp311-cp311-win_amd64.whl.metadata (60 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ------------------------------

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\lovef\\anaconda3\\Lib\\site-packages\\~-mpy.libs\\libopenblas64__v0.3.23-293-gc2f4bdbb-gcc_10_3_0-2bde3a66a51006b2b53eb373ff767a3f.dll'
Consider using the `--user` option or check the permissions.



In [4]:
pip install --upgrade opencv-python numpy

Collecting opencv-python
  Obtaining dependency information for opencv-python from https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86
Note: you may need to restart the kernel to use updated packages.


In [None]:
print(f"OpenCV version: {cv2.__version__}")
print(f"Numpy version: {np.__version__}")

In [2]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import os
import threading
import time

# Global variables
cap = None
is_playing = False
current_frame = 0
total_frames = 0
video_paused = False
update_id = None
processing_lock = threading.Lock()

# Initialize YOLO
try:
    net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
    with open('cfg/coco.names', 'r') as f:
        classes = f.read().splitlines()
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
except Exception as e:
    print(f"Error loading YOLO model: {e}")
    classes = []
    output_layers = []

# Main window
root = tk.Tk()
root.title("PAU Object Detection")
root.geometry("1000x700")

# Video frame
video_label = tk.Label(root)
video_label.pack(fill=tk.BOTH, expand=True)

# Control panel
control_frame = tk.Frame(root)
control_frame.pack(fill=tk.X, pady=10)

# Button style
btn_style = {'bg': '#3498db', 'fg': 'white', 'bd': 0, 'padx': 15, 'pady': 8}

def create_button(parent, text, command):
    return tk.Button(parent, text=text, command=command, **btn_style)

def list_videos():
    folder = 'videos'
    if not os.path.exists(folder):
        os.makedirs(folder)
    return [f for f in os.listdir(folder) if f.endswith(('.mp4', '.avi', '.mov'))]

def on_video_select(event):
    selection = video_listbox.curselection()
    if selection:
        filename = video_listbox.get(selection[0])
        load_video(os.path.join('videos', filename))

def load_video(path):
    global cap, total_frames, current_frame, is_playing
    try:
        if cap:
            cap.release()
        cap = cv2.VideoCapture(path)
        if not cap.isOpened():
            raise ValueError("Could not open video file")
        
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        current_frame = 0
        is_playing = False
        seek_scale.config(to=total_frames)
        play_btn.config(text="Play")
        status_label.config(text=f"Loaded: {os.path.basename(path)}")
        update_frame()
    except Exception as e:
        status_label.config(text=f"Error: {str(e)}")

def toggle_play():
    global is_playing, video_paused
    is_playing = not is_playing
    video_paused = False
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        play_video()

def play_video():
    global update_id
    if is_playing and cap:
        update_frame()
        update_id = root.after(30, play_video)

def update_frame():
    global current_frame, cap, video_paused
    if cap is None or video_paused:
        return

    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        ret, frame = cap.read()
        if not ret:
            return

        current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        seek_scale.set(current_frame)

        # Show frame immediately
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame_rgb)
        imgtk = ImageTk.PhotoImage(image=img)
        video_label.imgtk = imgtk
        video_label.config(image=imgtk)

        # Start processing thread if not already processing
        if not processing_lock.locked():
            try:
                threading.Thread(target=process_frame, args=(frame.copy(),), daemon=True).start()
            except Exception as e:
                print(f"Error starting processing thread: {e}")
    except Exception as e:
        print(f"Error updating frame: {e}")

def process_frame(frame):
    with processing_lock:
        try:
            if frame is None or frame.size == 0:
                return

            height, width = frame.shape[:2]
            
            try:
                blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), swapRB=True, crop=False)
                net.setInput(blob)
                outs = net.forward(output_layers)
            except Exception as e:
                print(f"Network error: {e}")
                return

            boxes, confidences, class_ids = [], [], []
            
            for out in outs:
                for detection in out:
                    try:
                        scores = detection[5:]
                        class_id = np.argmax(scores)
                        confidence = scores[class_id]
                        
                        if confidence > 0.5:
                            # Safe coordinate conversion
                            center_x = detection[0] * width
                            center_y = detection[1] * height
                            w = detection[2] * width
                            h = detection[3] * height
                            
                            # Validate coordinates
                            if (any(np.isnan([center_x, center_y, w, h])) or 
                                any(np.isinf([center_x, center_y, w, h])) or
                                w <= 0 or h <= 0):
                                continue
                                
                            center_x, center_y = int(center_x), int(center_y)
                            w, h = int(w), int(h)
                            
                            x = max(0, min(int(center_x - w/2), width - 1))
                            y = max(0, min(int(center_y - h/2), height - 1))
                            w = max(1, min(w, width - x))
                            h = max(1, min(h, height - y))
                            
                            boxes.append([x, y, w, h])
                            confidences.append(float(confidence))
                            class_ids.append(class_id)
                    except Exception as e:
                        print(f"Detection processing error: {e}")
                        continue

            if not boxes:
                return

            try:
                indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
                
                if len(indexes) > 0:
                    # Handle different return types from NMSBoxes
                    if isinstance(indexes, (tuple, list)):
                        indexes = np.array(indexes)
                    indexes = indexes.flatten()
                    
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    colors = np.random.uniform(0, 255, size=(len(classes), 3))
                    
                    for i in indexes:
                        try:
                            x, y, w, h = boxes[i]
                            label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
                            color = colors[class_ids[i]]
                            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                            cv2.putText(frame, label, (x, y - 10), font, 0.5, (255, 255, 255), 1)
                        except Exception as e:
                            print(f"Drawing error for detection {i}: {e}")
                            continue
                            
                    # Update the displayed frame with detections
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    img = Image.fromarray(frame_rgb)
                    imgtk = ImageTk.PhotoImage(image=img)
                    video_label.imgtk = imgtk
                    video_label.config(image=imgtk)
            except Exception as e:
                print(f"NMS error: {e}")
                return
                
        except Exception as e:
            print(f"General processing error: {e}")
            return

def seek_video(pos):
    global current_frame, video_paused
    if cap:
        video_paused = True
        frame_pos = int(float(pos))
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
        current_frame = frame_pos
        update_frame()

def on_closing():
    global cap, update_id
    if update_id:
        root.after_cancel(update_id)
    if cap:
        cap.release()
    root.destroy()

# Buttons
load_btn = create_button(control_frame, "Refresh Videos", lambda: refresh_video_list())
load_btn.pack(side=tk.LEFT, padx=5)

play_btn = create_button(control_frame, "Play", toggle_play)
play_btn.pack(side=tk.LEFT, padx=5)

# Seek bar
seek_scale = tk.Scale(control_frame, from_=0, to=100, orient=tk.HORIZONTAL,
                      command=seek_video, length=400)
seek_scale.pack(side=tk.LEFT, padx=10, fill=tk.X, expand=True)

# Status label
status_label = tk.Label(control_frame, text="Select a video from the list")
status_label.pack(side=tk.LEFT, padx=10)

# Listbox for videos
video_listbox = tk.Listbox(root, height=6)
video_listbox.pack(fill=tk.X, padx=20, pady=10)
video_listbox.bind("<<ListboxSelect>>", on_video_select)

def refresh_video_list():
    video_listbox.delete(0, tk.END)
    for vid in list_videos():
        video_listbox.insert(tk.END, vid)

refresh_video_list()
root.protocol("WM_DELETE_WINDOW", on_closing)
root.mainloop()

In [5]:
import cv2
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import numpy as np
import os

# Global variables
video_paths = []
cap = None
current_video_index = 0
total_frames = 0
is_playing = False
classes = []
net = None
output_layers = []
root = None

# UI elements
video_label = None
info_label = None
play_btn = None
video_listbox = None
seek_var = None
seek_bar = None
progress = None
count_label = None

def load_yolo():
    global net, classes, output_layers
    net = cv2.dnn.readNet("cfg/yolov3.weights", "cfg/yolov3.cfg")
    with open("cfg/coco.names", "r") as f:
        classes = [line.strip() for line in f.readlines()]
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

def setup_gui():
    global root, video_label, info_label, play_btn, video_listbox, seek_var, seek_bar, progress, count_label
    root.configure(bg="#2c3e50")
    main_frame = tk.Frame(root, bg="#2c3e50")
    main_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

    video_label = tk.Label(main_frame, bg="black")
    video_label.pack(fill=tk.BOTH, expand=True)

    control_frame = tk.Frame(main_frame, bg="#2c3e50")
    control_frame.pack(fill=tk.X, pady=10)

    btn_style = {'bg': "#3498db", 'fg': "#ecf0f1", 'bd': 0, 'padx': 15, 'pady': 8}

    load_btn = tk.Button(control_frame, text="Load Videos", command=load_videos, **btn_style)
    load_btn.pack(side=tk.LEFT, padx=5)

    play_btn = tk.Button(control_frame, text="Play", command=toggle_play, **btn_style)
    play_btn.pack(side=tk.LEFT, padx=5)

    info_label = tk.Label(control_frame, text="Ready to load videos", bg="#2c3e50", fg="#ecf0f1")
    info_label.pack(side=tk.LEFT, padx=20)

    video_listbox = tk.Listbox(control_frame, height=3, bg="#2c3e50", fg="#ecf0f1", selectbackground="#3498db")
    video_listbox.pack(side=tk.RIGHT, padx=5)
    video_listbox.bind("<<ListboxSelect>>", select_video)

    seek_var = tk.DoubleVar()
    seek_bar = ttk.Scale(main_frame, from_=0, to=100, orient="horizontal", variable=seek_var, command=seek_video)
    seek_bar.pack(fill=tk.X, padx=10, pady=5)

    progress = ttk.Progressbar(main_frame, mode='determinate', length=300)
    progress.pack(pady=5)

    count_label = tk.Label(main_frame, text="Objects Detected: 0", bg="#2c3e50", fg="#ecf0f1", font=("Arial", 12))
    count_label.pack()

def load_videos():
    global video_paths, info_label
    video_paths.clear()
    video_dir = "videos"
    if os.path.exists(video_dir):
        for file in os.listdir(video_dir):
            if file.endswith((".mp4", ".avi", ".mov")):
                video_paths.append(os.path.join(video_dir, file))

    video_listbox.delete(0, tk.END)
    for path in video_paths:
        video_listbox.insert(tk.END, os.path.basename(path))

    if video_paths:
        info_label.config(text=f"Loaded {len(video_paths)} videos")
    else:
        info_label.config(text="No videos found in 'videos' folder")

def select_video(event):
    global current_video_index
    selection = event.widget.curselection()
    if selection:
        current_video_index = selection[0]
        initialize_video()

def initialize_video():
    global cap, total_frames, is_playing
    if cap:
        cap.release()
    cap_path = video_paths[current_video_index]
    cap = cv2.VideoCapture(cap_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    progress["maximum"] = total_frames
    is_playing = False
    play_btn.config(text="Play")

def toggle_play():
    global is_playing
    if not cap or not cap.isOpened():
        return
    is_playing = not is_playing
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        update_frame()

def update_frame():
    global cap
    if not is_playing or not cap or not cap.isOpened():
        return

    ret, frame = cap.read()
    if not ret:
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        return

    frame = detect_objects(frame)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    img = ImageTk.PhotoImage(image=img)

    video_label.img = img
    video_label.config(image=img)

    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    seek_var.set((current_frame / total_frames) * 100)
    progress["value"] = current_frame

    root.after(10, update_frame)

def seek_video(value):
    if cap and cap.isOpened():
        frame_number = int((float(value) / 100) * total_frames)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

def detect_objects(frame):
    height, width, _ = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    count_label.config(text=f"Objects Detected: {len(indexes)}")

    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, f"{label} {confidence:.2f}", (x, y + 30), font, 2, color, 2)

    return frame

if __name__ == "__main__":
    root = tk.Tk()
    root.title("PAU Video Object Detector")
    root.geometry("1000x700")
    load_yolo()
    setup_gui()
    root.mainloop()


In [8]:
import cv2
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import numpy as np
import os

# ---------- GLOBAL VARIABLES ----------
video_paths = []           # List of loaded video paths
cap = None                 # Video capture object
current_video_index = 0    # Index of the currently selected video
total_frames = 0           # Total number of frames in the video
is_playing = False         # Indicates if video is playing

# YOLO components
net = None
classes = []
output_layers = []

# UI elements
root = None
video_label = None
info_label = None
play_btn = None
video_listbox = None
seek_var = None
seek_bar = None

# ---------- SETUP YOLO ----------
def load_yolo():
    """Load YOLO model, weights, and class names."""
    global net, classes, output_layers
    net = cv2.dnn.readNet("cfg/yolov3.weights", "cfg/yolov3.cfg")
    
    with open("cfg/coco.names", "r") as f:
        classes = [line.strip() for line in f.readlines()]
    
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# ---------- SETUP GUI ----------
def setup_gui():
    """Builds the GUI layout using Tkinter widgets."""
    global root, video_label, info_label, play_btn, video_listbox, seek_var, seek_bar

    root.configure(bg="white")
    main_frame = tk.Frame(root, bg="white")
    main_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)

    # Where video frames are shown
    video_label = tk.Label(main_frame, bg="black")
    video_label.pack(fill=tk.BOTH, expand=True)

    # Control buttons
    control_frame = tk.Frame(main_frame, bg="white")
    control_frame.pack(fill=tk.X, pady=10)

    # Load button
    load_btn = tk.Button(control_frame, text="Load Videos", command=load_videos)
    load_btn.pack(side=tk.LEFT, padx=5)

    # Play button
    play_btn = tk.Button(control_frame, text="Play", command=toggle_play)
    play_btn.pack(side=tk.LEFT, padx=5)

    # Status/info text
    info_label = tk.Label(control_frame, text="Ready to load videos", bg="white")
    info_label.pack(side=tk.LEFT, padx=20)

    # List of videos
    video_listbox = tk.Listbox(control_frame, height=3)
    video_listbox.pack(side=tk.RIGHT, padx=5)
    video_listbox.bind("<<ListboxSelect>>", select_video)

    # Seek bar to jump between frames
    seek_var = tk.DoubleVar()
    seek_bar = ttk.Scale(main_frame, from_=0, to=100, orient="horizontal", variable=seek_var, command=seek_video)
    seek_bar.pack(fill=tk.X, padx=10, pady=5)

# ---------- LOAD VIDEOS ----------
def load_videos():
    """Load video files from the 'videos' folder."""
    global video_paths
    video_paths.clear()

    video_dir = "videos"
    if os.path.exists(video_dir):
        for file in os.listdir(video_dir):
            if file.endswith((".mp4", ".avi", ".mov")):
                video_paths.append(os.path.join(video_dir, file))

    # Populate listbox
    video_listbox.delete(0, tk.END)
    for path in video_paths:
        video_listbox.insert(tk.END, os.path.basename(path))

    # Update status text
    if video_paths:
        info_label.config(text=f"Loaded {len(video_paths)} videos")
    else:
        info_label.config(text="No videos found in 'videos' folder")

# ---------- SELECT VIDEO ----------
def select_video(event):
    """Triggered when a user selects a video from the list."""
    global current_video_index
    selection = event.widget.curselection()
    if selection:
        current_video_index = selection[0]
        initialize_video()

# ---------- INIT VIDEO ----------
def initialize_video():
    """Setup video capture for the selected video."""
    global cap, total_frames, is_playing
    if cap:
        cap.release()

    cap_path = video_paths[current_video_index]
    cap = cv2.VideoCapture(cap_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    seek_bar.config(to=100)
    is_playing = False
    play_btn.config(text="Play")

# ---------- PLAY TOGGLE ----------
def toggle_play():
    """Start or pause the video playback."""
    global is_playing
    if not cap or not cap.isOpened():
        return
    is_playing = not is_playing
    play_btn.config(text="Pause" if is_playing else "Play")
    if is_playing:
        update_frame()

# ---------- FRAME LOOP ----------
def update_frame():
    """Read, process, and show next video frame."""
    global cap
    if not is_playing or not cap or not cap.isOpened():
        return

    ret, frame = cap.read()
    if not ret:
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        return

    frame = detect_objects(frame)  # Apply YOLO

    # Convert and show frame
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    img = ImageTk.PhotoImage(image=img)
    video_label.img = img
    video_label.config(image=img)

    # Update seek bar
    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    seek_var.set((current_frame / total_frames) * 100)

    # Loop
    root.after(10, update_frame)

# ---------- SEEK ----------
def seek_video(value):
    """Jump to a specific frame based on seek bar value."""
    if cap and cap.isOpened():
        frame_number = int((float(value) / 100) * total_frames)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

# ---------- DETECTION ----------
def detect_objects(frame):
    """Perform YOLO detection and annotate detected objects."""
    height, width, _ = frame.shape

    # Prepare frame for YOLO input
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    # Analyze detections
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Get coordinates of box
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Filter overlapping boxes
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    # Draw plain white boxes and labels
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = f"{classes[class_ids[i]]} {confidences[i]:.2f}"
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 2)
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    return frame

# ---------- MAIN ----------
if __name__ == "__main__":
    root = tk.Tk()
    root.title("PAU Video Object Detector")
    root.geometry("1000x700")
    load_yolo()     # Load YOLO model
    setup_gui()     # Create GUI
    root.mainloop() # Start GUI event loop
