# In [1]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk

# Shared module state (read and mutated by the GUI callbacks below)

# -- Playback --
paused = False            # True while the user has paused the update loop
current_frame = 0         # frame counter shown in the overlay
rewind_seconds = 2        # seek distance used by rewind()
fastforward_seconds = 2   # seek distance used by fast_forward()

# -- OpenCV handles, created lazily by the callbacks --
cap = writer = net = None

# -- Detection labels, filled by load_yolo() --
classes = []

# -- Paths chosen through the file dialogs --
video_path = output_path = ""

def load_yolo():
    """Populate the module-level ``net`` and ``classes``.

    ``net`` is built with ``cv2.dnn.readNet`` from the weights/config files
    under ``cfg/``; ``classes`` receives one entry per line of
    ``cfg/coco.names``.
    """
    global net, classes

    weights_file = 'cfg/yolov3.weights'
    config_file = 'cfg/yolov3.cfg'
    names_file = 'cfg/coco.names'

    net = cv2.dnn.readNet(weights_file, config_file)

    with open(names_file, 'r') as names_handle:
        raw_names = names_handle.read()
    classes = raw_names.splitlines()

def select_video():
    """Ask the user for an input video, show its path and validate it.

    On a successful selection the global ``video_path`` is updated, the path
    is written into ``video_entry`` and ``check_duration()`` is run.
    If the dialog yields no path (e.g. it was cancelled) nothing changes, so
    a previously chosen video stays selected and the entry stays in sync
    with ``video_path``.
    """
    global video_path

    chosen = filedialog.askopenfilename(
        title="Select Video File",
        filetypes=[("Video Files", "*.mp4 *.avi *.mov")],
    )
    # Do not clobber the current selection with an empty result.
    if not chosen:
        return

    video_path = chosen
    video_entry.delete(0, tk.END)
    video_entry.insert(0, video_path)
    check_duration()

def select_output():
    """Ask the user where to save the processed video and show the path.

    On a successful selection the global ``output_path`` is updated and
    mirrored into ``output_entry``. If the dialog yields no path (e.g. it was
    cancelled) the previous ``output_path`` is kept, so the entry never shows
    a path that differs from the one actually used for writing.
    """
    global output_path

    chosen = filedialog.asksaveasfilename(
        title="Save Output As",
        defaultextension=".mp4",
        filetypes=[("MP4 Files", "*.mp4")],
    )
    # Do not clobber the current selection with an empty result.
    if not chosen:
        return

    output_path = chosen
    output_entry.delete(0, tk.END)
    output_entry.insert(0, output_path)

def check_duration():
    """Check that the selected video is at most 10 seconds long.

    Opens ``video_path`` with a temporary capture, derives the duration from
    the reported frame count and FPS, and updates ``status_label`` and the
    state of ``start_button`` accordingly. Does nothing when no video path is
    set.

    The probe uses a local capture (always released) instead of the shared
    ``cap``, so the playback handle is never replaced by a closed capture.
    """
    if not video_path:
        return

    probe = cv2.VideoCapture(video_path)
    try:
        if not probe.isOpened():
            status_label.config(text=f"Error: Could not open video file {video_path}", fg="red")
            # A previously validated video may have left the button enabled.
            start_button.config(state=tk.DISABLED)
            return
        fps = probe.get(cv2.CAP_PROP_FPS)
        frame_count = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        probe.release()

    # Without a positive FPS the duration cannot be computed (and dividing
    # by zero would raise), so the 10s limit cannot be verified.
    if not fps > 0:
        status_label.config(text="Error: Could not determine video duration", fg="red")
        start_button.config(state=tk.DISABLED)
        return

    duration = frame_count / fps

    if duration > 10:
        status_label.config(text=f"Error: Video duration ({duration:.2f}s) exceeds 10s limit", fg="red")
        start_button.config(state=tk.DISABLED)
    else:
        status_label.config(text=f"Video loaded: {duration:.2f} seconds", fg="green")
        start_button.config(state=tk.NORMAL)

def toggle_pause():
    """Flip the global ``paused`` flag and relabel the pause button to match."""
    global paused
    if paused:
        paused = False
        caption = "Pause"
    else:
        paused = True
        caption = "Resume"
    pause_button.config(text=caption)

def rewind():
    """Seek the capture back by ``rewind_seconds``, never before frame 0.

    No-op when there is no capture. Updates ``current_frame`` to the new
    position.
    """
    global current_frame, cap
    if not cap:
        return

    frames_per_second = cap.get(cv2.CAP_PROP_FPS)
    step = int(rewind_seconds * frames_per_second)

    target = current_frame - step
    if target < 0:
        target = 0

    cap.set(cv2.CAP_PROP_POS_FRAMES, target)
    current_frame = target

def fast_forward():
    """Seek the capture forward by ``fastforward_seconds``.

    The target is capped at the frame count reported by the capture. No-op
    when there is no capture. Updates ``current_frame`` to the new position.
    """
    global current_frame, cap
    if not cap:
        return

    frames_per_second = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = int(fastforward_seconds * frames_per_second)

    target = current_frame + step
    if target > total_frames:
        target = total_frames

    cap.set(cv2.CAP_PROP_POS_FRAMES, target)
    current_frame = target

def process_frame(frame):
    """Run the loaded YOLO network on ``frame`` and draw the detections.

    Args:
        frame: Image array with at least two dimensions (height, width, ...),
            e.g. a frame returned by ``cap.read()``. It is annotated in place.

    Returns:
        The same ``frame`` object, with a rectangle and a
        ``"<label> <confidence>"`` caption drawn for every detection that
        survives non-max suppression.

    Uses the module-level ``net`` and ``classes`` set up by ``load_yolo()``.
    """
    conf_threshold = 0.5
    nms_threshold = 0.4

    height, width = frame.shape[:2]

    # Preprocess frame and run the forward pass
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []
    confidences = []
    class_ids = []

    for output in layer_outputs:
        for detection in output:
            # Entries 0-3 are used as the box (centre x/y, width, height,
            # scaled by the frame size); entries from index 5 on are the
            # per-class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= conf_threshold:
                continue

            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert centre coordinates to the top-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)

    # Non-max suppression
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # The result may be None or an empty sequence when nothing is kept;
    # neither can be flattened/iterated as an index array.
    if indexes is None or len(indexes) == 0:
        return frame

    # Seeded palette: one colour per class, identical on every call, so a
    # class keeps its colour from frame to frame and the global NumPy RNG
    # state is left untouched.
    palette = np.random.RandomState(42).uniform(0, 255, size=(len(classes), 3))
    font = cv2.FONT_HERSHEY_PLAIN

    for i in np.asarray(indexes).flatten():
        x, y, w, h = boxes[i]
        class_id = class_ids[i]
        label = str(classes[class_id])
        confidence = str(round(confidences[i], 2))
        # Index the palette by class, not by detection index: the number of
        # candidate boxes can exceed the number of classes.
        color = tuple(palette[class_id].tolist())
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

    return frame

def update_video():
    """Read, process, record and display one frame, then reschedule itself.

    When a capture exists and playback is not paused, the next frame is read,
    passed through ``process_frame``, stamped with the frame counter and the
    playback status, written to ``writer`` (if any) and shown in
    ``video_label``. The function always re-arms itself via
    ``video_label.after(10, ...)``, so it keeps polling while paused or after
    reads start failing.
    """
    global current_frame

    if cap and not paused:
        ret, frame = cap.read()
        if ret:
            # Count only frames that were actually read; otherwise the
            # counter keeps growing once reads fail (e.g. past the last
            # frame) and is wrong after a later rewind.
            current_frame += 1

            processed_frame = process_frame(frame)

            # Display frame number and status. This branch only runs while
            # not paused, so the status is always "PLAYING".
            cv2.putText(processed_frame, f"Frame: {current_frame}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(processed_frame, "PLAYING", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if writer:
                writer.write(processed_frame)

            # Convert to RGB and display in GUI
            cv2image = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(cv2image)
            imgtk = ImageTk.PhotoImage(image=img)
            # Keep a reference on the widget so the image is not garbage
            # collected while it is displayed.
            video_label.imgtk = imgtk
            video_label.configure(image=imgtk)

    video_label.after(10, update_video)

def start_processing():
    """Open the selected video, set up the optional writer and start playback.

    Sets the global ``cap`` (and ``writer`` when ``output_path`` is set),
    disables the start/browse buttons and kicks off ``update_video()``.
    If the video cannot be opened an error is shown and nothing is started.
    If the output file cannot be opened a warning is shown and processing
    continues without saving, instead of silently producing no output.
    """
    global cap, writer

    # Initialize video capture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        status_label.config(text=f"Error: Could not open video file {video_path}", fg="red")
        return

    # Initialize video writer if output path is specified
    if output_path:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            # Surface the failure rather than writing into a dead writer.
            writer.release()
            writer = None
            status_label.config(
                text=f"Warning: Could not open output file {output_path}; frames will not be saved",
                fg="orange",
            )

    # Disable controls during processing
    start_button.config(state=tk.DISABLED)
    video_button.config(state=tk.DISABLED)
    output_button.config(state=tk.DISABLED)

    # Start video update loop
    update_video()

# Build the main application window
root = tk.Tk()
root.title("YOLO Object Detection")

# Grid options shared by several widgets below
_cell_pad = {"padx": 5, "pady": 5}
_full_width = {"column": 0, "columnspan": 3}

# Row 0: input video chooser (caption, path entry, browse button)
tk.Label(root, text="Video File:").grid(row=0, column=0, sticky="e", **_cell_pad)
video_entry = tk.Entry(root, width=50)
video_button = tk.Button(root, text="Browse", command=select_video)
video_entry.grid(row=0, column=1, **_cell_pad)
video_button.grid(row=0, column=2, **_cell_pad)

# Row 1: output file chooser (caption, path entry, browse button)
tk.Label(root, text="Output File:").grid(row=1, column=0, sticky="e", **_cell_pad)
output_entry = tk.Entry(root, width=50)
output_button = tk.Button(root, text="Browse", command=select_output)
output_entry.grid(row=1, column=1, **_cell_pad)
output_button.grid(row=1, column=2, **_cell_pad)

# Row 2: status line
status_label = tk.Label(root, text="Select a video file (max 10 seconds)", fg="black")
status_label.grid(row=2, pady=5, **_full_width)

# Row 3: start button, disabled until a video passes check_duration()
start_button = tk.Button(
    root,
    text="Start Processing",
    command=start_processing,
    state=tk.DISABLED,
)
start_button.grid(row=3, pady=10, **_full_width)

# Row 4: label that hosts the processed frames
video_label = tk.Label(root)
video_label.grid(row=4, pady=10, **_full_width)

# Row 5: playback controls, laid out left to right
control_frame = tk.Frame(root)
control_frame.grid(row=5, pady=10, **_full_width)

pause_button = tk.Button(control_frame, text="Pause", command=toggle_pause)
rewind_button = tk.Button(control_frame, text="<< Rewind", command=rewind)
ff_button = tk.Button(control_frame, text="Fast Forward >>", command=fast_forward)
for _control in (pause_button, rewind_button, ff_button):
    _control.pack(side=tk.LEFT, padx=5)

# Load YOLO model before entering the event loop
load_yolo()

# Run the GUI. The cleanup lives in `finally` so the capture and, more
# importantly, the writer are released even if the loop exits through an
# exception (e.g. KeyboardInterrupt).
try:
    root.mainloop()
finally:
    if cap:
        cap.release()
    if writer:
        writer.release()
    cv2.destroyAllWindows()