In [None]:
import tkinter as tk
from tkinter import filedialog, Label
from ultralytics import YOLO
import cv2
from threading import Thread

# Load the detector once at import time; every detection run reuses it.
model = YOLO('yolov8n.pt')  # Replace with other YOLO weights if desired

# Root Tk window: title, initial size, dark background.
window = tk.Tk()
window.wm_title("YOLOv8 Object Detection")
window.wm_geometry("500x400")
window.config(background="#2C2F33")

# State shared between the button callbacks and the detection loop:
#   video_source -- None until chosen, then a file path or webcam index 0
#   running      -- loop flag read by run_detection on every frame
video_source, running = None, False

# Functions
def select_video():
    """Ask the user for a video file and store the answer in ``video_source``.

    The chosen value is stored even when the dialog is dismissed without a
    selection; the status label reports either the selected path or that
    nothing was selected.
    """
    global video_source
    chosen = filedialog.askopenfilename(
        title="Select Video File",
        filetypes=[("Video Files", "*.mp4 *.avi *.mkv")],
    )
    video_source = chosen

    # Guard clause: a falsy result means no file was picked.
    if not chosen:
        status_label.config(text="No video selected.", fg="red")
        return

    status_label.config(text=f"Selected Video: {chosen}", fg="white")

def start_detection():
    """Start the background detection thread for the current video source.

    Nothing is started (and ``running`` is left untouched) when no source
    has been chosen, or when a previously started worker thread is still
    alive. The latter prevents repeated clicks from spawning several loops
    that would compete for the same capture source and OpenCV window.
    """
    global running

    # 0 is a valid source (the webcam index) even though it is falsy.
    if not (video_source or video_source == 0):
        status_label.config(text="Please select a video source!", fg="red")
        return

    # The worker is remembered on the function object so that a second click
    # can detect it without introducing extra module-level state.
    worker = getattr(start_detection, "_worker", None)
    if worker is not None and worker.is_alive():
        if running:
            status_label.config(text="Detection is already running.", fg="yellow")
        else:
            # Stop was requested but the loop has not finished its last frame.
            status_label.config(
                text="Previous detection is still stopping; try again.",
                fg="yellow",
            )
        return

    running = True
    status_label.config(text="Detection running...", fg="white")

    # Daemon thread: closing the GUI must not leave the process alive just
    # because the detection loop is still iterating.
    worker = Thread(target=run_detection, daemon=True)
    start_detection._worker = worker
    worker.start()

def stop_detection():
    """Request the detection loop to stop and report it on the status label.

    Only the shared ``running`` flag is cleared here; the loop in
    ``run_detection`` checks that flag once per frame.
    """
    global running
    running = False
    status_label.configure(text="Detection stopped.", fg="yellow")

def run_detection():
    """Read frames from ``video_source``, run YOLO on each and display them.

    Intended to run on the worker thread started by ``start_detection``.
    The loop ends when the shared ``running`` flag becomes false, when no
    frame can be read, or when 'q' is pressed in the OpenCV window. The
    capture and any OpenCV windows are released even if reading, inference
    or display raises.
    """
    # video_source is either a file path or the webcam index 0; both are
    # passed to VideoCapture unchanged.
    cap = cv2.VideoCapture(video_source)

    if not cap.isOpened():
        cap.release()
        status_label.config(text="Error: Could not open video source.", fg="red")
        return

    try:
        # `running` is only read here, so no `global` declaration is needed.
        while running:
            ret, frame = cap.read()
            if not ret:
                status_label.config(text="Video ended or no frame captured.", fg="yellow")
                break

            # Perform detection and draw the first result onto the frame
            results = model(frame)
            annotated_frame = results[0].plot()

            # Display frame
            cv2.imshow("YOLOv8 Detection", annotated_frame)

            # waitKey also services the OpenCV window; stop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture device/file and close the preview window,
        # including when an exception escapes the loop.
        cap.release()
        cv2.destroyAllWindows()

def set_webcam():
    """Select capture index 0 as the video source and update the status label."""
    global video_source
    video_source = 0
    status_label.configure(text="Using Webcam.", fg="white")

# Fonts used by the dark-themed widgets
font_title = ("Helvetica", 18, "bold")
font_buttons = ("Helvetica", 12)
font_status = ("Helvetica", 10)

# Options common to all four buttons; only text, command and colour differ.
_button_style = {"font": font_buttons, "fg": "white", "relief": "flat"}

# Title
title_label = Label(
    window,
    text="YOLOv8 Object Detection",
    font=font_title,
    bg="#23272A",
    fg="white",
)
title_label.pack(pady=20)

# Source selection buttons
select_button = tk.Button(
    window, text="Select Video", command=select_video, bg="#7289DA", **_button_style
)
select_button.pack(pady=10)

webcam_button = tk.Button(
    window, text="Use Webcam", command=set_webcam, bg="#7289DA", **_button_style
)
webcam_button.pack(pady=10)

# Start / stop buttons
start_button = tk.Button(
    window, text="Start Detection", command=start_detection, bg="#43B581", **_button_style
)
start_button.pack(pady=10)

stop_button = tk.Button(
    window, text="Stop Detection", command=stop_detection, bg="#F04747", **_button_style
)
stop_button.pack(pady=10)

# Status label, updated by the callbacks above
status_label = Label(
    window,
    text="Status: Idle",
    font=font_status,
    bg="#2C2F33",
    fg="white",
)
status_label.pack(pady=20)

# Hand control to Tk's event loop; returns when the window is closed
window.mainloop()



0: 480x640 1 person, 1 chair, 182.9ms
Speed: 16.4ms preprocess, 182.9ms inference, 14.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 chair, 1 bed, 77.1ms
Speed: 3.0ms preprocess, 77.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 bed, 81.6ms
Speed: 3.0ms preprocess, 81.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 chair, 1 bed, 74.8ms
Speed: 2.0ms preprocess, 74.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 bed, 83.7ms
Speed: 2.0ms preprocess, 83.7ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 bed, 75.9ms
Speed: 3.4ms preprocess, 75.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 chair, 1 bed, 75.9ms
Speed: 2.0ms preprocess, 75.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 48