In [None]:
from ultralytics import YOLO
import cv2

# Load the trained YOLO weights.
# NOTE: this is a machine-specific absolute path; point it at your own best.pt.
model_path = r"D:\project\yolo_training\tool_tracking7\weights\best.pt"  # Update with correct path
model = YOLO(model_path)

# Open the input video (replace with your video path).
video_path = "videoplayback (1).mp4"  # Update with your video path
cap = cv2.VideoCapture(video_path)

# Fail fast: cv2.VideoCapture does not raise when the source cannot be opened,
# so without this check the processing loop would simply never run and the
# script would finish silently with no indication that the path is wrong.
if not cap.isOpened():
    raise OSError(f"Could not open video source: {video_path!r}")

# Every frame is resized to this size before detection and display.
resize_width = 640  # Desired width
resize_height = 480  # Desired height

# Detection threshold and label styling never change between frames, so they
# are defined once here instead of being re-assigned for every box.
conf_threshold = 0.75  # Only detections with confidence above this are drawn
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1.0
font_thickness = 2
box_color = (0, 255, 0)  # Green; same value in BGR and RGB channel order
text_color = (0, 0, 0)  # Black

# try/finally guarantees the capture and the window are released even if the
# loop is interrupted (e.g. KeyboardInterrupt) or a frame fails to process.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure: stop processing.
            break

        # Resize the frame to the desired size
        frame_resized = cv2.resize(frame, (resize_width, resize_height))

        # Run YOLO detection on the current frame
        results = model(frame_resized)

        # Annotate the BGR frame directly. cv2.imshow interprets arrays as BGR,
        # so converting to RGB before display would swap the red and blue
        # channels on screen. Drawing happens after inference, so the
        # annotations never influence the detections.
        for result in results:
            for box in result.boxes:
                conf = box.conf[0].item()

                # Only display class name and box if confidence is high enough
                if conf > conf_threshold:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cls = int(box.cls[0].item())

                    # Create label text with class name and confidence
                    label = f"{model.names[cls]}: {conf:.2f}"

                    # Measure the label so its background can be sized to fit
                    text_size, baseline = cv2.getTextSize(
                        label, font, font_scale, font_thickness
                    )
                    text_w, text_h = text_size

                    # Keep the label inside the image: clamp at the left edge,
                    # at the right edge (so long labels are not cut off), and
                    # at the top edge.
                    text_x = min(max(10, x1), max(10, resize_width - text_w - 10))
                    text_y = max(text_h + 10, y1 - 10)

                    # Filled rectangle behind the text, padded for visibility
                    cv2.rectangle(
                        frame_resized,
                        (text_x - 5, text_y - text_h - 5),
                        (text_x + text_w + 5, text_y + baseline + 5),
                        box_color,
                        -1,
                    )

                    # Draw the text label
                    cv2.putText(
                        frame_resized,
                        label,
                        (text_x, text_y),
                        font,
                        font_scale,
                        text_color,
                        font_thickness,
                    )

                    # Draw the bounding box
                    cv2.rectangle(frame_resized, (x1, y1), (x2, y2), box_color, 3)

        # Display the annotated frame using OpenCV
        cv2.imshow("YOLOv8 Tool Detection", frame_resized)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 1 scalpel, 1 straight mayo scissor, 56.7ms
Speed: 2.3ms preprocess, 56.7ms inference, 97.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 36.7ms
Speed: 3.5ms preprocess, 36.7ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 37.7ms
Speed: 3.1ms preprocess, 37.7ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 37.8ms
Speed: 2.0ms preprocess, 37.8ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 32.8ms
Speed: 2.6ms preprocess, 32.8ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 34.2ms
Speed: 1.8ms preprocess, 34.2ms inference, 3.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 scalpel, 1 straight mayo scissor, 33.0ms
Speed: 3.0ms preprocess, 33.0ms 