In [1]:
!pip install ultralytics




[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
# from ultralytics import YOLO
# import cv2
# import matplotlib.pyplot as plt

# # Load a pretrained YOLOv8 model (YOLOv8n = nano, very fast)
# model = YOLO("yolov8n.pt")  

# # Example: run detection on an image
# image_path = "./data/IMG_4779.jpg"  # replace with your image path
# results = model(image_path)

# # Print results
# print(results[0].boxes)  # bounding boxes, confidence, class

# # Visualize results with OpenCV + matplotlib
# img = cv2.imread(image_path)
# img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# for box in results[0].boxes.xyxy:  # xyxy = [x1, y1, x2, y2]
#     x1, y1, x2, y2 = map(int, box)
#     cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

# plt.figure(figsize=(8,8))
# plt.imshow(img)
# plt.axis('off')
# plt.show()

from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
import os

def safe_object_detection(image_path, model_name="yolov8n.pt"):
    """
    Safe object detection with comprehensive error handling
    """
    try:
        # Check if image exists
        if not os.path.exists(image_path):
            print(f"❌ Image not found: {image_path}")
            return None
            
        # Load model with device specification
        print(f"🔄 Loading model: {model_name}")
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model = YOLO(model_name)
        model.to(device)
        print(f"✅ Model loaded on {device}")
        
        # Load and verify image
        print(f"🔄 Loading image: {image_path}")
        img = cv2.imread(image_path)
        if img is None:
            print("❌ Failed to load image with cv2, trying PIL...")
            img_pil = Image.open(image_path)
            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
        
        print(f"✅ Image loaded, shape: {img.shape}")
        
        # Run detection with explicit parameters
        print("🔄 Running object detection...")
        results = model.predict(
            source=image_path,
            save=False,
            verbose=False,
            device=device,
            imgsz=640  # Specify image size
        )
        
        print(f"✅ Detection completed, found {len(results)} result(s)")
        return results, img
        
    except Exception as e:
        print(f"❌ Error during detection: {str(e)}")
        print(f"Error type: {type(e).__name__}")
        return None, None

def visualize_detections(results, img, confidence_threshold=0.5):
    """
    Visualize detection results with labels and confidence scores
    """
    if results is None or img is None:
        print("❌ No results to visualize")
        return
    
    # Convert BGR to RGB for matplotlib
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # Get detection results
    result = results[0]
    
    if result.boxes is not None and len(result.boxes) > 0:
        boxes = result.boxes.xyxy.cpu().numpy()  # Bounding boxes
        confidences = result.boxes.conf.cpu().numpy()  # Confidence scores
        classes = result.boxes.cls.cpu().numpy()  # Class indices
        
        # Get class names
        class_names = result.names
        
        print(f"📊 Found {len(boxes)} detections")
        
        # Draw bounding boxes
        for i, (box, conf, cls) in enumerate(zip(boxes, confidences, classes)):
            if conf >= confidence_threshold:
                x1, y1, x2, y2 = map(int, box)
                class_name = class_names[int(cls)]
                
                print(f"  Detection {i+1}: {class_name} ({conf:.2f}) at [{x1}, {y1}, {x2}, {y2}]")
                
                # Draw rectangle
                cv2.rectangle(img_rgb, (x1, y1), (x2, y2), (255, 0, 0), 2)
                
                # Add label with confidence
                label = f"{class_name}: {conf:.2f}"
                label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                cv2.rectangle(img_rgb, (x1, y1 - label_size[1] - 10), 
                            (x1 + label_size[0], y1), (255, 0, 0), -1)
                cv2.putText(img_rgb, label, (x1, y1 - 5), 
                          cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    else:
        print("📊 No objects detected")
    
    # Display result
    plt.figure(figsize=(12, 8))
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.title('Object Detection Results')
    plt.tight_layout()
    plt.show()

# Test the detection
image_path = "./data/IMG_4779.jpg"
results, img = safe_object_detection(image_path)

if results is not None:
    visualize_detections(results, img, confidence_threshold=0.3)
else:
    print("❌ Detection failed, trying alternative approach...")

🔄 Loading model: yolov8n.pt
✅ Model loaded on cuda
🔄 Loading image: ./data/IMG_4779.jpg
✅ Image loaded, shape: (1579, 1164, 3)
🔄 Running object detection...
❌ Error during detection: Could not run 'torchvision::nms' with arguments from the 'CUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'torchvision::nms' is only available for these backends: [CPU, Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrap