In [1]:
import cv2
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("runs/detect/mango_yolo/weights/best.pt")  # use your exact path

# Open webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run detection
    results = model(frame)

    # Draw results on the frame
    annotated_frame = results[0].plot()

    # Display
    cv2.imshow("YOLOv8 Detection", annotated_frame)

    # Exit with 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 

In [8]:
import cv2
from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO("runs/detect/mango_yolo/weights/best.pt")

# Load video
video_path = "mango_video.mp4"  # Replace with your video path
cap = cv2.VideoCapture(video_path)

# Get video properties
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = cap.get(cv2.CAP_PROP_FPS)

# Define video writer to save output
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter("output_with_boxes.mp4", fourcc, fps, (width, height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Inference
    results = model(frame)

    # Annotate frame
    annotated_frame = results[0].plot()

    # Display
    cv2.imshow("YOLOv8 Video Detection", annotated_frame)

    # Save frame to output video
    out.write(annotated_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
out.release()
cv2.destroyAllWindows()



0: 384x640 12 Mangos, 166.2ms
Speed: 4.3ms preprocess, 166.2ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 Mangos, 257.8ms
Speed: 4.7ms preprocess, 257.8ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 Mangos, 289.3ms
Speed: 6.6ms preprocess, 289.3ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 Mangos, 315.5ms
Speed: 7.2ms preprocess, 315.5ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 Mangos, 180.7ms
Speed: 5.6ms preprocess, 180.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 Mangos, 149.2ms
Speed: 3.0ms preprocess, 149.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 Mangos, 186.9ms
Speed: 3.2ms preprocess, 186.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 Mangos, 158.1ms
Speed: 4.3ms preprocess, 158.1ms inference, 1.2ms postprocess per 

KeyboardInterrupt: 

In [10]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# initialize the model
processor = DetrImageProcessor.from_pretrained(
    "facebook/detr-resnet-101", revision="no_timm"
)
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-101", revision="no_timm"
)

# preprocess the inputs and infer
inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# non max supression above 0.9
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(
    outputs, target_sizes=target_sizes, threshold=0.9
)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Detected cat with confidence 0.998 at location [344.06, 24.85, 640.34, 373.74]
Detected remote with confidence 0.997 at location [328.13, 75.93, 372.81, 187.66]
Detected remote with confidence 0.997 at location [39.34, 70.13, 175.56, 118.78]
Detected cat with confidence 0.998 at location [15.36, 51.75, 316.89, 471.16]
Detected couch with confidence 0.995 at location [-0.19, 0.71, 639.73, 474.17]


In [12]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image, ImageDraw, ImageFont # Added ImageDraw and ImageFont
import requests

def draw_bounding_boxes_on_image():
    """
    Fetches an image, performs object detection using a pre-trained DETR model,
    draws bounding boxes and labels on the image, and saves the result.
    """
    # URL of the image to process
    url = "D:\PS\FruitPilot\Object-detection\frame_81_jpg.rf.ecae8437dff5d87f63a2486fd92fa61c.jpg"
    
    # Download and open the image
    try:
        image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {e}")
        return
    except IOError:
        print(f"Error opening image. Please ensure the URL points to a valid image.")
        return

    # Initialize the image processor and model from Hugging Face Transformers
    # Using DETR (DEtection TRansformer) model with a ResNet-101 backbone
    # The "no_timm" revision is specified as in the original code
    try:
        processor = DetrImageProcessor.from_pretrained(
            "facebook/detr-resnet-101", revision="no_timm"
        )
        model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-101", revision="no_timm"
        )
    except Exception as e:
        print(f"Error loading model or processor: {e}")
        print("Please ensure you have an internet connection and the 'transformers' library is correctly installed.")
        return

    # Preprocess the input image
    # The processor prepares the image in the format expected by the model
    # return_tensors="pt" returns PyTorch tensors
    inputs = processor(images=image, return_tensors="pt")
    
    # Perform inference
    # The model outputs raw predictions (logits and bounding boxes)
    with torch.no_grad(): # Disable gradient calculations for inference
        outputs = model(**inputs)

    # Convert outputs (bounding boxes and class logits) to COCO API format
    # The model outputs coordinates in the format (center_x, center_y, width, height) relative to the image size.
    # Post-processing converts these to (x_min, y_min, x_max, y_max) format.
    # target_sizes expects [height, width]
    target_sizes = torch.tensor([image.size[::-1]]) 
    
    # Apply non-maximum suppression (NMS) with a threshold of 0.9
    # NMS helps to remove duplicate or overlapping bounding boxes for the same object
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0] # We take the first result as we process a single image

    # Create a drawable version of the image to draw on
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    # Attempt to load a font; use default if specific font is not found
    try:
        # You can specify a path to a .ttf font file if you have one
        # font = ImageFont.truetype("arial.ttf", 15) 
        font = ImageFont.load_default() # Using a default font for broader compatibility
    except IOError:
        print("Default font loaded. For custom font, ensure the .ttf file is accessible.")
        font = ImageFont.load_default()

    # Define colors for bounding boxes and text
    box_color = "red"
    text_color = "white"
    text_background_color = "red" # Background for text for better visibility

    # Iterate over detected objects
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        # Get bounding box coordinates and round them
        box_coords = [round(i) for i in box.tolist()] # [x_min, y_min, x_max, y_max]
        
        # Draw the bounding box rectangle
        draw.rectangle(box_coords, outline=box_color, width=3)
        
        # Prepare the label text with class name and confidence score
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)
        label_text = f"{label_name}: {confidence}"
        
        # Calculate text size to position it correctly
        # For Pillow version >= 9.2.0, textbbox is preferred.
        # For older versions, textsize was used. textbbox is more accurate.
        try:
            # Get bounding box of the text itself
            text_bbox = draw.textbbox((0,0), label_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]
        except AttributeError: # Fallback for older Pillow versions that don't have textbbox
             # Note: textsize is less accurate than textbbox
            text_size_result = draw.textsize(label_text, font=font) 
            text_width = text_size_result[0]
            text_height = text_size_result[1]


        # Determine position for the text (above the bounding box)
        text_x = box_coords[0]
        text_y = box_coords[1] - text_height - 5  # 5 pixels padding above the box

        # Adjust text position if it goes off the top of the image
        if text_y < 0:
            text_y = box_coords[1] + 5  # Place it just inside the top of the box

        # Draw a filled rectangle as a background for the text
        draw.rectangle(
            (text_x, text_y, text_x + text_width, text_y + text_height),
            fill=text_background_color
        )
        
        # Draw the text
        draw.text((text_x, text_y), label_text, fill=text_color, font=font)

    # Save the image with bounding boxes
    output_image_path = "image_with_bounding_boxes.jpg"
    try:
        image_with_boxes.save(output_image_path)
        print(f"Image with bounding boxes saved to: {output_image_path}")
        
        # Optionally, display the image if in an environment that supports it (e.g., Jupyter Notebook)
        # image_with_boxes.show() 
        
    except IOError:
        print(f"Error saving image to {output_image_path}")
    except Exception as e:
        print(f"An unexpected error occurred while saving/showing the image: {e}")

if __name__ == '__main__':
    draw_bounding_boxes_on_image()


Error downloading image: No connection adapters were found for 'D:\\PS\\FruitPilot\\Object-detection\x0crame_81_jpg.rf.ecae8437dff5d87f63a2486fd92fa61c.jpg'


In [15]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image, ImageDraw, ImageFont
import os # Imported os module for path manipulation

def draw_bounding_boxes_on_image(image_path):
    """
    Loads an image from a local path, performs object detection using a
    pre-trained DETR model, draws bounding boxes and labels on the image,
    and saves the result.

    Args:
        image_path (str): The path to the local image file.
    """
    # Open the local image
    try:
        image = Image.open(image_path).convert("RGB")
    except FileNotFoundError:
        print(f"Error: The file '{image_path}' was not found.")
        return
    except IOError:
        print(f"Error opening image. Please ensure '{image_path}' is a valid image file.")
        return
    except Exception as e:
        print(f"An unexpected error occurred while opening the image: {e}")
        return

    # Initialize the image processor and model from Hugging Face Transformers
    # Using DETR (DEtection TRansformer) model with a ResNet-101 backbone
    # The "no_timm" revision is specified as in the original code
    try:
        processor = DetrImageProcessor.from_pretrained(
            "facebook/detr-resnet-101", revision="no_timm"
        )
        model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-101", revision="no_timm"
        )
    except Exception as e:
        print(f"Error loading model or processor: {e}")
        print("Please ensure you have an internet connection and the 'transformers' library is correctly installed.")
        return

    # Preprocess the input image
    # The processor prepares the image in the format expected by the model
    # return_tensors="pt" returns PyTorch tensors
    inputs = processor(images=image, return_tensors="pt")
    
    # Perform inference
    # The model outputs raw predictions (logits and bounding boxes)
    with torch.no_grad(): # Disable gradient calculations for inference
        outputs = model(**inputs)

    # Convert outputs (bounding boxes and class logits) to COCO API format
    # The model outputs coordinates in the format (center_x, center_y, width, height) relative to the image size.
    # Post-processing converts these to (x_min, y_min, x_max, y_max) format.
    # target_sizes expects [height, width]
    target_sizes = torch.tensor([image.size[::-1]]) 
    
    # Apply non-maximum suppression (NMS) with a threshold of 0.9
    # NMS helps to remove duplicate or overlapping bounding boxes for the same object
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0] # We take the first result as we process a single image

    # Create a drawable version of the image to draw on
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    # Attempt to load a font; use default if specific font is not found
    try:
        # You can specify a path to a .ttf font file if you have one
        # font = ImageFont.truetype("arial.ttf", 15) 
        font = ImageFont.load_default() # Using a default font for broader compatibility
    except IOError:
        print("Default font loaded. For custom font, ensure the .ttf file is accessible.")
        font = ImageFont.load_default()

    # Define colors for bounding boxes and text
    box_color = "red"
    text_color = "white"
    text_background_color = "red" # Background for text for better visibility

    # Iterate over detected objects
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        # Get bounding box coordinates and round them
        box_coords = [round(i) for i in box.tolist()] # [x_min, y_min, x_max, y_max]
        
        # Draw the bounding box rectangle
        draw.rectangle(box_coords, outline=box_color, width=3)
        
        # Prepare the label text with class name and confidence score
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)
        label_text = f"{label_name}: {confidence}"
        
        # Calculate text size to position it correctly
        # For Pillow version >= 9.2.0, textbbox is preferred.
        # For older versions, textsize was used. textbbox is more accurate.
        try:
            # Get bounding box of the text itself
            text_bbox = draw.textbbox((0,0), label_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]
        except AttributeError: # Fallback for older Pillow versions that don't have textbbox
             # Note: textsize is less accurate than textbbox
            text_size_result = draw.textsize(label_text, font=font) 
            text_width = text_size_result[0]
            text_height = text_size_result[1]


        # Determine position for the text (above the bounding box)
        text_x = box_coords[0]
        text_y = box_coords[1] - text_height - 5  # 5 pixels padding above the box

        # Adjust text position if it goes off the top of the image
        if text_y < 0:
            text_y = box_coords[1] + 5  # Place it just inside the top of the box

        # Draw a filled rectangle as a background for the text
        draw.rectangle(
            (text_x, text_y, text_x + text_width, text_y + text_height),
            fill=text_background_color
        )
        
        # Draw the text
        draw.text((text_x, text_y), label_text, fill=text_color, font=font)

    # Construct the output image path
    base, ext = os.path.splitext(image_path)
    output_image_path = f"{base}_with_boxes{ext}"
    
    # Save the image with bounding boxes
    try:
        image_with_boxes.save(output_image_path)
        print(f"Image with bounding boxes saved to: {output_image_path}")
        
        # Optionally, display the image if in an environment that supports it (e.g., Jupyter Notebook)
        # image_with_boxes.show() 
        
    except IOError:
        print(f"Error saving image to {output_image_path}")
    except Exception as e:
        print(f"An unexpected error occurred while saving/showing the image: {e}")

if __name__ == '__main__':
    # IMPORTANT: Replace "your_image.jpg" with the actual path to your local image file.
    # For example:
    # On Windows: local_image_path = r"C:\Users\YourUser\Pictures\my_image.png"
    # On macOS/Linux: local_image_path = "/home/YourUser/Pictures/my_image.jpg"
    local_image_path = "D:\PS\FruitPilot\Object-detection\\frame_81_jpg.rf.ecae8437dff5d87f63a2486fd92fa61c.jpg"  # <--- CHANGE THIS LINE

    if local_image_path == "your_image.jpg":
        print("Please update the 'local_image_path' variable in the script with the actual path to your image.")
    else:
        draw_bounding_boxes_on_image(local_image_path)


Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))


Image with bounding boxes saved to: D:\PS\FruitPilot\Object-detection\frame_81_jpg.rf.ecae8437dff5d87f63a2486fd92fa61c_with_boxes.jpg
