In [1]:

# import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import cv2
from PIL import Image

In [2]:
# Fetch the pretrained YOLOv5s checkpoint from the Ultralytics hub repo.
# autoshape=False keeps the bare network (no AutoShape wrapper), so the model
# is called with an already-prepared tensor batch further down.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
    autoshape=False,
)
# Switch to inference mode; as the last expression this also displays the module tree.
model.eval()

Using cache found in C:\Users\prana/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-3-19 Python-3.11.1 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3070 Laptop GPU, 8192MiB)



DetectMultiBackend(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3(
        (cv1): Conv(
          (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, tr

In [3]:
# Sample image to push through the network (alternative sample id: '000000000662').
image_id = '000000000139'
image_path = f'C:/Users/prana/fiftyone/coco-2017/validation/data/{image_id}.jpg'

# Resize((640, 640)) stretches the image to a fixed 640x640 square (aspect ratio
# is not preserved), then ToTensor converts the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor()
])

# Open inside a context manager so the underlying file handle is released as
# soon as convert() has produced its own in-memory copy, instead of leaving the
# handle open until garbage collection.
with Image.open(image_path) as raw_image:
    image = raw_image.convert('RGB')

input_tensor = transform(image).unsqueeze(0)  # Add batch dimension
input_tensor.requires_grad = True  # Track gradients with respect to the input pixels


In [4]:
# Sanity check: print the device holding the model weights, then the device
# holding the input batch. They must match before running a forward pass.
for _device in (model.device, input_tensor.device):
    print(_device)

cuda:0
cpu


In [5]:
# Calling .to() on a tensor that requires grad returns a *non-leaf* copy, and
# backward() only populates .grad on leaf tensors. Because the name is rebound,
# the original CPU leaf would be lost and input_tensor.grad would stay None.
# Detach first, move, then re-enable gradients so the tensor that is actually
# fed to the model is itself a leaf living on the model's device.
input_tensor = input_tensor.detach().to(model.device).requires_grad_(True)

In [6]:
# Run a forward pass of the raw model on the prepared batch
output = model(input_tensor)  # Raw model result (not post-processed detections)
len(output)  # Displayed as the cell result: number of top-level elements returned

2

In [7]:
# First element is a single tensor; second element is a sequence of per-scale outputs.
decoded_shape = output[0].shape
num_heads = len(output[1])
decoded_shape, num_heads

(torch.Size([1, 25200, 85]), 3)

In [8]:
# Shapes of the three per-scale tensors held in the second output element.
tuple(output[1][head_idx].shape for head_idx in range(3))

(torch.Size([1, 3, 80, 80, 85]),
 torch.Size([1, 3, 40, 40, 85]),
 torch.Size([1, 3, 20, 20, 85]))

<h1> Evaluation </h1>

In [None]:
from ultralytics import YOLO  # Import the YOLOv8 library
import fiftyone as fo
import fiftyone.zoo as foz
from collections import defaultdict
from PIL import Image
import numpy as np
import cv2
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval


# Load (or reuse a cached copy of) the COCO-2017 validation split from the
# FiftyOne zoo, capped at 1000 samples and restricted to detection labels.
zoo_kwargs = {
    "split": "validation",
    "max_samples": 1000,
    "label_types": ["detections"],
}
dataset = foz.load_zoo_dataset("coco-2017", **zoo_kwargs)

In [None]:
# Read the COCO annotation file; only its category table is needed here.
with open("instances_val2017.json", encoding="utf-8") as f:
    ground_truth = json.load(f)

gt_categories = ground_truth['categories']

# COCO mapping from category name to category id.
# A plain dict is used on purpose: the previous defaultdict(list) returned an
# empty list for any unknown name, which would then be written out as a bogus
# "category_id" instead of failing loudly.
gt = {category['name']: category['id'] for category in gt_categories}

# COCO mapping from category id to category name
gt_rev = {v: k for k, v in gt.items()}

# YOLO mapping from class index to class name
yolo_classes = {
    0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane", 5: "bus", 6: "train", 7: "truck", 
    8: "boat", 9: "traffic light", 10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench", 
    14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow", 20: "elephant", 21: "bear", 
    22: "zebra", 23: "giraffe", 24: "backpack", 25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 
    29: "frisbee", 30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite", 34: "baseball bat", 
    35: "baseball glove", 36: "skateboard", 37: "surfboard", 38: "tennis racket", 39: "bottle", 
    40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon", 45: "bowl", 46: "banana", 
    47: "apple", 48: "sandwich", 49: "orange", 50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza", 
    54: "donut", 55: "cake", 56: "chair", 57: "couch", 58: "potted plant", 59: "bed", 60: "dining table", 
    61: "toilet", 62: "tv", 63: "laptop", 64: "mouse", 65: "remote", 66: "keyboard", 67: "cell phone", 
    68: "microwave", 69: "oven", 70: "toaster", 71: "sink", 72: "refrigerator", 73: "book", 74: "clock", 
    75: "vase", 76: "scissors", 77: "teddy bear", 78: "hair drier", 79: "toothbrush"
}

# Fail fast if any YOLO class name has no counterpart in the annotation file,
# rather than discovering the mismatch later while building predictions.
_unmapped = sorted(set(yolo_classes.values()) - set(gt))
assert not _unmapped, f"YOLO classes missing from COCO categories: {_unmapped}"


In [None]:
# Checkpoint names available for evaluation, grouped by YOLO generation.
# The order matters: later cells pick a model by index into this list.
model_names = [
    # v5
    "yolov5s",
    # v8
    "yolov8s", "yolov8x",
    # v9
    "yolov9e",
    # v10
    "yolov10b", "yolov10x",
    # 11
    "yolo11n", "yolo11m", "yolo11x",
]

In [None]:
from pathlib import Path  # cell-local import: used only to parse image ids below

# Load the YOLO model
name = model_names[0]
model = YOLO(f'{name}.pt')  # Use the YOLO model

# One COCO-results-style record per detected box, accumulated over all samples
predictions = []

for sample in dataset:
    # The file name (without extension) is parsed as the integer image id,
    # e.g. .../000000000139.jpg -> 139. Path.stem works with the platform's own
    # separator, whereas splitting on '\\' only worked on Windows.
    image_id = int(Path(sample.filepath).stem)

    # Context manager releases the file handle once inference has consumed the image
    with Image.open(sample.filepath) as image:
        # Perform inference
        results = model.predict(image, imgsz=640)  # Specify the input size if needed
    detections = results[0].boxes  # Extract bounding boxes

    # Convert all boxes of this image to Python lists in one go instead of
    # issuing several .tolist()/.item() calls per detection.
    corners = detections.xyxy.tolist()
    scores = detections.conf.tolist()
    class_ids = detections.cls.tolist()

    # Convert detections to the required format
    for (x1, y1, x2, y2), conf, cls in zip(corners, scores, class_ids):
        cls = int(cls)  # Class index
        # Boxes are stored as [x, y, width, height], so convert from corner form
        width, height = x2 - x1, y2 - y1
        predictions.append({
            "image_id": image_id,
            "category_id": gt[yolo_classes[cls]],
            "bbox": [float(x1), float(y1), float(width), float(height)],
            "score": float(conf)
        })

# Save predictions
with open(f"predictions_{name}.json", "w") as f:
    json.dump(predictions, f, indent=4)


Downloading split 'validation' to 'C:\Users\prana\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\prana\fiftyone\coco-2017\raw\instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'coco-2017-validation-1000'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:14<00:00, 9.21MB/s] 



0: 448x640 2 persons, 3 chairs, 2 potted plants, 2 dining tables, 2 tvs, 1 microwave, 2 refrigerators, 1 clock, 4 vases, 58.7ms
Speed: 2.2ms preprocess, 58.7ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)

0: 640x608 1 bear, 75.9ms
Speed: 1.8ms preprocess, 75.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 608)

0: 512x640 1 bottle, 1 chair, 2 potted plants, 1 bed, 2 books, 27.3ms
Speed: 1.1ms preprocess, 27.3ms inference, 1.7ms postprocess per image at shape (1, 3, 512, 640)

0: 640x480 1 car, 2 stop signs, 28.9ms
Speed: 1.5ms preprocess, 28.9ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)

0: 640x448 1 couch, 3 teddy bears, 28.6ms
Speed: 1.0ms preprocess, 28.6ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 448)

0: 448x640 1 person, 2 skiss, 28.2ms
Speed: 1.1ms preprocess, 28.2ms inference, 2.6ms postprocess per image at shape (1, 3, 448, 640)

0: 640x448 1 oven, 1 refrigerator, 27.1ms
Speed: 1.0ms preprocess, 27

In [None]:
from pathlib import Path  # cell-local import: used only to parse image ids below

# Load ground truth
coco_gt = COCO("instances_val2017.json")

# Load predictions
coco_dt = coco_gt.loadRes(f"predictions_{name}.json")

# Evaluate on exactly the images that were run through the model. Taking the
# first 1000 sorted ground-truth ids only gives the right answer if that happens
# to coincide with the loaded subset; any mismatch would score un-predicted
# images as misses and ignore predicted ones. Ids are derived from file names
# the same way the prediction loop derives them.
imgIds = sorted({int(Path(filepath).stem) for filepath in dataset.values("filepath")})

# Random sample id kept for ad-hoc inspection (not used by the evaluation below);
# bounded by the actual list length rather than a hard-coded 1000.
imgId = imgIds[np.random.randint(len(imgIds))]

# Run COCO evaluation
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.params.imgIds  = imgIds
coco_eval.evaluate()  # Ensure evaluation runs
coco_eval.accumulate()
coco_eval.summarize()

loading annotations into memory...
Done (t=1.02s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.04s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.90s).
Accumulating evaluation results...
DONE (t=0.50s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.506
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.645
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.545
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.284
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.559
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.663
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.389
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.568
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets

In [None]:
import matplotlib.pyplot as plt
  
# Box to visualise, given as [x, y, width, height] (top-left corner plus size)
bbox =  [
        7.03,
        167.76,
        149.32,
        94.87
      ]

# Human-readable label for category id 72
cls = gt_rev[72]

# Convert to corner form so the names match what they hold
x1, y1, box_w, box_h = bbox
x2, y2 = x1 + box_w, y1 + box_h

image_id = '000000000139'
image_path = f'C:/Users/prana/fiftyone/coco-2017/validation/data/{image_id}.jpg'
image_cv = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising
if image_cv is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
cv2.rectangle(image_cv, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
# Only the class name is drawn: this hand-entered box has no score of its own,
# and the previous label reused whatever `conf` was left over from the
# prediction loop (or raised NameError on a fresh kernel).
cv2.putText(image_cv, cls, (int(x1), int(y1) - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

# Show image with detections
plt.figure(figsize=(10, 6))
plt.imshow(image_cv)
plt.axis("off")
plt.savefig("output.jpeg")

In [63]:
import torch
import cv2
import matplotlib.pyplot as plt
from PIL import Image

# Pretrained YOLOv5s from the Ultralytics hub repo, loaded with the hub defaults
# (the AutoShape wrapper is added, so the model can be called on a PIL image).
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
)

def plot_detections(image_path, output_path="output.jpeg"):
    """Run the global `model` on one image and plot its detections.

    Each detection is drawn as a rectangle with a "<name> <confidence>" label.
    The annotated image is saved to `output_path` and shown with matplotlib.

    Args:
        image_path: Path of the image file to analyse.
        output_path: Where the annotated figure is written. Defaults to
            "output.jpeg", the location that used to be hard-coded.

    Raises:
        OSError: If OpenCV cannot decode the image at `image_path`.
    """
    # Context manager releases the file handle once inference has consumed the image
    with Image.open(image_path) as image:
        # Perform inference
        results = model(image)
    # Convert results to pandas DataFrame
    df = results.pandas().xyxy[0]  # Bounding boxes in (x1, y1, x2, y2) format

    # Load a drawable copy with OpenCV and convert BGR -> RGB for matplotlib
    image_cv = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising
    if image_cv is None:
        raise OSError(f"OpenCV could not read image: {image_path}")
    image_cv = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)

    # Plot detections
    for _, row in df.iterrows():
        x1, y1 = int(row['xmin']), int(row['ymin'])
        x2, y2 = int(row['xmax']), int(row['ymax'])
        conf, label = row['confidence'], row['name']
        cv2.rectangle(image_cv, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # Keep the label baseline inside the image when the box touches the top edge
        text_y = max(y1 - 10, 10)
        cv2.putText(image_cv, f"{label} {conf:.2f}", (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # Show image with detections
    fig = plt.figure(figsize=(10, 6))
    plt.imshow(image_cv)
    plt.axis("off")
    plt.savefig(output_path)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)

# Example usage: annotate one COCO validation image.
# The file name is the zero-padded image id; the directory is this machine's
# local FiftyOne download location.
image_id = '000000000139'
image_path = f'C:/Users/prana/fiftyone/coco-2017/validation/data/{image_id}.jpg'
plot_detections(image_path)


Using cache found in C:\Users\prana/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-3-19 Python-3.11.1 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3070 Laptop GPU, 8192MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
