In [1]:
import torch
import cv2
import os

# Load the pre-trained model from the .pt file
weights_path = 'yolov5.pt'
model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights_path)

# Define the folder containing the images to be processed
image_folder = 'data/test_yolov5_1'

# Loop over each image in the folder and perform object detection
for image_file in os.listdir(image_folder):
    # Read the image file
    image_path = os.path.join(image_folder, image_file)
    image = cv2.imread(image_path)

    # Perform object detection using the YOLOv5 model
    results = model(image)

    # Extract the coordinates of the bounding boxes of each object detected in the image
    bboxes = results.xyxy[0].cpu().numpy()

    # Loop over each bounding box and print the coordinates
    for bbox in bboxes:
        x1, y1, x2, y2, conf, cls = bbox.tolist()
        print(f"Object detected: class {int(cls)}, confidence {conf:.2f}, BBox: ({x1:.2f}, {y1:.2f}), ({x2:.2f}, {y2:.2f})")

Using cache found in C:\Users\moyni/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-15 Python-3.9.13 torch-2.0.0+cpu CPU



[31m[1mrequirements:[0m C:\Users\moyni\.cache\torch\hub\requirements.txt not found, check failed.


Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Object detected: class 33, confidence 0.71, BBox: (1507.78, 253.31), (1596.53, 346.26)
Object detected: class 33, confidence 0.58, BBox: (876.93, 381.23), (926.63, 443.31)
Object detected: class 33, confidence 0.47, BBox: (723.26, 685.14), (772.24, 743.23)
Object detected: class 33, confidence 0.53, BBox: (735.26, 303.29), (789.48, 363.15)
Object detected: class 29, confidence 0.44, BBox: (735.89, 302.57), (789.63, 362.75)
Object detected: class 33, confidence 0.66, BBox: (1117.30, 134.05), (1181.56, 200.70)
Object detected: class 33, confidence 0.68, BBox: (689.28, 420.57), (738.63, 482.25)
Object detected: class 33, confidence 0.68, BBox: (739.66, 39.72), (798.00, 99.56)
Object detected: class 33, confidence 0.58, BBox: (1133.19, 241.73), (1199.64, 311.95)
Object detected: class 33, confidence 0.55, BBox: (740.21, 409.44), (793.60, 471.19)
Object detected: class 29, confidence 0.29, BBox: (738.80, 413.34), (791.38, 470.36)
Object detected: class 33, confidence 0.58, BBox: (750.97, 50

# Offline code

In [8]:
import torch
import os
from glob import glob
from tqdm import tqdm
import cv2
import yaml
from models.yolo import Model

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load model
weights_path = 'yolov5.pt'
model_cfg = yaml.load(open("models/yolov5s.yaml"), Loader=yaml.SafeLoader)
model = Model(model_cfg).to(device)

# Define the folder containing the images to be processed
image_folder = 'data/test_yolov5_1'

# Loop over each image in the folder and perform object detection
for image_file in os.listdir(image_folder):
    # Read the image file
    image_path = os.path.join(image_folder, image_file)
    image = cv2.imread(image_path)

    # Perform object detection using the YOLOv5 model
    results = model(image)

    # Extract the coordinates of the bounding boxes of each object detected in the image
    bboxes = results.xyxy[0].cpu().numpy()

    # Loop over each bounding box and print the coordinates
    for bbox in bboxes:
        x1, y1, x2, y2, conf, cls = bbox.tolist()
        print(f"Object detected: class {int(cls)}, confidence {conf:.2f}, BBox: ({x1:.2f}, {y1:.2f}), ({x2:.2f}, {y2:.2f})")



                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              
  8                -1  1   1182720  

TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !NoneType!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !NoneType!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)


In [13]:
import torch
import os
from glob import glob
from tqdm import tqdm
import cv2
import yaml
from models.experimental import attempt_load
from utils.general import non_max_suppression

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load model
weights_path = 'yolov5.pt'
model_cfg = yaml.safe_load(open("models/yolov5s.yaml"))
model = Model(model_cfg).to(device)
model.load_state_dict(torch.load(weights_path, map_location=device)['model'].float().state_dict())
model.eval()

# Define the folder containing the images to be processed
image_folder = 'data/test_yolov5_1'

# Loop over each image in the folder and perform object detection
for image_file in os.listdir(image_folder):
    # Read the image file
    image_path = os.path.join(image_folder, image_file)
    image = cv2.imread(image_path)

    # Convert BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform object detection using the YOLOv5 model
    results = model(torch.from_numpy(image).to(device).float())

    # Extract the coordinates of the bounding boxes of each object detected in the image
    bboxes = []
    for i, det in enumerate(results):
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = det[:, :4] / image.shape[:2][::-1]
            bboxes.append(det[:, :6].cpu().numpy())

    # Loop over each bounding box and print the coordinates
    for bbox in bboxes:
        for b in bbox:
            x1, y1, x2, y2, conf, cls = b.tolist()
            print(f"Object detected: class {int(cls)}, confidence {conf:.2f}, BBox: ({x1:.2f}, {y1:.2f}), ({x2:.2f}, {y2:.2f})")



                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              
  8                -1  1   1182720  

RuntimeError: Given groups=1, weight of size [32, 3, 6, 6], expected input[1, 1277, 1698, 3] to have 3 channels, but got 1277 channels instead

## Ofline try 2

In [14]:
import torch
import cv2
import os
import yaml
from models.yolo import Model

# Load the pre-trained model from the .pt file
weights_path = 'yolov5.pt'
model_cfg = yaml.load(open("models/yolov5s.yaml"), Loader=yaml.SafeLoader)
model = Model(model_cfg).to('cpu')
state_dict = torch.load(weights_path, map_location='cpu')['model'].float().state_dict()
model.load_state_dict(state_dict)
model.eval()

# Define the folder containing the images to be processed
image_folder = 'data/test_yolov5_1'

# Loop over each image in the folder and perform object detection
for image_file in os.listdir(image_folder):
    # Read the image file
    image_path = os.path.join(image_folder, image_file)
    image = cv2.imread(image_path)

    # Perform object detection using the YOLOv5 model
    results = model(torch.from_numpy(image).to('cpu').float())

    # Extract the coordinates of the bounding boxes of each object detected in the image
    bboxes = results.xyxy[0].cpu().numpy()

    # Loop over each bounding box and print the coordinates
    for bbox in bboxes:
        x1, y1, x2, y2, conf, cls = bbox.tolist()
        print(f"Object detected: class {int(cls)}, confidence {conf:.2f}, BBox: ({x1:.2f}, {y1:.2f}), ({x2:.2f}, {y2:.2f})")



                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]              
  8                -1  1   1182720  

RuntimeError: Given groups=1, weight of size [32, 3, 6, 6], expected input[1, 1277, 1698, 3] to have 3 channels, but got 1277 channels instead