In [1]:
import torch
import torch.backends.cudnn as cudnn

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression, scale_coords

In [2]:
from PIL import Image
import numpy as np
import time
import cv2
import os

In [42]:
image_path = 'images/4076.png'  # input image to run detection on
model_path = 'yolov5_weights/yolov5m6.pt'  # weights file handed to load_model
img_size = 512  # target size (pixels) for the longest image side
iou_thr = 0.5  # IoU threshold passed to non-max suppression
con_thr = 0.5  # confidence threshold passed to non-max suppression

In [12]:
def load_model(weights, device):
    """Load model weights via ``attempt_load`` and return the resulting model.

    Args:
        weights: forwarded unchanged as the first argument of ``attempt_load``.
        device: forwarded as ``attempt_load``'s ``map_location`` keyword.

    Returns:
        Whatever ``attempt_load`` returns.
    """
    return attempt_load(weights, map_location=device)

In [13]:
# Use the first CUDA device when torch reports one is available, otherwise the CPU,
# then load the weights onto that device.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = load_model(model_path, device)

In [27]:
def show_results(img, xyxy, conf, line_thickness=1):
    """Draw one detection (box plus confidence label) onto ``img`` in place.

    Args:
        img: image array handed to ``cv2.rectangle`` and ``cv2.putText``; it is
            modified in place. ``img.shape[:2]`` is read only when
            ``line_thickness`` is falsy.
        xyxy: sequence whose first four items are x1, y1, x2, y2. Each value is
            converted with ``int``.
        conf: confidence score; anything ``float()`` accepts.
        line_thickness: stroke width of the box. Defaults to 1, the value this
            function has always drawn with. Pass ``None`` (or 0) to derive the
            width from the image size instead.

    Returns:
        The same ``img`` object that was passed in.
    """
    if line_thickness:
        tl = line_thickness
    else:
        # Only the first two dimensions are needed, so 2-D images work as well.
        h, w = img.shape[:2]
        tl = round(0.004 * (h + w) / 2) + 1

    x1, y1, x2, y2 = (int(v) for v in xyxy[:4])
    # (0, 255, 0) is green when img is in OpenCV's BGR channel order.
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), thickness=tl, lineType=cv2.LINE_AA)

    tf = max(tl - 1, 1)  # font thickness
    # Fixed two-decimal formatting; slicing str(conf) mangles values that print
    # in scientific notation (e.g. 1e-05 -> '1e-0').
    label = f"{float(conf):.2f}"
    # The label is anchored 2 px above the box's top-left corner;
    # [0, 0, 255] is red when img is in BGR order.
    cv2.putText(img, label, (x1, y1 - 2), 0, tl / 1.5, [0, 0, 255], thickness=tf, lineType=cv2.LINE_AA)
    return img

In [46]:
def detect_one(model, device, image_path, img_size, con_thr, iou_thr,
               save_path='yolov5.png', show=True):
    """Run the detector on a single image file and save/show the annotated result.

    The image is read with OpenCV, converted to grayscale and replicated to three
    channels, resized so its longest side is ``img_size``, letterboxed, and fed to
    ``model``. Boxes that survive non-max suppression are scaled back to the
    original image's shape and drawn on it with ``show_results``.

    Args:
        model: detection model; must be callable on a float image batch and
            expose ``stride`` (its maximum is passed to ``check_img_size``).
        device: torch device the input tensor is moved to.
        image_path: path of the image to read.
        img_size: target size for the longest image side.
        con_thr: confidence threshold passed to ``non_max_suppression``.
        iou_thr: IoU threshold passed to ``non_max_suppression``.
        save_path: where the annotated image is written. Defaults to
            ``'yolov5.png'``; a falsy value skips saving.
        show: when true (default), ``Image.show()`` is called on the result.

    Returns:
        None.

    Raises:
        OSError: if ``cv2.imread`` cannot read an image from ``image_path``.
    """
    # cv2.imread reports failure by returning None rather than raising, so check
    # here instead of letting cvtColor fail with an opaque error.
    orgimg = cv2.imread(image_path)
    if orgimg is None:
        raise OSError(f"could not read an image from {image_path!r} (missing or unreadable file)")

    # Collapse BGR to grayscale, then replicate that channel three times.
    orgimg = cv2.cvtColor(orgimg, cv2.COLOR_BGR2GRAY)
    orgimg = np.dstack([orgimg] * 3)

    # Work on a copy so orgimg stays at full resolution for drawing.
    img0 = orgimg.copy()

    h0, w0 = orgimg.shape[:2]  # original height, width
    r = img_size / max(h0, w0)  # ratio that makes the longest side equal img_size
    if r != 1:
        # Area interpolation when shrinking, bilinear when enlarging.
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    # Validate the requested size against the model's largest stride.
    imgsz = check_img_size(img_size, s=model.stride.max())
    img = letterbox(img0, new_shape=imgsz)[0]

    # Reverse the channel axis, move channels first (HWC -> CHW), make contiguous.
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()
    img = torch.from_numpy(img).to(device)
    img = img.float()  # to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # No gradients are needed for inference.
    with torch.no_grad():
        pred = model(img)[0]
        pred = non_max_suppression(pred, con_thr, iou_thr)

        for det in pred:  # one entry per image in the batch
            # Rescale boxes from the network input shape to the original image shape.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], orgimg.shape).round()

            # Tensor.numpy() only works on CPU tensors, so move the detections
            # off the GPU once before reading boxes and scores.
            det_cpu = det.detach().cpu()
            for j in range(det_cpu.size()[0]):
                bounding_box = det_cpu[j, :4].numpy().tolist()
                conf = det_cpu[j, 4].numpy()
                orgimg = show_results(orgimg, bounding_box, conf)

    # The drawing colours were chosen in BGR order; convert to RGB for PIL.
    annotated = Image.fromarray(cv2.cvtColor(orgimg, cv2.COLOR_BGR2RGB))
    if save_path:
        annotated.save(save_path)
    if show:
        annotated.show()

In [47]:
# Run detection on the configured image with the configured thresholds.
detect_one(
    model=model,
    device=device,
    image_path=image_path,
    img_size=img_size,
    con_thr=con_thr,
    iou_thr=iou_thr,
)