In [16]:

import cv2
import numpy as np
import argparse
import torch
import torchvision
import torchvision.transforms as transforms
from coco_names import COCO_INSTANCE_CATEGORY_NAMES as coco_names

import detect_utils
from PIL import Image

In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [18]:
# this will help us create a different color for each class
COLORS = np.random.uniform(0, 255, size=(len(coco_names), 3))

In [19]:
# define the torchvision image transforms
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [20]:
def predict(image, model, device, detection_threshold):
    # transform the image to tensor
    image = transform(image).to(device)
    image = image.unsqueeze(0) # add a batch dimension
    outputs = model(image) # get the predictions on the image
    # print the results individually
    # print(f"BOXES: {outputs[0]['boxes']}")
    # print(f"LABELS: {outputs[0]['labels']}")
    # print(f"SCORES: {outputs[0]['scores']}")
    # get all the predicited class names
    pred_classes = [coco_names[i] for i in outputs[0]['labels'].cpu().numpy()]
    # get score for all the predicted objects
    pred_scores = outputs[0]['scores'].detach().cpu().numpy()
    # get all the predicted bounding boxes
    pred_bboxes = outputs[0]['boxes'].detach().cpu().numpy()
    # get boxes above the threshold score
    boxes = pred_bboxes[pred_scores >= detection_threshold].astype(np.int32)
    return boxes, pred_classes, outputs[0]['labels']

In [21]:
def draw_boxes(boxes, classes, labels, image):
    # read the image with OpenCV
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
    for i, box in enumerate(boxes):
        color = COLORS[labels[i]]
        cv2.rectangle(
            image,
            (int(box[0]), int(box[1])),
            (int(box[2]), int(box[3])),
            color, 2
        )
        cv2.putText(image, classes[i], (int(box[0]), int(box[1]-5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 
                    lineType=cv2.LINE_AA)
    return image

In [22]:
# construct the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='path to input image/video')
parser.add_argument('-m', '--min-size', dest='min_size', default=800, help='minimum input size for the FasterRCNN network')
args = vars(parser.parse_args())

usage: ipykernel_launcher.py [-h] [-i INPUT] [-m MIN_SIZE]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\alKushari\AppData\Roaming\jupyter\runtime\kernel-f8a543ee-8187-4d48-be6e-a052ec6b6d47.json


SystemExit: 2

In [23]:
def detect(input, min_size=800):
    # download or load the model from disk
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, min_size=min_size)
    
    image = Image.open(input)
    model.eval().to(device)
    boxes, classes, labels = detect_utils.predict(image, model, device, 0.8)
    image = detect_utils.draw_boxes(boxes, classes, labels, image)
    cv2.imshow('Image', image)
    save_name = f"{input.split('/')[-1].split('.')[0]}_{min_size}"
    cv2.imwrite(f"outputs/{save_name}.jpg", image)
    cv2.waitKey(0)
    

    
    return image

In [24]:
# # download or load the model from disk
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, min_size=args['min_size'])

In [25]:
# image = Image.open(args['input'])
# model.eval().to(device)
# boxes, classes, labels = detect_utils.predict(image, model, device, 0.8)
# image = detect_utils.draw_boxes(boxes, classes, labels, image)
# cv2.imshow('Image', image)
# save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}"
# cv2.imwrite(f"outputs/{save_name}.jpg", image)
# cv2.waitKey(0)

In [26]:
# python detect.py --input input/horses.jpg
detect("input/horses.jpg")

AttributeError: 'JpegImageFile' object has no attribute 'shape'