# Import

In [1]:
from __future__ import division

from models import *
from utils.utils import *
from utils.datasets import *

import os
import sys
import time
import datetime
import argparse
import cv2

from PIL import Image

import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator


# Arg Parse

In [2]:
class opt:
    image_folder = "data/samples"
    model_def = "config/yolov3.cfg"
    weights_path = "weights/yolov3.weights"
    class_path = "data/coco.names"
    conf_thres = 0.8
    nms_thres = 0.4
    batch_size = 1
    n_cpu = 0
    img_size = 416
    checkpoint_model= str()

In [3]:
def plot_one_box(x, img, color=1, label=None, line_thickness=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

In [4]:
def figure_to_array(fig):
    """
    plt.figure를 RGBA로 변환(layer가 4개)
    shape: height, width, layer
    """
    fig.canvas.draw()
    return np.array(fig.canvas.renderer._renderer)

# Model Load

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

os.makedirs("output", exist_ok=True)

# Set up model
model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

if opt.weights_path.endswith(".weights"):
    # Load darknet weights
    model.load_darknet_weights(opt.weights_path)
else:
    # Load checkpoint weights
    model.load_state_dict(torch.load(opt.weights_path))

model.eval()  # Set in evaluation mode

dataloader = DataLoader(
    ImageFolder(opt.image_folder, img_size=opt.img_size),
    batch_size=opt.batch_size,
    shuffle=False,
    num_workers=opt.n_cpu,
)

classes = load_classes(opt.class_path)  # Extracts class labels from file

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

imgs = []  # Stores image paths
img_detections = []  # Stores detections for each image index

# Video Define

In [6]:
cam = cv2.VideoCapture('./../drive_fornt_sample.mp4')

# Webcam

In [7]:
frames = 0
start = time.time()

# ret_val, img = cam.read()
# img_size = img.shape[:2]

while True:
    ret_val, img = cam.read()
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Mirror 
    img = cv2.flip(img, 1)
    img_re = cv2.resize(img, (416, 416))
    
    input_imgs = transforms.ToTensor()(img_re)
    input_imgs = torch.unsqueeze(input_imgs, 0).to(device)

    # Get detections
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
        img_detections.extend(detections)
    
    # Create plot
    # Draw bounding boxes and labels of detections
    if detections[0] is not None:
        # Rescale boxes to original image
#         detections = rescale_boxes(detections[0], opt.img_size, img.shape[:2])

        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections[0]:
            plot_one_box((x1,y1,x2,y2), img_re, label=classes[int(cls_pred)])
    
    frames += 1
    intv = time.time() - start
    if intv > 1:
        print("FPS of the video is {:5.2f}".format( frames / intv ))
        print(detections)
        start = time.time()
        frames = 0
    
    cv2.imshow('Demo webcam', img_re)
    if cv2.waitKey(0) & 0xFF == ord('q'):
        break
        
cam.release()
cv2.destroyAllWindows()

FPS of the video is  0.41
[tensor([[ 66.4197, 257.0344, 131.2004, 307.3551,   0.9992,   0.9997,   2.0000],
        [322.4802, 234.2759, 416.0887, 387.4740,   0.9978,   0.9992,   2.0000],
        [140.2990, 260.5547, 168.8712, 290.4017,   0.9989,   0.9965,   2.0000],
        [ -1.5365, 241.5591,  79.9595, 334.4342,   0.9966,   0.9950,   2.0000],
        [226.9790, 265.3445, 251.6054, 293.7183,   0.9679,   0.9848,   2.0000],
        [164.0794, 261.5971, 183.1074, 285.9209,   0.9564,   0.9916,   2.0000],
        [202.0893, 262.3770, 217.6646, 284.5333,   0.9669,   0.9474,   2.0000],
        [192.1325, 262.9680, 200.4778, 278.9627,   0.8471,   0.9935,   2.0000],
        [216.4868, 263.6098, 225.4986, 279.3086,   0.8496,   0.9614,   2.0000]])]
FPS of the video is  6.82
[tensor([[ 20.9890, 252.3455, 112.7417, 320.1380,   0.9935,   0.9966,   2.0000],
        [128.4338, 257.9867, 161.0182, 292.7920,   0.9971,   0.9876,   2.0000],
        [323.0310, 236.8269, 415.1489, 362.6141,   0.9989,   0.9

FPS of the video is  5.85
[tensor([[108.4131, 257.3834, 180.3452, 330.6579,   0.9972,   0.9967,   2.0000],
        [ 55.0750, 262.1215, 115.9497, 315.9354,   0.9927,   0.9955,   2.0000],
        [252.3196, 270.4709, 266.4370, 284.4019,   0.9739,   0.9985,   2.0000],
        [225.6403, 266.8250, 244.1332, 292.2647,   0.9587,   0.9796,   2.0000],
        [204.1886, 265.0894, 220.3957, 286.2959,   0.9726,   0.9638,   2.0000],
        [172.0628, 269.7248, 188.1687, 286.6862,   0.8495,   0.9964,   2.0000]])]
FPS of the video is 11.75
[tensor([[ 52.2482, 258.2798, 109.2206, 318.0176,   0.9983,   0.9988,   2.0000],
        [103.3222, 254.9913, 177.7475, 330.7126,   0.9998,   0.9928,   2.0000],
        [251.2337, 269.7424, 265.8298, 283.9448,   0.9847,   0.9973,   2.0000],
        [226.0137, 267.3205, 243.6996, 291.6297,   0.9579,   0.9832,   2.0000],
        [174.2040, 267.6449, 188.5639, 286.6794,   0.8575,   0.9946,   2.0000],
        [204.5783, 262.8850, 219.5332, 285.9009,   0.9598,   0.8