In [7]:
#!/usr/bin/python
#coding:utf-8
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
from preprocess import preprocess_image, inp_to_image
import pandas as pd
import random
import itertools

# 参数设置
images="imgs"
dets="det" #存放检测结果
batch_size=1 #batch大小
confidence=0.5 # 目标存在置信度阈值
nms_thresh=0.4 # NMS非极大值抑制阈值
cfg="cfg/yolov3.cfg" #配置文件"
weights="yolov3.weights"
resize="416" # 操作图片大小，str类型
scales="1,2,3" # 缩放尺度用于检测
CUDA = torch.cuda.is_available() # GPU环境是否可用
num_class, classes = load_classes("data/coco.names") # coco 数据集
print("Initializing network model...")
model = Darknet(cfg)
model.load_weights(weights)
print("The model restored successively！")
model.net_info["height"] = int(resize) # 网络输入数据大小
input_dim = model.net_info["height"]
assert input_dim > 0 and input_dim % 32 == 0
if CUDA:
    # 如果GPU可用, 模型切换到cuda中运行
    model.cuda()
model.eval() # 评估模式
try:
    # 加载待检测图像列表
    imlist = [os.path.join(images, img) for img in os.listdir(images) if os.path.splitext(img)[1] in [".png", ".jpg", ".jpeg"]]
except NotADirectoryError:
    imlist = []
    imlist.append(os.path.join(images))
except FileNotFoundError:
    print("%s invalid file" % (images,))
    exit()
if not os.path.exists(dets):
    os.mkdir(dets)
load_batch = time.time()
# 加载全部待检测图像
# map: 转换函数prep_image, 两个数组 imlist [input_dim for x in imlist] 为其提供参数
batches = list(map(preprocess_image, imlist, [input_dim for x in imlist]))
ptt_images = [x[0] for x in batches] # ptt: pytorch_tensor
ori_images = [x[1] for x in batches]
ori_images_dim_list = [x[2] for x in batches]
# repeat(*size), 沿着指定维度复制数据
# 注: size维度必须和数据本身维度要一致
ori_images_dim_list = torch.FloatTensor(ori_images_dim_list).repeat(1, 2) # (11,4) 原始图像尺寸

if CUDA:
    ori_images_dim_list = ori_images_dim_list.cuda()

# 所有检测结果
# objs = []
i = 0 # 第i个图像批次
#  批处理 ...
write = False
if batch_size>1:
    # batch >1 支持实现
    num_batches = int(len(imlist)/batch_size + 0.5)
    ptt_images = [torch.cat(
                    (ptt_images[ i * batch_size: min((i + 1) * batch_size, len(ptt_images)) ]) )
                  for i in range(num_batches)]

# 暂未支持batch>1
for batch in ptt_images:
    start = time.time()
    if CUDA:
        batch = batch.cuda()

    with torch.no_grad():
        predictions = model(Variable(batch), CUDA)
    # 结果过滤
    predictions = write_results(predictions, confidence, num_class, nms=True, nms_thresh=nms_thresh)

    if type(predictions) == int:
        i += 1
        continue
    end = time.time()

    print(end - start, predictions.shape) # 单位 秒

    predictions[:, 0] += i * batch_size # [0]表示图像索引

    if  not write:
        output = predictions
        write = True
    else:
        output = torch.cat((output, predictions))

    for im_num, image in enumerate(imlist[i*batch_size: min((i+1)*batch_size, len(imlist))]):
        im_ind =  i*batch_size + im_num # 图像编号
        objs = [ classes[int(x[-1])] for x in output if int(x[0]) == im_ind] # 输出第im_ind图像结果
        print("{0:20s} predicted in {1:6.3f} seconds".format(osp.split(image)[-1], (end-start)/batch_size))
        print("{0:20s} {1:s}".format("Objects detected", " ".join(objs)))
        print("----------------------------------------------------------")
    i += 1


# 对所有的输入的检测结果
try:
    output
except:
    print("没有检测到任何目标")
    exit()

# 0: 图像索引 1-4： 坐标(在缩放后图像中的位置) 5：score 6： ？？？ 7（-1）：类别
# print(output)

# 对output结果按照dim=0维度分组？
ori_images_dim_list = torch.index_select(ori_images_dim_list, 0, output[:, 0].long()) # pytorch 切片torch.index_select(data, dim, indices)
scaling_factor = torch.min(input_dim/ori_images_dim_list, 1)[0].view(-1, 1)
# 坐标换算,将居中的位置坐标转换为以(0,0)为起点的坐标 x-x'soffset, y-y'soffset
output[:, [1,3]] -= (input_dim - scaling_factor*ori_images_dim_list[:, 0].view(-1,1))/2 # x416 - (缩放后x<=416长度/2 )
output[:, [2,4]] -= (input_dim - scaling_factor*ori_images_dim_list[:, 1].view(-1,1))/2
output[:, 1:5] /= scaling_factor # 缩放至原图大小尺寸

# 绘图
colors = [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86)]
def draw(x, batch, results):
    # batch 转换后的图像， 没用到这里
    c1 = tuple(x[1:3].int()) # x1,y1
    c2 = tuple(x[3:5].int()) # x2,y2
    img = results[int(x[0])] # 图像索引
    cls = int(x[-1])
    label = "%s" % classes[cls]
    color = random.choice(colors) # 随机选择颜色
    # 绘图（绘制一条结果）
    cv2.rectangle(img, c1, c2, color, 1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
    cv2.rectangle(img, c1, c2, color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    return img
# 开始逐条绘制output中结果
list(map(lambda x: draw(x, ptt_images, ori_images), output))
# 保存文件路径
det_names = ["{}/det_{}".format(dets, osp.split(x)[-1]) for x in imlist]
list(map(cv2.imwrite, det_names, ori_images))

Initializing network model...
The model restored successively！
(0.10206317901611328, torch.Size([1, 8]))
img2.jpg             predicted in  0.102 seconds
Objects detected     train
----------------------------------------------------------
(0.098175048828125, torch.Size([2, 8]))
img1.jpg             predicted in  0.098 seconds
Objects detected     person dog
----------------------------------------------------------
(0.09610199928283691, torch.Size([1, 8]))
eagle.jpg            predicted in  0.096 seconds
Objects detected     bird
----------------------------------------------------------
(0.10428595542907715, torch.Size([9, 8]))
img3.jpg             predicted in  0.104 seconds
Objects detected     car car car car car car car truck traffic light
----------------------------------------------------------
(0.09672403335571289, torch.Size([3, 8]))
person.jpg           predicted in  0.097 seconds
Objects detected     person dog horse
--------------------------------------------------------

[True, True, True, True, True, True, True, True, True, True, True, True]