# YOLOV3目标检测

YOLOV3算法精讲与论文逐句精读：同济子豪兄：https://space.bilibili.com/1900783

不注明出处的转载视为侵权。

2021-08-28

# 准备文件，导入工具包

YOLOV3-模型权重文件：https://pjreddie.com/darknet/yolo/

COCO数据集80个类别名称：https://github.com/pjreddie/darknet/blob/master/data/coco.names

In [None]:
import cv2

import numpy as np

In [None]:
# 导入python绘图matplotlib
import matplotlib.pyplot as plt
# 使用ipython的魔法方法，将绘制出的图像直接嵌入在notebook单元格中
%matplotlib inline
# Helper for displaying an OpenCV image inside the notebook
def look_img(img):
    """Show a BGR image with matplotlib.

    OpenCV loads images in BGR channel order while matplotlib displays RGB,
    so the channels are converted before plotting.
    """
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

# 导入预训练YOLOV3模型

In [None]:
# Load the pretrained YOLOv3 network from its weights file and config file
weights_path = 'yolov3.weights'
config_path = 'yolov3.cfg'
net = cv2.dnn.readNet(weights_path, config_path)

# 导入COCO数据集80个类别

In [None]:
# Read the class names, one per line; the list index is later used as the
# class id when looking up a name.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open('coco.names', 'r', encoding='utf-8') as f:
    classes = f.read().splitlines()

In [None]:
# Display the loaded list of class names
classes

# 导入图像

In [None]:
# Load the test image with OpenCV.
image_path = 'test.jpg'
img = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising, which
# would only surface later as a confusing error; fail early and clearly.
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(image_path))

In [None]:
# Display the loaded image
look_img(img)

In [None]:
# Inspect the shape of the image array
img.shape

In [None]:
# Get the image height and width; the third shape entry is discarded
height, width, _ = img.shape

# 对图像预处理

In [None]:
# Preprocess the image into the network's input blob: scale pixel values by
# 1/255, resize to 416x416 without cropping, subtract a zero mean, and swap
# the R and B channels.
input_size = (416, 416)
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1/255,
    size=input_size,
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [None]:
# Inspect the shape of the preprocessed blob
blob.shape

In [None]:
# Set the preprocessed blob as the network's input
net.setInput(blob)

# 探索YOLOV3神经网络

In [None]:
# Get the names of all layers in the network
net.getLayerNames()

In [None]:
# # Get the weights of a given layer
# net.getParam('conv_14').shape

In [None]:
# Get the indices of the output layers for the three scales
net.getUnconnectedOutLayers()

In [None]:
# Get the names of the output layers for the three scales.
layersNames = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer indices. Depending on the
# OpenCV version the result is either a flat array or an (N, 1) array, so it
# is flattened first and each index converted to a plain int before being
# used to index the list of layer names.
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers_names = [layersNames[int(layer_id) - 1] for layer_id in out_layer_ids]

# 输入YOLOV3神经网络，前向推断预测

In [None]:
# Forward pass: run inference and collect the outputs of the named output layers
prediction = net.forward(output_layers_names)

# 分析YOLOV3三个尺度的输出结果

In [None]:
# Number of output arrays returned by the forward pass
len(prediction)

In [None]:
# Shape of the first output array
prediction[0].shape

In [None]:
# Shape of the second output array
prediction[1].shape

In [None]:
# Shape of the third output array
prediction[2].shape

$13 \times 13 \times 3 = 507$

$26 \times 26 \times 3 = 2028$

$52 \times 52 \times 3 = 8112$

In [None]:
# Look at the second scale: shape of the 85-dimensional vector of the box at index 99
prediction[1][99].shape

In [None]:
# Raw prediction vector of the box at index 99 in the second output array
prediction[1][99]

# 从三个尺度输出结果中解析所有预测框信息

In [None]:
# Accumulators for the parsed predictions, one entry per predicted box:
#   boxes        - box coordinates
#   objectness   - objectness confidence
#   class_probs  - class probability
#   class_ids    - class index
#   class_names  - class name
boxes, objectness, class_probs, class_ids, class_names = [], [], [], [], []

In [None]:
for scale in prediction:  # iterate over the three output scales
    for bbox in scale:  # iterate over every predicted box of this scale
        # Entries 0-3 are the box centre and size, entry 4 is the objectness
        # score and the remaining entries are the per-class probabilities.
        rel_cx, rel_cy, rel_w, rel_h, obj = bbox[:5]
        class_scores = bbox[5:]

        # Most probable class: its index, name and probability
        class_id = np.argmax(class_scores)
        class_name = classes[class_id]
        class_prob = class_scores[class_id]

        # Multiply by the image width/height to get pixel values (truncated to int)
        w = int(rel_w * width)
        h = int(rel_h * height)
        center_x = int(rel_cx * width)
        center_y = int(rel_cy * height)

        # Top-left corner of the box
        x = int(center_x - w/2)
        y = int(center_y - h/2)

        # Record this box in the accumulator lists
        boxes.append([x, y, w, h])
        objectness.append(float(obj))
        class_probs.append(class_prob)
        class_ids.append(class_id)
        class_names.append(class_name)

In [None]:
# Display all parsed boxes
boxes

In [None]:
# Total number of parsed boxes
len(boxes)

In [None]:
# Number of collected objectness scores
len(objectness)

# 将预测框置信度objectness与该框最高类别概率class_prob相乘，获得该预测框的最终置信度confidence

In [None]:
# Final confidence of each box: its top class probability times its objectness
class_prob_arr = np.array(class_probs)
objectness_arr = np.array(objectness)
confidences = class_prob_arr * objectness_arr

In [None]:
# Number of final confidence scores
len(confidences)

# objectness、class_prob、confidence三者的关系

In [None]:
# Plot the three per-box score series on one figure so they can be compared
for series, series_label in (
    (objectness, 'objectness'),
    (class_probs, 'class_prob'),
    (confidences, 'confidence'),
):
    plt.plot(series, label=series_label)
plt.legend()
plt.show()

# 置信度过滤、非极大值抑制NMS

In [None]:
CONF_THRES = 0.1 # Confidence threshold: the larger it is, the stronger the confidence filtering
NMS_THRES = 0.4 # NMS threshold: the smaller it is, the stronger the NMS

In [None]:
# Filter the boxes by confidence (CONF_THRES) and apply non-maximum
# suppression (NMS_THRES); the result holds the indices of the kept boxes.
# Depending on the OpenCV version, NMSBoxes may return an (N, 1) array, a flat
# array, or an empty tuple when no box survives. Normalise it to a flat
# integer array so that later `indexes.flatten()` calls and iteration always
# work, including when nothing was detected.
indexes = np.array(
    cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRES, NMS_THRES),
    dtype=np.int32,
).reshape(-1)

In [None]:
# Indices of the boxes kept after NMS, as a flat array
indexes.flatten()

In [None]:
# Number of boxes kept after NMS
len(indexes.flatten())

In [None]:
# Fixed palette of colour triples used when drawing the boxes.
# (A random colour per box could be generated instead with
#  np.random.uniform(0, 255, size=(len(boxes), 3)).)
colors = [
    [255, 0, 255],
    [0, 0, 255],
    [0, 255, 255],
    [0, 255, 0],
    [255, 255, 0],
    [255, 0, 0],
    [180, 187, 28],
    [223, 155, 6],
    [94, 218, 121],
    [139, 0, 0],
    [77, 169, 10],
    [29, 123, 243],
    [66, 77, 229],
    [1, 240, 255],
    [140, 47, 240],
    [31, 41, 81],
    [29, 123, 243],
    [16, 144, 247],
    [151, 57, 224],
]

In [None]:
# Draw every box kept by NMS on the image, together with its label
for i in indexes.flatten():
    # Box corners from the stored [x, y, w, h]
    x, y, w, h = boxes[i]
    top_left = (x, y)
    bottom_right = (x + w, y + h)

    # Confidence rounded to two decimals, as text
    confidence = str(round(confidences[i], 2))

    # Pick a colour by cycling through the palette, then draw the rectangle
    color = colors[i % len(colors)]
    cv2.rectangle(img, top_left, bottom_right, color, thickness=8)

    # Write the class name and confidence in white.
    # putText arguments: image, text, text origin, font, font scale, colour, thickness
    string = '%s %s' % (class_names[i], confidence)
    cv2.putText(img, string, (x, y + 20), cv2.FONT_HERSHEY_PLAIN, 3, (255, 255, 255), thickness=5)

In [None]:
# Display the image with the boxes and labels drawn on it
look_img(img)

In [None]:
# Save the image to disk
cv2.imwrite('result-test.jpg',img)