<a href="https://colab.research.google.com/github/Byeon-MJ/Computer_Vision_Project/blob/main/YOLO_Video_Object_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/gdrive; the model files and videos used
# below are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# 모듈 임포트
import cv2
import numpy as np
import time
import io
import base64
from IPython.display import HTML

In [3]:
# Display the original video inline in the notebook.
# Read the file inside a context manager so the handle is always closed, and
# open it read-only ('rb'): nothing is written, so write access is not needed.
with open('/content/gdrive/MyDrive/CV/Object Detection_YOLO/video/video/street.mp4',
          'rb') as video_file:
    video = video_file.read()

# Embed the raw bytes as a base64 data URI so the <video> tag needs no
# separate file to be served.
encoded = base64.b64encode(video)
HTML(data = '''<video width = '30%' controls>
                <source src = 'data:video/mp4;base64,{0}' type = 'video/mp4'/>
                </video>'''.format(encoded.decode('ascii')))

In [4]:
# Settings for the detection run.

# Running total of per-frame processing time; starts at zero and is
# incremented by detectAndDisplay after every frame.
elapsed_time = 0

# A detection is kept only when its best class score is above this value;
# the same value is passed to NMS as the score threshold.
min_confidence = 0.5

# Source video to run detection on.
file_name = '/content/gdrive/MyDrive/CV/Object Detection_YOLO/video/video/street.mp4'

# File the annotated frames are written to.
output_name = 'street_output_video.mp4'

In [5]:
# Load the YOLO network from its weights and configuration files.
net = cv2.dnn.readNet('/content/gdrive/MyDrive/CV/Object Detection_YOLO/yolov3.weights',
                      '/content/gdrive/MyDrive/CV/Object Detection_YOLO/yolov3.cfg')

# Read the class names, one per line (80 for the COCO list). A detection's
# class id is later used as an index into this list.
with open('/content/gdrive/MyDrive/CV/Object Detection_YOLO/coco.names', 'r') as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version they arrive as a flat array or as an (N, 1) array, so flatten first
# to get plain scalar ids in either case.
output_layers = [layer_names[i - 1]
                 for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# One random colour per class, so every object of a class is drawn alike.
colors = np.random.uniform(0, 255, size = (len(classes), 3))

In [6]:
# Detection + output function
def detectAndDisplay(frame):
    """Run YOLO on one frame, draw the detections and append it to the output video.

    The frame is scaled to 90% of its size, passed through the network, the
    candidate boxes are filtered by ``min_confidence`` and non-maximum
    suppression, and each surviving box is drawn with its class label and
    confidence. The annotated image is then written through the module-level
    ``writer``, which is created on first use.

    Args:
        frame: Image array for a single video frame (as returned by
            ``cap.read()``).

    Returns:
        None. Side effects: prints one line per kept detection and the
        per-frame processing time, adds that time to the global
        ``elapsed_time``, and may create / write to the global ``writer``.

    Relies on the module-level names ``net``, ``output_layers``, ``classes``,
    ``colors``, ``min_confidence``, ``output_name``, ``elapsed_time`` and
    ``writer`` being defined before the call.
    """
    global elapsed_time, writer

    start_time = time.time()
    img = cv2.resize(frame, None, fx=0.9, fy=0.9)
    # Only the first two dimensions are needed; slicing avoids failing on
    # images that carry no channel axis.
    height, width = img.shape[:2]

    # YOLOv3 supports three input sizes (320x320, 416x416, 608x608); 416 is
    # used here. 0.00392 is approximately 1/255.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # Entries 0-3 are the box (centre x, centre y, width, height) as
            # fractions of the image size; entries from 5 on are class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > min_confidence:
                # Object detected: convert the relative box to pixels.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Top-left corner of the rectangle.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Merge overlapping boxes for the same object via non-maximum suppression.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)
    # The result may be an empty tuple, a flat array or an (N, 1) array
    # depending on the OpenCV version; flatten it and visit the kept boxes in
    # ascending index order.
    kept = sorted(int(i) for i in np.asarray(indexes).flatten())
    font = cv2.FONT_HERSHEY_PLAIN

    for i in kept:
        x, y, w, h = boxes[i]

        # Class name and confidence (as a percentage).
        label = '{}: {:.2f}'.format(classes[class_ids[i]], confidences[i] * 100)

        print(i, label)
        # Colours are allocated per class, so index by the class id. Indexing
        # by the box index would mix colours within a class and run past the
        # end of the table when a frame has more boxes than classes.
        color = colors[class_ids[i]]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)
        # Filled strip above the box as a background for the label text.
        cv2.rectangle(img, (x, y - 20), (x + w, y), color, -1)
        cv2.putText(img, label, (x + 5, y - 5), font, 1, (255, 255, 255), 1)

    process_time = time.time() - start_time
    elapsed_time += process_time     # accumulate total processing time

    print('= = = A frame took {:.3f} seconds'.format(process_time))

    # Create the writer lazily, once the output frame size is known.
    # NOTE(review): 'DIVX' is usually paired with .avi containers; confirm it
    # is the intended codec for the .mp4 output_name.
    if writer is None and output_name is not None:
        fourcc = cv2.VideoWriter_fourcc(*'DIVX')
        writer = cv2.VideoWriter(output_name, fourcc, 30,
                                (img.shape[1], img.shape[0]), True)
    # Write the annotated frame to disk.
    if writer is not None:
        writer.write(img)

In [7]:
# Read the video stream from the source file and run detection on every frame.
cap = cv2.VideoCapture(file_name)
writer = None   # created by detectAndDisplay when the first frame is written
# isOpened is a method: without the call parentheses the bound method object
# is always truthy and this check can never fire.
if not cap.isOpened():
    print('- -(!)Error opening video capture')
    # Stop with a non-zero status, since this is a failure.
    raise SystemExit(1)

try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print('- -(!) No captured frame - - Break!')
            print('elapsed time {:.3f} seconds'.format(elapsed_time))
            break

        detectAndDisplay(frame)
finally:
    # Release the handles even when processing is interrupted or fails, so
    # the output file is finalised.
    cap.release()
    # writer stays None when no frame was ever written.
    if writer is not None:
        writer.release()

6 car: 99.80
8 bus: 92.04
15 car: 85.26
16 car: 93.64
17 car: 98.69
19 person: 91.41
22 person: 79.44
24 car: 71.02
26 car: 93.01
28 handbag: 54.26
29 handbag: 52.69
= = = A frame took 2.398 seconds
4 car: 99.86
6 bus: 90.03
11 car: 84.90
13 car: 95.70
14 car: 99.18
15 person: 95.11
18 person: 86.28
20 car: 67.57
22 car: 95.93
24 handbag: 58.64
= = = A frame took 1.691 seconds
3 car: 99.58
4 bus: 86.50
8 car: 92.51
10 car: 97.41
11 car: 99.21
12 person: 86.89
15 person: 82.37
20 bus: 81.09
21 car: 62.38
23 car: 92.06
24 handbag: 61.19
= = = A frame took 1.118 seconds
2 car: 99.70
5 bus: 84.70
7 car: 96.38
9 car: 97.75
11 car: 99.60
12 person: 85.65
15 person: 70.29
17 bus: 67.67
19 car: 59.86
21 car: 94.20
22 handbag: 60.92
= = = A frame took 1.131 seconds
2 car: 99.75
5 bus: 88.23
7 car: 94.84
8 car: 96.77
10 car: 99.31
12 person: 91.90
13 person: 76.75
16 bus: 84.25
18 car: 94.74
19 handbag: 58.03
= = = A frame took 1.208 seconds
1 car: 99.57
3 bus: 90.23
9 car: 94.97
10 car: 96.53
1