# 객체검출

## 파이썬 객체 검출
객체 검출 : 이미지에서 의미 있는 객체를 탐지하는 알고리즘
    - 검출 : 이미지에서 특정 클래스의 객체를 찾는 것
    - 인식 : 검출된 대상이 어떤 객체인지 식별하는 것


In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

# Enable GPU dynamic memory allocation: switch on memory growth for every
# physical GPU that TensorFlow reports, so memory is allocated as needed.
# When no GPU is listed the loop body simply never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

def download_images():
    """Fetch the two sample test images and return their local file paths.

    Each image is retrieved with ``tf.keras.utils.get_file`` from the
    ``test_images`` folder of the TensorFlow models repository. The path
    reported by ``get_file`` is passed through ``pathlib.Path`` and handed
    back as a plain string.

    Returns:
        list[str]: One local path per image, in the order
        ``image1.jpg``, ``image2.jpg``.
    """
    source_root = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/test_images/'
    return [
        str(pathlib.Path(tf.keras.utils.get_file(fname=name,
                                                 origin=source_root + name,
                                                 untar=False)))
        for name in ('image1.jpg', 'image2.jpg')
    ]

IMAGE_PATHS = download_images()

In [2]:
# Download and extract model
def download_model(model_name, model_date):
    """Download a TF2 object-detection model archive and return its path.

    Args:
        model_name: Archive name without the ``.tar.gz`` suffix; it is also
            passed as ``fname`` to ``tf.keras.utils.get_file``.
        model_date: Release-date path segment under which the archive is
            published (e.g. ``'20200711'``).

    Returns:
        str: The path returned by ``tf.keras.utils.get_file`` when called
        with ``untar=True``.
    """
    # Fetch over HTTPS: the archive is extracted and later loaded as a model,
    # so it should not travel over an unauthenticated plain-HTTP connection.
    base_url = 'https://download.tensorflow.org/models/object_detection/tf2/'
    model_file = model_name + '.tar.gz'
    model_dir = tf.keras.utils.get_file(fname=model_name,
                                        origin=base_url + model_date + '/' + model_file,
                                        untar=True)
    return str(model_dir)

MODEL_DATE = '20200711'
MODEL_NAME = 'ssd_mobilenet_v2_320x320_coco17_tpu-8'
PATH_TO_MODEL_DIR = download_model(MODEL_NAME, MODEL_DATE)

In [3]:
import pathlib
# Download labels file
def download_labels(filename):
    """Download a label-map file from the object_detection ``data`` folder.

    Args:
        filename: Name of the file to fetch; used both as the cached file
            name and as the final segment of the download URL.

    Returns:
        str: Local path of the downloaded file.
    """
    data_root = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/'
    local_file = tf.keras.utils.get_file(fname=filename,
                                         origin=data_root + filename,
                                         untar=False)
    return str(pathlib.Path(local_file))

LABEL_FILENAME = 'mscoco_label_map.pbtxt'
PATH_TO_LABELS = download_labels(LABEL_FILENAME)

In [4]:
import time
# from object_detection.utils import label_map_util
# from object_detection.utils import visualization_utils as viz_utils

# Path of the "saved_model" sub-directory inside the downloaded model directory.
PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

# end='' keeps the cursor on the same line so "Done!" is appended to it.
print('Loading model...', end='')
# Wall-clock timestamp taken just before loading; used only for the report.
start_time = time.time()

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

# Report how long the load took, in seconds.
end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Loading model...Done! Took 9.32922887802124 seconds


## 텐서플로우
> Q1. `tf.version` 모듈(버전 문자열은 `tf.version.VERSION` 또는 `tf.__version__`)을 통해 설치된 텐서플로우 버전을 확인해 봅시다.

In [5]:
import tensorflow as tf
# NOTE(review): `tf.version` is a module, so this cell displays the module
# object rather than a version number; the version string itself is
# available as `tf.version.VERSION` / `tf.__version__`.
tf.version

<module 'tensorflow._api.v2.version' from '/home/aiffel-dj63/anaconda3/envs/aiffel/lib/python3.7/site-packages/tensorflow/_api/v2/version/__init__.py'>

> Q2. 이 예제에서는 SSD MobileNet v2 320x320 모델을 사용합니다. 해당 모델을 다운로드해봅시다.

https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1


> Q3. 마찬가지로, 객체 이름을 표시하기 위해 라벨 파일이 저장된 페이지를 방문하여 라벨 맵을 다운로드해봅시다.

https://github.com/tensorflow/models/tree/master/research/object_detection/data


> Q4. 예제 9.2(그래프 정의)를 참조하여 그래프를 읽어오세요.

### 텐서변환 코드

In [6]:
import cv2
import numpy as np
import tensorflow as tf
import os 

# Load the detection model and open the sample video.
model = tf.saved_model.load(PATH_TO_SAVED_MODEL)
# expanduser('~') also works when the HOME environment variable is unset,
# where os.getenv('HOME') + '...' would raise a TypeError.
capture = cv2.VideoCapture(os.path.join(os.path.expanduser('~'), 'OpenCV', 'bird.mp4'))

try:
    while True:
        ret, frame = capture.read()

        # A failed read yields ret == False and no frame; stop here instead
        # of handing an empty frame to cvtColor (this can happen when the
        # container's reported frame count is inaccurate).
        if not ret:
            break

        # Stop once the read position reaches the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            break

        # --- Resize code present only in the old edition (the revised
        # --- edition presumably changed the size of the video itself).
        # input_img = cv2.resize(frame, (300,300))
        # ------------------------------------------------------------------
        # Convert the frame from BGR to RGB before building the tensor.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = tf.convert_to_tensor(input_img)
        # Prepend a batch axis so the tensor has a leading dimension of 1.
        input_tensor = input_tensor[tf.newaxis, ...]
finally:
    # Always release the video handle, even if a frame fails to convert.
    capture.release()

### 모델 추론

In [7]:
# Run the model's default serving signature on the batched frame tensor.
output_dict = model.signatures["serving_default"](input_tensor)

# Take the first batch entry of each output we are interested in.
classes, scores, boxes = (
    output_dict[key][0]
    for key in ("detection_classes", "detection_scores", "detection_boxes")
)

# One tensor per line: class ids, confidence scores, then bounding boxes.
print(classes, scores, boxes, sep="\n")

tf.Tensor(
[64. 64. 64. 86. 49. 67. 67. 51. 67. 56. 50. 86. 47. 47. 56. 64.  1. 64.
 64. 62.  1. 64. 64. 64. 61. 64. 86. 56. 53. 50. 64. 47. 64. 51. 46. 56.
 86. 64. 56. 86. 64. 64. 67.  1. 48. 64.  1. 56. 51. 59. 86. 86. 56. 50.
 86. 86. 64. 62. 47. 51. 44. 64. 64. 51. 64. 56. 64. 64. 49. 64. 47. 51.
 64. 86. 64. 56. 86. 86.  1. 54. 46. 64. 86. 63.  1. 57. 46. 86. 47. 47.
 64. 64. 62. 15.  1. 64. 44. 64. 56.  1.], shape=(100,), dtype=float32)
tf.Tensor(
[0.45407227 0.44321972 0.385714   0.35693544 0.32927752 0.27169773
 0.2630513  0.25060666 0.24908853 0.24881224 0.24601169 0.24532165
 0.24185033 0.24025491 0.23745428 0.23200627 0.2248104  0.22283354
 0.22207609 0.21521445 0.21046485 0.2096801  0.20884801 0.20369044
 0.2017944  0.20018174 0.19629028 0.19234046 0.18856114 0.18456899
 0.18396156 0.17573424 0.17548677 0.17506516 0.17423509 0.1731333
 0.17213508 0.17143817 0.17016506 0.1699915  0.16862968 0.16749884
 0.16661674 0.1660004  0.16388856 0.16208312 0.16121574 0.15908481
 0.158

> Q6. 아래의 full 버전 코드에 직접 주석을 달아보며 흐름을 파악해봅시다.

### 추론 결과 표시

In [9]:
import cv2
import numpy as np
import tensorflow as tf

# Load the detection model and open the sample video.
model = tf.saved_model.load(PATH_TO_SAVED_MODEL)
# expanduser('~') also works when the HOME environment variable is unset,
# where os.getenv('HOME') + '...' would raise a TypeError.
capture = cv2.VideoCapture(os.path.join(os.path.expanduser('~'), 'OpenCV', 'bird.mp4'))

try:
    while True:
        ret, frame = capture.read()

        # A failed read yields ret == False and no frame; stop instead of
        # passing an empty frame to cvtColor.
        if not ret:
            break

        # Stop once the read position reaches the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            break

        # Convert BGR -> RGB, wrap as a tensor and add a leading batch axis.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = tf.convert_to_tensor(input_img)
        input_tensor = input_tensor[tf.newaxis, ...]

        # Run inference through the default serving signature.
        output_dict = model.signatures["serving_default"](input_tensor)

        # First batch entry of each output.
        classes = output_dict["detection_classes"][0]
        scores = output_dict["detection_scores"][0]
        boxes = output_dict["detection_boxes"][0]

        height, width, _ = frame.shape
        for idx, score in enumerate(scores):
            # Only draw detections whose score exceeds 0.7.
            if score > 0.7:
                class_id = int(classes[idx])
                box = boxes[idx]

                # Box values are used as fractions of the frame size:
                # indices 1/3 scale with the width, indices 0/2 with the height.
                x1 = int(box[1] * width)
                y1 = int(box[0] * height)
                x2 = int(box[3] * width)
                y2 = int(box[2] * height)

                # Outline the detection and label it "<class id>:<score>"
                # just above the top-left corner of the box.
                cv2.rectangle(frame, (x1, y1), (x2, y2), 255, 1)
                cv2.putText(frame, str(class_id) + ":" + str(float(score)), (x1, y1 - 5), cv2.FONT_HERSHEY_COMPLEX, 1.5, (0, 255, 255), 1)

        cv2.imshow("Object Detection", frame)
        # Wait up to 33 ms for a key press; "q" ends playback early.
        if cv2.waitKey(33) == ord("q"):
            break
finally:
    # Always free the video handle and close the preview window, even when
    # the loop exits through an exception.
    capture.release()
    cv2.destroyAllWindows()

## 정규표현식

In [8]:
# Read the label map as text and split it into individual lines. The
# encoding is given explicitly so the result does not depend on the locale.
with open(PATH_TO_LABELS, "rt", encoding="utf-8") as f:
    pb_classes = f.read().rstrip("\n").split("\n")

# Show the first five lines. Slicing (instead of indexing 0..4 one by one)
# avoids an IndexError if the file has fewer than five lines.
print(*pb_classes[:5], sep="\n")

item {
  name: "/m/01g317"
  id: 1
  display_name: "person"
}


> Q9. 아래의 예제(정규표현식을 활용해 입력 문자열에서 패턴 검출하기)를 수행하고, 주석을 달아보세요.

In [9]:
import re


# Read the label map as text and split it into individual lines. The
# encoding is given explicitly so the result does not depend on the locale.
with open(PATH_TO_LABELS, "rt", encoding="utf-8") as f:
    pb_classes = f.read().rstrip("\n").split("\n")

# Maps numeric class id -> human-readable display name.
classes_label = dict()
# Non-greedy group capturing the text between the quotes after "display_name:".
pattern = 'display_name: "(.*?)"'

# The loop assumes every item occupies exactly five consecutive lines and
# steps through the file one item at a time.
for i in range(0, len(pb_classes), 5):
    # Third line of the item: take the first run of digits as the class id.
    # The raw string keeps "\d" from being read as an invalid string escape.
    pb_classId = int(re.findall(r"\d+", pb_classes[i + 2])[0])
    # Fourth line of the item: extract the quoted display name.
    pb_text = re.search(pattern, pb_classes[i + 3])
    classes_label[pb_classId] = pb_text.group(1)

> Q10. 아래의 코드는 텐서플로우를 활용한 객체 검출의 전체 코드입니다. 전체 순서를 파악해보세요.

In [11]:
import re
import cv2
import numpy as np
import tensorflow as tf


# Step 1: read the label map as text and split it into individual lines.
# The encoding is given explicitly so the result does not depend on the locale.
with open(PATH_TO_LABELS, "rt", encoding="utf-8") as f:
    pb_classes = f.read().rstrip("\n").split("\n")

# Maps numeric class id -> human-readable display name.
classes_label = dict()
# Non-greedy group capturing the text between the quotes after "display_name:".
pattern = 'display_name: "(.*?)"'

# The loop assumes every item occupies exactly five consecutive lines and
# steps through the file one item at a time.
for i in range(0, len(pb_classes), 5):
    # Third line of the item: take the first run of digits as the class id.
    # The raw string keeps "\d" from being read as an invalid string escape.
    pb_classId = int(re.findall(r"\d+", pb_classes[i + 2])[0])
    # Fourth line of the item: extract the quoted display name.
    pb_text = re.search(pattern, pb_classes[i + 3])
    classes_label[pb_classId] = pb_text.group(1)

# Step 2: load the detection model and open the sample video.
model = tf.saved_model.load(PATH_TO_SAVED_MODEL)
# expanduser('~') also works when the HOME environment variable is unset,
# where os.getenv('HOME') + '...' would raise a TypeError.
capture = cv2.VideoCapture(os.path.join(os.path.expanduser('~'), 'OpenCV', 'bird.mp4'))

try:
    while True:
        # Step 3: grab the next frame.
        ret, frame = capture.read()

        # A failed read yields ret == False and no frame; stop instead of
        # passing an empty frame to cvtColor.
        if not ret:
            break

        # Stop once the read position reaches the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            break

        # Step 4: convert BGR -> RGB, wrap as a tensor, add a batch axis.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = tf.convert_to_tensor(input_img)
        input_tensor = input_tensor[tf.newaxis, ...]

        # Step 5: run inference through the default serving signature.
        output_dict = model.signatures["serving_default"](input_tensor)

        # First batch entry of each output.
        classes = output_dict["detection_classes"][0]
        scores = output_dict["detection_scores"][0]
        boxes = output_dict["detection_boxes"][0]

        # Step 6: draw every detection whose score exceeds 0.7.
        height, width, _ = frame.shape
        for idx, score in enumerate(scores):
            if score > 0.7:
                class_id = int(classes[idx])
                box = boxes[idx]

                # Box values are used as fractions of the frame size:
                # indices 1/3 scale with the width, indices 0/2 with the height.
                x1 = int(box[1] * width)
                y1 = int(box[0] * height)
                x2 = int(box[3] * width)
                y2 = int(box[2] * height)

                # Fall back to the numeric id when the label map has no entry
                # for this class, instead of aborting playback with a KeyError.
                label = classes_label.get(class_id, str(class_id))

                cv2.rectangle(frame, (x1, y1), (x2, y2), 255, 1)
                cv2.putText(frame, label + ":" + str(float(score)), (x1, y1 - 5), cv2.FONT_HERSHEY_COMPLEX, 1.5, (0, 255, 255), 1)

        # Step 7: show the annotated frame; wait up to 33 ms for a key press
        # and end playback early on "q".
        cv2.imshow("Object Detection", frame)
        if cv2.waitKey(33) == ord("q"):
            break
finally:
    # Always free the video handle and close the preview window, even when
    # the loop exits through an exception.
    capture.release()
    cv2.destroyAllWindows()