# Hello Object Detection

A very basic introduction to using object detection models with OpenVINO™.

The [horizontal-text-detection-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/horizontal-text-detection-0001/README.md) model from [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/) is used. It detects horizontal text in images and returns a blob of data in the shape of `[100, 5]`. Each detected text box is stored in the `[x_min, y_min, x_max, y_max, conf]` format, where
`(x_min, y_min)` are the coordinates of the top-left corner of the bounding box, `(x_max, y_max)` are the coordinates of the bottom-right corner, and `conf` is the confidence for the predicted class.


#### Table of contents:

- [Imports](#Imports)
- [Download model weights](#Download-model-weights)
- [Select inference device](#Select-inference-device)
- [Load the Model](#Load-the-Model)
- [Load an Image](#Load-an-Image)
- [Do Inference](#Do-Inference)
- [Visualize Results](#Visualize-Results)


### Installation Instructions

This is a self-contained example that relies solely on its own code.

We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.
For details, please refer to [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide).

<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=5b5a4db0-7875-4bfb-bdbd-01698b5b1a77&file=notebooks/hello-detection/hello-detection.ipynb" />


In [1]:
# 오픈비노 설치
%pip install -q "openvino>=2023.1.0" opencv-python tqdm

Note: you may need to restart the kernel to use updated packages.


## Imports
[back to top ⬆️](#Table-of-contents:)


In [2]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import openvino as ov
from pathlib import Path

# Fetch the `notebook_utils` helper module (this only needs to happen once).
import requests  # HTTP client used to download the helper module

# Skip the download when the module is already on disk, so re-running this cell
# works offline and does not fail on a transient connection error.
# Delete notebook_utils.py to force a fresh download.
if not Path("notebook_utils.py").exists():
    r = requests.get(
        url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of saving an error page as a Python module.
    r.raise_for_status()

    # Write explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding; the context manager guarantees the file is closed.
    with open("notebook_utils.py", "w", encoding="utf-8") as f:
        f.write(r.text)

from notebook_utils import download_file, device_widget

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, '현재 연결은 원격 호스트에 의해 강제로 끊겼습니다', None, 10054, None))

## Download model weights
[back to top ⬆️](#Table-of-contents:)


In [None]:
# Download the model files into ./model unless they are already there.
base_model_dir = Path("./model").expanduser()

model_name = "horizontal-text-detection-0001"
model_xml_name = f"{model_name}.xml"
model_bin_name = f"{model_name}.bin"

# Local paths of the model topology (.xml) and weights (.bin).
model_xml_path = base_model_dir / model_xml_name
model_bin_path = base_model_dir / model_bin_name

model_xml_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.xml"
model_bin_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin"

if model_xml_path.exists() and model_bin_path.exists():
    print(f"{model_name} already downloaded to {base_model_dir}")
else:
    # Check each file on its own: an interrupted earlier run can leave the .xml
    # in place without the .bin, and the model cannot be loaded without both.
    if not model_xml_path.exists():
        download_file(model_xml_url, model_xml_name, base_model_dir)
    if not model_bin_path.exists():
        download_file(model_bin_url, model_bin_name, base_model_dir)

## Select inference device
[back to top ⬆️](#Table-of-contents:)

Select a device from the dropdown list for running inference using OpenVINO.

In [None]:
# Create the device-selection widget provided by `notebook_utils`.
device = device_widget()
# A bare name as the last expression of the cell makes the notebook display it;
# the chosen device is read later through `device.value`.
device

## Load the Model
[back to top ⬆️](#Table-of-contents:)


In [None]:
# Create the OpenVINO runtime entry point.
core = ov.Core()

# Read the model from the downloaded .xml file.
model = core.read_model(model=model_xml_path)
# Compile the model read above for the device selected in the widget (for example CPU).
compiled_model = core.compile_model(model=model, device_name=device.value)

# Handles to the first model input and to the output named "boxes".
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")

# Show the input shape the network expects.
print(input_layer_ir.shape)

## Load an Image
[back to top ⬆️](#Table-of-contents:)


In [None]:
# Download the image from the openvino_notebooks storage into the "data" folder.
image_filename = download_file(
    "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg",
    directory="data",
)

# Text detection models expect an image in BGR format, which is what cv2.imread returns.
# Load intel_rnb.jpg into the `image` variable.
image = cv2.imread(str(image_filename))

# cv2.imread signals failure by returning None instead of raising, so check
# explicitly to get a clear error rather than an AttributeError on `.shape`.
if image is None:
    raise RuntimeError(f"Image could not be read or decoded: {image_filename}")

print(image.shape)  # image size as (H, W, C); a color image has 3 channels

## Preprocess the Image
- Resize the image to the input size that the OpenVINO model expects.

In [None]:
# Resize the image and add a batch dimension so it matches the network input.

# N,C,H,W = batch size, number of channels, height, width.
# Use `input_layer_ir`, the input handle defined when the model was loaded.
N, C, H, W = input_layer_ir.shape  # e.g. [1, 3, 704, 704]

# Resize the image to meet network expected input sizes.
# cv2.resize takes the target size as (width, height).
resized_image = cv2.resize(image, (W, H))
print(resized_image.shape)

# Reshape to the network input shape: move channels first (HWC -> CHW),
# then insert a new axis at position 0 for the batch dimension.
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
print(input_image.shape)

# Show the original image; matplotlib expects RGB, so convert from BGR first.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

## Do Inference
[back to top ⬆️](#Table-of-contents:)


In [None]:
# Run inference on the preprocessed image and pick the "boxes" output.
# The network input is the batched image (reported as [1, 3, 704, 704] above);
# per the model description the output holds up to 100 rows of
# [x_min, y_min, x_max, y_max, conf].
results = compiled_model([input_image])
boxes = results[output_layer_ir]
print(boxes.shape)

# Keep only the rows that contain at least one non-zero value,
# i.e. drop the all-zero placeholder boxes.
has_content = np.any(boxes != 0, axis=1)
boxes = boxes[has_content]

## Visualize Results
[back to top ⬆️](#Table-of-contents:)


In [None]:
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """Draw the detected text boxes on an RGB copy of the original image.

    Box coordinates are expressed in the coordinate system of ``resized_image``
    and are scaled back to the size of ``bgr_image`` before drawing.

    Args:
        bgr_image: Original image in BGR channel order.
        resized_image: The resized image that was fed to the network; only its
            height and width are used, to compute the scale factors.
        boxes: Iterable of detections, each in the
            ``[x_min, y_min, x_max, y_max, conf]`` format.
        threshold: Only detections whose confidence is strictly greater than
            this value are drawn. Tune it to the use case.
        conf_labels: When true, the confidence (two decimals) is written above
            each drawn box.

    Returns:
        The image converted to RGB (as expected by matplotlib) with the boxes
        and optional labels drawn on it.
    """
    # Colors are RGB tuples because drawing happens on the RGB image.
    box_color = (0, 255, 0)  # green
    label_color = (255, 0, 0)  # red

    # Scale factors from the network-sized image back to the original image.
    source_height, source_width = bgr_image.shape[:2]
    network_height, network_width = resized_image.shape[:2]
    scale_x = source_width / network_width
    scale_y = source_height / network_height

    # Convert the base image from BGR to RGB format.
    canvas = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    for detection in boxes:
        # The confidence is stored in the last position of each detection.
        score = detection[-1]
        if not score > threshold:
            continue

        # Scale the corners to the original image and truncate to int.
        # The y coordinates are kept at 10 or more so that a box found at the very
        # top of the image (and its label drawn 10 px above) stays visible.
        left, top, right, bottom = detection[:-1]
        x_min = int(left * scale_x)
        y_min = int(max(top * scale_y, 10))
        x_max = int(right * scale_x)
        y_max = int(max(bottom * scale_y, 10))

        # Arguments: image, start_point, end_point, color, thickness.
        canvas = cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), box_color, 3)

        if conf_labels:
            # Arguments: image, text, bottom-left corner of the text, font,
            # font_scale, color, thickness, line_type.
            label_origin = (x_min, y_min - 10)
            canvas = cv2.putText(
                canvas,
                f"{score:.2f}",
                label_origin,
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                label_color,
                1,
                cv2.LINE_AA,
            )

    return canvas

In [None]:
# Draw the detections on the original image using the default threshold (0.3).
# conf_labels=True writes the confidence next to each box; pass
# conf_labels=False to draw the boxes only.
annotated_image = convert_result_to_image(image, resized_image, boxes, conf_labels=True)

plt.figure(figsize=(10, 6))  # figure size in inches (width, height)
plt.axis("off")  # hide the axes
plt.imshow(annotated_image);