## OCR은 문자 인식을 하는 기술입니다. 

### 관련된 패키지를 import 합니다.

In [1]:
import requests

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
from io import BytesIO

### Subscription Key와 호출 URL을 셋팅 합니다. 

In [None]:
# API key for the service; replace the "{Subscription Key}" placeholder with a
# real key before running the notebook.
subscription_key = "{Subscription Key}"
# Guards only against an empty value: the unedited placeholder above is a
# non-empty string, so it still passes this check.
assert subscription_key

In [None]:
# Versioned (v2.0) base address of the vision REST API; replace the
# "{Computer Vision URL}" placeholder with the real endpoint address.
vision_base_url = "{Computer Vision URL}/vision/v2.0/"

In [None]:
# Address of the OCR operation: the versioned base URL followed by "ocr".
ocr_url = f"{vision_base_url}ocr"

### 분석할 이미지를 셋팅 한다. 

In [None]:
# Sample image to analyse. The long URL is written as adjacent string
# literals, which Python joins into a single string.
image_url = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/a/af/"
    "Atomist_quote_from_Democritus.png/338px-Atomist_quote_from_Democritus.png"
)

In [None]:
# Download the sample image and show it as the cell output.
# The HTTP status is checked first so that a failed download is reported as an
# HTTP error instead of PIL failing to parse an error page's bytes.
image_response = requests.get(image_url)
image_response.raise_for_status()
image = Image.open(BytesIO(image_response.content))
image

### 헤더 정보를 셋팅한다.

In [None]:
# HTTP header carrying the subscription key.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Query-string options: 'unk' is the auto-detect language code; orientation
# detection is requested with the string 'true'.
params = dict(language="unk", detectOrientation="true")
# JSON request body: the image is referenced by URL.
data = dict(url=image_url)

### 서비스를 요청하고 결과를 확인한다.

In [None]:
# POST the image reference to the OCR endpoint: `params` goes into the query
# string and `data` is sent as the JSON body.
response = requests.post(ocr_url, headers=headers, params=params, json=data)
# Raise an HTTPError on a 4xx/5xx reply instead of continuing with an error body.
response.raise_for_status()

In [None]:
# Decode the JSON body of the OCR response into Python objects.
analysis = response.json()

In [None]:
# Show the decoded OCR result as the cell output.
analysis

In [None]:
# One entry per detected region: that region's list of text lines.
line_infos = [region["lines"] for region in analysis["regions"]]

# Flatten region -> line -> word into a single list of word entries,
# preserving the order in which the service returned them.
word_infos = []
for region_lines in line_infos:
    for text_line in region_lines:
        word_infos.extend(text_line["words"])
word_infos

In [None]:
# Draw the analysed image with every recognised word and its bounding box
# overlaid on top.
plt.figure(figsize=(5, 5))
# Fetch the image again so this cell does not rely on an earlier cell's
# `image`; check the HTTP status so a failed download is reported as an HTTP
# error rather than as a PIL parsing failure.
image_response = requests.get(image_url)
image_response.raise_for_status()
image = Image.open(BytesIO(image_response.content))
# NOTE: plt.imshow returns the image artist, not the Axes; the Axes is reached
# through the artist's `.axes` attribute below.
ax = plt.imshow(image, alpha=0.5)
for word in word_infos:
    # "boundingBox" is a comma-separated string of four integers, used here as
    # the rectangle's x, y, width and height.
    left, top, width, height = (int(num) for num in word["boundingBox"].split(","))
    patch = Rectangle((left, top), width, height, fill=False, linewidth=2, color='y')
    ax.axes.add_patch(patch)
    # Label the box with the recognised text, anchored at the box's origin.
    plt.text(left, top, word["text"], fontsize=20, weight="bold", va="top")
plt.axis("off")

### 한글이 잘 되는지 테스트 한다.

In [None]:
# Second sample image, used to try the OCR call with Korean text.
image_url = "https://www.unikorea.go.kr/unikorea/common/images/content/peace.png"

In [None]:
# Download the Korean sample image and show it as the cell output.
# The HTTP status is checked first so that a failed download is reported as an
# HTTP error instead of PIL failing to parse an error page's bytes.
image_response = requests.get(image_url)
image_response.raise_for_status()
image = Image.open(BytesIO(image_response.content))
image

In [None]:
# HTTP header carrying the subscription key.
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
# Query-string options: the language is pinned to Korean ('ko') instead of
# auto-detection; orientation detection is requested with the string 'true'.
params = dict(language="ko", detectOrientation="true")
# JSON request body: the image is referenced by URL.
data = dict(url=image_url)

지원하는 언어 목록 (`language` 파라미터에 지정할 수 있는 값)

- unk (AutoDetect)
- zh-Hans (ChineseSimplified)
- zh-Hant (ChineseTraditional)
- cs (Czech)
- da (Danish)
- nl (Dutch)
- en (English)
- fi (Finnish)
- fr (French)
- de (German)
- el (Greek)
- hu (Hungarian)
- it (Italian)
- ja (Japanese)
- ko (Korean)
- nb (Norwegian)
- pl (Polish)
- pt (Portuguese)
- ru (Russian)
- es (Spanish)
- sv (Swedish)
- tr (Turkish)
- ar (Arabic)
- ro (Romanian)
- sr-Cyrl (SerbianCyrillic)
- sr-Latn (SerbianLatin)
- sk (Slovak)

In [None]:
# POST the Korean image reference to the OCR endpoint: `params` goes into the
# query string and `data` is sent as the JSON body.
response = requests.post(ocr_url, headers=headers, params=params, json=data)
# Raise an HTTPError on a 4xx/5xx reply instead of continuing with an error body.
response.raise_for_status()

In [None]:
# Decode the JSON body of the OCR response into Python objects.
analysis = response.json()

In [None]:
# Show the decoded OCR result as the cell output.
analysis

In [None]:
# One entry per detected region: that region's list of text lines.
line_infos = [region["lines"] for region in analysis["regions"]]

# Flatten region -> line -> word into a single list of word entries,
# preserving the order in which the service returned them.
word_infos = []
for region_lines in line_infos:
    for text_line in region_lines:
        word_infos.extend(text_line["words"])
word_infos