# Paddle OCR을 실행해보자

In [59]:
# CUDA-11.8이 설치되어 있는 GPU 사용 가능한 컴퓨터기 때문에 gpu 버전 다운로드
!pip install paddlepaddle-gpu



In [60]:
# PaddleOCR whl package 설치
!pip install "paddleocr>=2.0.1"



# 한번 사용해보도록 하자

실행하기 전, 컴퓨터에 cuDNN이 설치되어있는지 확인하자. 없다면 설치

CUDA 11.8 사용 기준으로

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cudnn
sudo apt-get -y install cudnn-cuda-11

In [61]:
import paddle
print(f"Is compiled with CUDA? {paddle.is_compiled_with_cuda()}")
print(f"CUDA devices:, {paddle.device.cuda.device_count()}")

Is compiled with CUDA? True
CUDA devices:, 1


In [62]:
# 콘솔에 아래 명령 입력
# paddleocr --image_dir .책표지_기타_000001.jpg --lang=korean

In [63]:
from paddleocr import PaddleOCR,draw_ocr

ocr = PaddleOCR(lang="korean")

[2025/03/19 11:25:13] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/uijong/.paddleocr/whl/det/ml/Multilingual_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/uijong/.paddleocr/whl/rec/korean/korean_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 32

In [64]:
# 코드로 실행하자면?
# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
# to switch the language model in order.
# ocr = PaddleOCR(use_angle_cls=True, lang='korean') # need to run only once to download and load model into memory
img_path = '22222.png'
result = ocr.ocr(img_path, cls=True)
for idx in range(len(result)):
    res = result[idx]
    for line in res:
        print(line)


# draw result
from PIL import Image
result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='MaruBuri-Regular.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
im_show.show()

[2025/03/19 11:25:16] ppocr DEBUG: dt_boxes num : 45, elapsed : 0.030826568603515625
[2025/03/19 11:25:16] ppocr DEBUG: rec_res num  : 45, elapsed : 0.1533679962158203
[[[0.0, 0.0], [55.0, 0.0], [55.0, 26.0], [0.0, 26.0]], ('라고', 0.9986573457717896)]
[[[68.0, 0.0], [162.0, 3.0], [161.0, 37.0], [67.0, 33.0]], ('합니다!', 0.8103922009468079)]
[[[179.0, 4.0], [239.0, 11.0], [235.0, 48.0], [175.0, 41.0]], ('저희', 0.9842053651809692)]
[[[252.0, 16.0], [357.0, 24.0], [354.0, 58.0], [249.0, 49.0]], ('학교에서', 0.9452927112579346)]
[[[389.0, 26.0], [496.0, 34.0], [493.0, 71.0], [386.0, 62.0]], ('진행하는', 0.8767167925834656)]
[[[523.0, 37.0], [649.0, 45.0], [646.0, 85.0], [520.0, 77.0]], ('프로그램', 0.999920129776001)]
[[[5.0, 48.0], [56.0, 55.0], [52.0, 87.0], [1.0, 79.0]], ('프로', 0.9959990978240967)]
[[[44.0, 54.0], [135.0, 62.0], [132.0, 95.0], [41.0, 88.0]], ('그램이', 0.9999443888664246)]
[[[154.0, 61.0], [250.0, 68.0], [247.0, 105.0], [151.0, 99.0]], ('있는데', 0.9998623728752136)]
[[[277.0, 75.0], [338.0,

In [65]:
# for idx in range(len(boxes)):
#     print(f"boxes: {boxes[idx]}, txts: {txts[idx]}")

# 한글 또는 영어, 숫자 txt만 포함된 box로 재구성
boxes = [boxes[idx] for idx in range(len(boxes)) if txts[idx].isalnum()]

for idx in range(len(boxes)):
    print(f"boxes: {boxes[idx]}")

boxes: [[0.0, 0.0], [55.0, 0.0], [55.0, 26.0], [0.0, 26.0]]
boxes: [[179.0, 4.0], [239.0, 11.0], [235.0, 48.0], [175.0, 41.0]]
boxes: [[252.0, 16.0], [357.0, 24.0], [354.0, 58.0], [249.0, 49.0]]
boxes: [[389.0, 26.0], [496.0, 34.0], [493.0, 71.0], [386.0, 62.0]]
boxes: [[523.0, 37.0], [649.0, 45.0], [646.0, 85.0], [520.0, 77.0]]
boxes: [[5.0, 48.0], [56.0, 55.0], [52.0, 87.0], [1.0, 79.0]]
boxes: [[44.0, 54.0], [135.0, 62.0], [132.0, 95.0], [41.0, 88.0]]
boxes: [[154.0, 61.0], [250.0, 68.0], [247.0, 105.0], [151.0, 99.0]]
boxes: [[277.0, 75.0], [338.0, 80.0], [335.0, 120.0], [274.0, 115.0]]
boxes: [[363.0, 87.0], [405.0, 87.0], [405.0, 123.0], [363.0, 123.0]]
boxes: [[426.0, 88.0], [519.0, 97.0], [515.0, 141.0], [422.0, 133.0]]
boxes: [[576.0, 104.0], [671.0, 109.0], [668.0, 151.0], [573.0, 146.0]]
boxes: [[4.0, 117.0], [29.0, 117.0], [29.0, 146.0], [4.0, 146.0]]
boxes: [[18.0, 119.0], [100.0, 121.0], [99.0, 158.0], [17.0, 155.0]]
boxes: [[108.0, 121.0], [173.0, 126.0], [170.0, 165.0],

In [66]:
!pip install --upgrade pip



In [67]:
!pip install opencv-python



In [68]:
import cv2
import numpy as np

# 1. cv2 imread로 다시 그레이스케일 이미지 획득
image = cv2.imread('22222.png', cv2.IMREAD_GRAYSCALE)

# 2. 연산 효율성을 올리기 위해 boxes를 numpy array로 변환
boxes_np_arr = np.array([])
if boxes and len(boxes) > 0:
    boxes_np_arr = np.array([np.array(box) for box in boxes])

    for box in boxes:
        print(box)
else:
    print("No boxes")

[[0.0, 0.0], [55.0, 0.0], [55.0, 26.0], [0.0, 26.0]]
[[179.0, 4.0], [239.0, 11.0], [235.0, 48.0], [175.0, 41.0]]
[[252.0, 16.0], [357.0, 24.0], [354.0, 58.0], [249.0, 49.0]]
[[389.0, 26.0], [496.0, 34.0], [493.0, 71.0], [386.0, 62.0]]
[[523.0, 37.0], [649.0, 45.0], [646.0, 85.0], [520.0, 77.0]]
[[5.0, 48.0], [56.0, 55.0], [52.0, 87.0], [1.0, 79.0]]
[[44.0, 54.0], [135.0, 62.0], [132.0, 95.0], [41.0, 88.0]]
[[154.0, 61.0], [250.0, 68.0], [247.0, 105.0], [151.0, 99.0]]
[[277.0, 75.0], [338.0, 80.0], [335.0, 120.0], [274.0, 115.0]]
[[363.0, 87.0], [405.0, 87.0], [405.0, 123.0], [363.0, 123.0]]
[[426.0, 88.0], [519.0, 97.0], [515.0, 141.0], [422.0, 133.0]]
[[576.0, 104.0], [671.0, 109.0], [668.0, 151.0], [573.0, 146.0]]
[[4.0, 117.0], [29.0, 117.0], [29.0, 146.0], [4.0, 146.0]]
[[18.0, 119.0], [100.0, 121.0], [99.0, 158.0], [17.0, 155.0]]
[[108.0, 121.0], [173.0, 126.0], [170.0, 165.0], [105.0, 161.0]]
[[203.0, 133.0], [463.0, 162.0], [459.0, 196.0], [199.0, 168.0]]
[[517.0, 162.0], [652.0

In [69]:
# 3. 각 box = bounding box
# bounding box 영역 안에 있는 각 영역을 Crop해서 새로운 이미지로 추출한다
croped_images = []
for idx, box in enumerate(boxes_np_arr):
    # box 점 정렬
    rect = np.zeros((4, 2), dtype="float32")
    s = box.sum(axis=1) # 네 좌표의 합
    diff = np.diff(box, axis=1) # 네 좌표의 차이
    rect[0] = box[np.argmin(s)] # 좌상단 좌표
    rect[2] = box[np.argmax(s)] # 우하단 좌표
    rect[1] = box[np.argmin(diff)] # 우상단 좌표
    rect[3] = box[np.argmax(diff)] # 좌하단 좌표

    # rect = [ 좌상, 우상, 우하, 좌하 ]

    # 네 꼭짓점 좌표를 numpy array로 변환
    # 이때, 검출 영역이 너무 Fit하게 되어있어서 +10을 해준다
    offset = 10
    image_height, image_width = image.shape[:2]
    image_height -= 1
    image_width -= 1

    # rect[0] = 좌상단
    x1 = max(0, rect[0][0] - offset)
    y1 = max(0, rect[0][1] - offset)
    # rect[1] = 우상단
    x2 = min(rect[1][0] + offset, image_width)
    y2 = max(0, rect[1][1] - offset)
    # rect[2] = 우하단
    x3 = min(rect[2][0] + offset, image_width)
    y3 = min(rect[2][1] + offset, image_height)
    # rect[3] = 좌하단
    x4 = max(0, rect[3][0] - offset)
    y4 = min(rect[3][1] + offset, image_height)
    pts = np.array(
        [
            [x1, y1],
            [x2, y2],
            [x3, y3],
            [x4, y4]
        ],
        dtype="float32"
    )

    # 네 좌표를 바탕으로 이미지의 너비 및 높이 계산
    widthA = np.sqrt((pts[2][0] - pts[3][0])**2 + (pts[2][1] - pts[3][1])**2)
    widthB = np.sqrt((pts[1][0] - pts[0][0])**2 + (pts[1][1] - pts[0][1])**2)
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt((pts[1][0] - pts[2][0])**2 + (pts[1][1] - pts[2][1])**2)
    heightB = np.sqrt((pts[0][0] - pts[3][0])**2 + (pts[0][1] - pts[3][1])**2)
    maxHeight = max(int(heightA), int(heightB))

    # print(maxWidth, maxHeight)

    # 출력될 이미지 좌표 설정
    dst = np.array(
        [
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]
        ],
        dtype="float32"
    )

    # Perspective Transformation
    M = cv2.getPerspectiveTransform(pts, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # 음영 제거 -> 모폴로지 연산
    kernel_size = 150
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

    background = cv2.morphologyEx(warped, cv2.MORPH_OPEN, kernel)
    gray_minus_bg = cv2.subtract(warped, background)
    normed = cv2.normalize(gray_minus_bg, None, 0, 255, cv2.NORM_MINMAX)

    croped_images.append(normed)
    cv2.imwrite(f"./cropped/croped_{idx}.png", normed)

In [70]:
print(len(croped_images))

41


In [71]:
# 각 이미지 적응형 이진화 수행
# 근데 이미지에 들어있는 글자가 검정색일수도 있고 흰색일수도 있어서
# Crop된 이미지의 평균 픽셀값을 계산해서 평균값이 128보다 작으면 cv2.THRESH_BINARY_INV로 수행

for idx, croped_image in enumerate(croped_images):

    # Median 블러링
    blurred = cv2.medianBlur(croped_image, 3)

    # 적응형 이진화
    # binarization = cv2.adaptiveThreshold(
    #     blurred,
    #     maxValue=255,
    #     adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
    #     thresholdType=cv2.THRESH_BINARY,  # 또는 THRESH_BINARY_INV
    #     blockSize=21,   # 주변 픽셀 블록 크기
    #     C=13     # 결과에서 뺄 상수
    # )
    _, binarization = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # 만약 배경이 반전되어 있다면 invert
    if np.mean(binarization) < 128:
        binarization = cv2.bitwise_not(binarization)

    # kernel = np.ones((5, 5), np.uint8)
    # opened = cv2.morphologyEx(binarization, cv2.MORPH_CLOSE, kernel)

    cv2.imwrite(f"./character/character_{idx}.png", binarization)


In [72]:
import os

# Load all images from the character directory
binarizationed_images = []
character_dir = "./character/"
for filename in sorted(os.listdir(character_dir)):
    if filename.endswith((".png", ".jpg", ".jpeg")):  # Correct way to check multiple extensions
        img_path = os.path.join(character_dir, filename)
        # Load as grayscale directly
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        _, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        thresh_inv = cv2.bitwise_not(thresh)

        if thresh_inv is not None:
            binarizationed_images.append(thresh_inv)

print(f"Loaded {len(binarizationed_images)} images")

for idx, image in enumerate(binarizationed_images):
    contours, hierarchy = cv2.findContours(
        image,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )

    # 외곽선 기반으로 각 글자 이미지 추출
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        print(f"Contour: {x}, {y}, {w}, {h}")

        # 글자 이미지 추출 및 검정색으로 다시 변환
        character = image[y:y+h, x:x+w]
        character = cv2.bitwise_not(character)
        cv2.imwrite(f"./final/character_{idx}_{x}_{y}_{w}_{h}.png", character)


Loaded 41 images
Contour: 27, 0, 26, 21
Contour: 0, 0, 30, 17
Contour: 16, 15, 17, 20
Contour: 42, 13, 27, 30
Contour: 32, 13, 12, 26
Contour: 75, 34, 24, 18
Contour: 14, 14, 55, 33
Contour: 70, 12, 29, 28
Contour: 15, 31, 24, 17
Contour: 104, 29, 4, 17
Contour: 90, 25, 12, 19
Contour: 41, 22, 25, 20
Contour: 74, 21, 14, 24
Contour: 67, 17, 7, 31
Contour: 16, 15, 25, 16
Contour: 0, 11, 6, 9
Contour: 17, 48, 2, 1
Contour: 1, 21, 24, 16
Contour: 0, 18, 3, 3
Contour: 7, 10, 18, 9
Contour: 0, 10, 1, 1
Contour: 0, 32, 8, 3
Contour: 0, 19, 18, 4
Contour: 50, 17, 17, 19
Contour: 68, 13, 13, 24
Contour: 75, 11, 12, 32
Contour: 0, 9, 17, 8
Contour: 79, 59, 2, 1
Contour: 42, 55, 1, 2
Contour: 42, 23, 19, 15
Contour: 64, 16, 8, 28
Contour: 17, 13, 28, 34
Contour: 147, 54, 1, 1
Contour: 140, 54, 1, 1
Contour: 127, 54, 1, 1
Contour: 120, 54, 1, 1
Contour: 106, 54, 3, 1
Contour: 100, 54, 1, 1
Contour: 190, 29, 19, 14
Contour: 14, 27, 22, 15
Contour: 63, 20, 19, 18
Contour: 148, 18, 16, 19
Contour: 9