In [None]:
def convert_coord(coord_list):
  """Expand an extent list into the four corner points of its bounding box.

  Args:
    coord_list: Sequence of exactly four values ordered as
      (x_min, x_max, y_min, y_max).

  Returns:
    A list of four [x, y] pairs in the order
    (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max).
  """
  low_x, high_x, low_y, high_y = coord_list
  corners = ((low_x, low_y), (high_x, low_y), (high_x, high_y), (low_x, high_y))
  return [[corner_x, corner_y] for corner_x, corner_y in corners]


def crop(image, points):
    """Cut the polygon described by `points` out of `image` onto white.

    The result has the size of the polygon's bounding rectangle. Pixels
    covered by the filled polygon mask keep their values from `image`;
    every other pixel is set to 255.

    Args:
        image: Source image array, indexed as image[row, column].
        points: Polygon vertices as (x, y) pairs; converted to int32.

    Returns:
        A new array holding the masked crop on a white background.
    """
    polygon = np.array(points, np.int32)

    # Copy out the polygon's bounding rectangle.
    left, top, width, height = cv2.boundingRect(polygon)
    patch = image[top : top + height, left : left + width].copy()

    # Shift the polygon so its minimum x/y sit at the origin, then rasterise
    # it (filled) into a mask the same height/width as the patch.
    local_polygon = polygon - polygon.min(axis=0)
    mask = np.zeros(patch.shape[:2], np.uint8)
    cv2.drawContours(mask, [local_polygon], -1, (255, 255, 255), -1, cv2.LINE_AA)

    # Keep only the masked pixels of the patch.
    foreground = cv2.bitwise_and(patch, patch, mask=mask)

    # White canvas, inverted (to 0) wherever the mask is set, so that adding
    # the foreground fills exactly those pixels.
    background = np.full_like(patch, 255, dtype=np.uint8)
    cv2.bitwise_not(background, background, mask=mask)

    return background + foreground

In [None]:
import os, sys
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read the model files and
# write the recognition results under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression
import argparse
import time
import cv2
import shutil
import os

# Load the image
image = cv2.imread("/content/EasyOCR/preprocessedData/KakaoTalk_20240519_220636480.jpg")
# Keep an untouched copy; boxes are drawn on it and crops are taken from it later
orig = image.copy()
(H, W) = image.shape[:2]

# Resize the image
# rW / rH are the original-to-resized ratios, used further down to map the
# detected boxes back onto the original image
(newW, newH) = (5120, 5120)
rW = W / float(newW)
rH = H / float(newH)

image = cv2.resize(image, (newW, newH))
# NOTE: from here on (H, W) hold the resized dimensions, not the original ones
(H, W) = image.shape[:2]

# Load the EAST model
# The two output layers requested from the network: the first is read below
# as the score map, the second as the box geometry
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet("/content/drive/MyDrive/Colab Notebooks/workspace/pre_trained_model/frozen_east_text_detection.pb")

# Convert the image into a blob so it can be fed to the network
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                             (123.68, 116.78, 103.94), swapRB=True, crop=False)

# Set the blob as the network input
net.setInput(blob)
# Forward pass: fetch the outputs of the two layers named above
(scores, geometry) = net.forward(layerNames)

# Confidence scores: grid size of the score map, plus accumulators for the
# candidate boxes and their scores
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []

for y in range(0, numRows):
    # extract the scores (probabilities), followed by the geometrical
    # data used to derive potential bounding box coordinates that
    # surround text
    scoresData = scores[0, 0, y]
    xData0 = geometry[0, 0, y]
    xData1 = geometry[0, 1, y]
    xData2 = geometry[0, 2, y]
    xData3 = geometry[0, 3, y]
    anglesData = geometry[0, 4, y]

    for x in range(0, numCols):
        # if our score does not have sufficient probability, ignore it
        if scoresData[x] < 0.4:
            continue

        # compute the offset factor as our resulting feature maps will
        # be 4x smaller than the input image
        (offsetX, offsetY) = (x * 4.0, y * 4.0)

        # extract the rotation angle for the prediction and then
        # compute the sin and cosine
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)

        # use the geometry volume to derive the width and height of
        # the bounding box
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]

        # compute both the starting and ending (x, y)-coordinates for
        # the text prediction bounding box
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)

        # add the bounding box coordinates and probability score to
        # our respective lists
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)

# Index used to name the crop files written later (1.jpg, 2.jpg, ...)
count = 1
# change box width and height -> positive will add pixels and vice-versa
box_width_padding = 50
box_height_padding = 50

# Second clean copy of the original image: crops are cut from this one so
# they do not contain the rectangles drawn on `orig`
temp_image = orig.copy()

# Start from a clean output folder. Removal is best effort, exactly as before:
# a missing folder (or any other removal error) is ignored.
shutil.rmtree('output', ignore_errors=True)

# Create the output folder. exist_ok covers the case where the removal above
# could not delete it. Any other failure (e.g. a permission error) now raises
# immediately instead of being retried forever in a busy loop.
os.makedirs('output', exist_ok=True)
# define crop object
class Crop(object):
    """Crop rectangle that sorts in reading order (top-to-bottom, left-to-right).

    Two rectangles whose top edges (startY) differ by at most ROW_TOLERANCE
    pixels are treated as being on the same row and are compared by startX;
    otherwise they are compared by startY.

    Note: because rows are matched with a tolerance, the ordering is not a
    strict total order for boxes whose startY values chain across the
    tolerance; `sorted` still runs, but such boxes may not end up perfectly
    ordered. Defining __eq__ without __hash__ also leaves instances
    unhashable, as in the original class.
    """

    # Maximum startY difference (in pixels) for two boxes to count as one row.
    ROW_TOLERANCE = 10

    def __init__(self, startX, startY, endX, endY):
        """Store the rectangle's corner coordinates (in pixels)."""
        self.startX = startX
        self.startY = startY
        self.endX = endX
        self.endY = endY

    def _same_row(self, other):
        """Return True when `other` starts within ROW_TOLERANCE of this box vertically."""
        return abs(self.startY - other.startY) <= self.ROW_TOLERANCE

    def __eq__(self, other):
        """Equal when both boxes are on the same row and share startX."""
        if not isinstance(other, Crop):
            return NotImplemented
        if self._same_row(other):
            return self.startX == other.startX
        # The original fell through here and returned None; boxes on
        # different rows are simply not equal.
        return False

    def __lt__(self, other):
        """Order by startX within a row, by startY across rows."""
        if not isinstance(other, Crop):
            return NotImplemented
        if self._same_row(other):
            return self.startX < other.startX
        return self.startY < other.startY

croppedList = []

# Size of the original (un-resized) image. Padded boxes are clamped to it:
# a negative start index would make the slice below wrap around and produce
# an empty crop, which used to be dropped silently.
(cropH, cropW) = temp_image.shape[:2]

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective
    # ratios, add the padding, and keep the result inside the image
    startX = max(0, int(startX * rW) - box_width_padding)
    startY = max(0, int(startY * rH) - box_height_padding)
    endX = min(cropW, int(endX * rW) + box_width_padding)
    endY = min(cropH, int(endY * rH) + box_height_padding)

    # draw the bounding box on the image
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

    # append to croppedList to sort the images
    croppedList.append(Crop(startX, startY, endX, endY))

# Reading order, as defined by Crop's comparison methods
croppedList = sorted(croppedList)

for img in croppedList:
    roi = temp_image[img.startY:img.endY, img.startX:img.endX]
    # A degenerate box has no pixels to write; skip it explicitly instead of
    # relying on imwrite raising inside a bare except.
    if roi.size == 0:
        continue
    # imwrite reports failure through its return value; only advance the
    # file index when the crop was actually written.
    if cv2.imwrite("output/" + str(count) + ".jpg", roi):
        count = count + 1
        # Progress output: this is the index the NEXT crop will get
        print(count)

# save the annotated image (nothing is displayed, so waitKey returns at once)
cv2.imwrite("output/Text Detection.jpg", orig)
cv2.waitKey(0)

[INFO] loading EAST text detector...
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187


-1

### Recognition

In [None]:
!git clone https://github.com/JaidedAI/EasyOCR.git

Cloning into 'EasyOCR'...
remote: Enumerating objects: 2736, done.[K
remote: Total 2736 (delta 0), reused 0 (delta 0), pack-reused 2736[K
Receiving objects: 100% (2736/2736), 157.83 MiB | 13.52 MiB/s, done.
Resolving deltas: 100% (1664/1664), done.
Updating files: 100% (313/313), done.


In [None]:
!pip install -r ./EasyOCR/requirements.txt

Collecting python-bidi (from -r ./EasyOCR/requirements.txt (line 8))
  Downloading python_bidi-0.4.2-py2.py3-none-any.whl (30 kB)
Collecting pyclipper (from -r ./EasyOCR/requirements.txt (line 11))
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (908 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m908.3/908.3 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ninja (from -r ./EasyOCR/requirements.txt (line 12))
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->-r ./EasyOCR/requirements.txt (line 1))
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->-r ./EasyOCR/requirements.txt (line 1))
  Us

In [None]:
!mkdir "EasyOCR/user_network"
!mkdir "EasyOCR/model"
!mkdir "EasyOCR/newExamples"
!mkdir "EasyOCR/preprocessedData"

In [None]:
%cd EasyOCR

/content/EasyOCR


In [None]:
from easyocr.easyocr import *
from PIL import Image

# GPU configuration: sets CUDA_VISIBLE_DEVICES to devices 0 and 1 for this process
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'


def get_files(path):
    """List the non-hidden entries of a directory as absolute paths.

    Args:
        path: Directory to list.

    Returns:
        A tuple (file_list, count): the absolute paths of every entry whose
        name does not start with '.', sorted by name, and how many there are.
    """
    visible_names = sorted(
        name for name in os.listdir(path) if not name.startswith('.')  # skip hidden entries
    )
    root = os.path.abspath(path)
    file_list = [os.path.join(root, name) for name in visible_names]
    return file_list, len(file_list)


if __name__ == '__main__':

    # # Using default model
    # reader = Reader(['ko'], gpu=True)

    # Using custom model: the 'custom' recognition network is looked up in
    # ./user_network, the weights in the Drive model directory.
    reader = Reader(['ko'], gpu=True,
                    model_storage_directory='/content/drive/MyDrive/Colab Notebooks/workspace/pre_trained_model/',
                    user_network_directory='./user_network',
                    recog_network='custom')

    files, count = get_files('/content/EasyOCR/preprocessedData')

    # Lift Pillow's maximum-pixel limit once, up front (the preprocessed
    # images are very large); it does not need to be reset per file.
    Image.MAX_IMAGE_PIXELS = None

    # Make sure the destination exists so a fresh Drive does not fail on open().
    output_dir = '/content/drive/MyDrive/Colab Notebooks/workspace/recognized_files'
    os.makedirs(output_dir, exist_ok=True)

    for file in files:
        string_list = []
        filename = os.path.basename(file)
        result = reader.readtext(file)

        # Each entry of result is a (bbox, string, confidence) triple
        # (see ./easyocr/utils.py).
        for (bbox, string, confidence) in result:
            string_list.append(string)
            print("filename: '%s', confidence: %.4f, string: '%s'" % (filename, confidence, string))
            # print('bbox: ', bbox)

        file_name = os.path.join(output_dir, 'recognized_{}.txt'.format(filename))

        # Write explicitly as UTF-8 so the Korean text does not depend on the
        # platform's default encoding, and use a handle name that does not
        # shadow the loop variable `file`.
        with open(file_name, 'w', encoding='utf-8') as out_file:
            out_file.write('\n'.join(string_list))


  return F.conv2d(input, weight, bias, self.stride,


filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.8398, string: '복약안내'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.7878, string: '악제비영수증'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.8880, string: '서움온바론정램외과'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.5186, string: '한일헤 '
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.2520, string: '영수중션호'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.8348, string: '발행기관'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.4964, string: '조제악사'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.9594, string: '고부벤호'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.3257, string: '한자성명 '
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.9715, string: '221 어'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.5267, string: '2024 벤04염11일'
filename: 'KakaoTalk_20240413_184140770.jpg', confidence: 0.9995, string: '조제일자'
filename: 

### Preprocessing

In [None]:
from imutils.object_detection import non_max_suppression
from imutils.perspective import four_point_transform
from imutils.contours import sort_contours
import matplotlib.pyplot as plt
import imutils
import numpy as np
import requests
import cv2
import os
from scipy.ndimage import label

In [None]:
def find_contours(img):
  """Binarize `img` with Otsu's threshold and return the binary image.

  Despite its name, this function does not extract contours; it only
  thresholds. The original also computed a Gaussian-blurred copy that was
  never used; that dead (and, on large images, costly) step is removed, so
  the returned image is unchanged.

  Args:
    img: Image to threshold (assumes a single-channel 8-bit image, as
      produced by resize_img - TODO confirm for other callers).

  Returns:
    The thresholded image, with values 0 or 255.
  """
  # With THRESH_OTSU the threshold is chosen by OpenCV from the image
  # histogram, so the 100 passed here is only a placeholder.
  _, thr1 = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
  return thr1

In [None]:
def sharpening_img(img):
  """Apply a 3x3 sharpening convolution to `img` and return the result.

  The kernel is -2 everywhere except for 18 at the centre; the output keeps
  the input's depth (ddepth=-1).
  """
  kernel = np.full((3, 3), -2)
  kernel[1, 1] = 18
  return cv2.filter2D(img, -1, kernel)

In [None]:
def find_outlines(edged, img):
  """Find external contours in `edged` and visualise candidate regions on `img`.

  Each contour's bounding box is classified by aspect ratio and position:
  boxes with 3.0 < width/height < 6.5 whose left edge lies in the right half
  of `edged` are drawn in green and cropped from `img`; all others are drawn
  in red. The annotated copy of `img` is shown via plt_imshow.

  Fixes over the original:
    * the aspect ratio uses true division (`//` floored it, so e.g. 3.9
      became 3.0 and failed the `> 3.0` test);
    * the right-half test uses the width of `edged` (where the contour
      coordinates come from) instead of a global `W` set by another cell;
    * ROI slice starts are clamped at 0 so a box closer than `margin` to the
      border no longer wraps around to an empty slice.

  Args:
    edged: Binary/edge image to search for contours.
    img: Image to annotate and crop (assumed to share `edged`'s coordinate
      system - TODO confirm at the call site).

  Returns:
    None. The collected ROIs are not returned, matching the original.
  """
  cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  cnts = imutils.grab_contours(cnts)
  cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

  roi_list = []
  roi_title_list = []

  margin = 20
  receipt_grouping = img.copy()
  half_width = edged.shape[1] / 2

  for c in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    ar = w / float(h)

    if 3.0 < ar < 6.5 and half_width < x:
      color = (0, 255, 0)
      top = max(y - margin, 0)
      left = max(x - margin, 0)
      roi = img[top:y + h + margin, left:x + w + margin]
      roi_list.append(roi)
      roi_title_list.append("Roi_{}".format(len(roi_list)))
    else:
      color = (0, 0, 255)

    cv2.rectangle(receipt_grouping, (x - margin, y - margin), (x + w + margin, y + h + margin), color, 2)
    cv2.putText(receipt_grouping, "{:.2f}".format(ar), (x, y - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.65, color, 2)

  # NOTE(review): plt_imshow is not defined in the visible part of this
  # notebook; it has to be provided by another cell before this is called.
  plt_imshow(["Grouping Image"], [receipt_grouping], figsize=(16, 10))

In [None]:
def filtering_img(img):
  """Smooth `img` with a 3x3 averaging kernel and return the result.

  Every kernel weight is 1/9; the output keeps the input's depth (ddepth=-1).
  """
  mean_kernel = np.full((3, 3), 1 / 9, dtype=np.float64)
  return cv2.filter2D(img, -1, mean_kernel)

In [None]:
def denoisiong_img(img):
  """Denoise `img` with OpenCV's non-local means filter and return the result.

  Uses filter strength 10, a template window of 7 and a search window of 21.
  """
  strength = 10
  template_window = 7
  search_window = 21
  return cv2.fastNlMeansDenoising(img, None, strength, template_window, search_window)

In [None]:
def resize_img(img):
  """Convert `img` to grayscale, enlarge it 6x and clean it up morphologically.

  Steps, in order: BGR -> gray, print the gray shape, resize to six times the
  width and height, dilate with a 7x7 kernel, erode with a 3x3 kernel, then
  apply a morphological close with a 5x5 kernel.

  Args:
    img: BGR image array.

  Returns:
    The processed single-channel image.
  """
  scale = 6

  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  print(gray.shape)

  rows, cols = gray.shape
  enlarged = cv2.resize(gray, (cols * scale, rows * scale))

  def square(size):
    # All-ones structuring element of the given side length.
    return np.ones((size, size), np.uint8)

  grown = cv2.dilate(enlarged, square(7), iterations=1)
  shrunk = cv2.erode(grown, square(3), iterations=1)
  return cv2.morphologyEx(shrunk, cv2.MORPH_CLOSE, square(5))

In [None]:
def load_file(folder, file):
  """Read the image `file` located in `folder` and return it.

  The value returned is whatever cv2.imread produces for the joined path.
  """
  return cv2.imread(os.path.join(folder, file))

In [None]:
# Input folder for the preprocessing loop.
# NOTE: the second assignment overrides the first, so the images are read
# from the Drive folder and the EasyOCR/newExamples path is effectively unused.
folder = "/content/EasyOCR/newExamples"
folder = "/content/drive/MyDrive/Colab Notebooks/workspace/pre_examples"

In [None]:
files = os.listdir(folder)

for file in files:
  print(file)
  # Read the image from disk as an array
  img = load_file(folder, file)
  if img is None:
    # load_file returns cv2.imread's result, which is None for anything that
    # cannot be decoded (sub-folders, non-image files). Skip it instead of
    # letting the whole batch abort inside cvtColor.
    print(f"skipping {file}: not a readable image")
    continue

  # Pipeline: grayscale + 6x resize + morphology -> Otsu threshold ->
  # 3x3 mean filter -> sharpening. (find_outlines and denoisiong_img are
  # intentionally not part of the pipeline.)
  org_image = resize_img(img)
  org_image = find_contours(org_image)
  org_image = filtering_img(org_image)
  org_image = sharpening_img(org_image)

  # The path is relative: this relies on the working directory being the
  # EasyOCR checkout. imwrite signals failure through its return value.
  if not cv2.imwrite(f"./preprocessedData/{file}", org_image):
    print(f"failed to write ./preprocessedData/{file}")
  print(org_image.shape)

KakaoTalk_20240413_184140770.jpg
(1578, 2102)
(9468, 12612)
KakaoTalk_20240421_143645255.jpg
(3029, 2939)
(18174, 17634)
KakaoTalk_20240421_143618939_03.jpg
(720, 1280)
(4320, 7680)
KakaoTalk_20240501_200012369.jpg
(3024, 4032)
(18144, 24192)
KakaoTalk_20240430_145159867.jpg
(1490, 2220)
(8940, 13320)
KakaoTalk_20240519_220636480.jpg
(2100, 1576)
(12600, 9456)


easyOCR

In [None]:
!pip install easyocr

Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.9 MB[0m [31m8.8 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m1.7/2.9 MB[0m [31m25.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: easyocr
Successfully installed easyocr-1.7.1


In [None]:
import easyocr
# Build an EasyOCR reader for Korean ('ko') from the pip-installed package.
# NOTE: this rebinds `reader`, replacing the custom-model reader created in
# the recognition cell.
reader = easyocr.Reader(['ko']) # this needs to run only once to load the model into memory




Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.1% Complete

In [None]:
# Run OCR on one preprocessed image and print the raw result list
# (each entry is a (bbox, string, confidence) triple).
result = reader.readtext("/content/EasyOCR/preprocessedData/KakaoTalk_20240519_220636480.jpg")
print(result)



[([[691, 1626], [1867, 1626], [1867, 2074], [691, 2074]], '쫓제약', 0.3392849862575531), ([[2018, 1604], [3543, 1604], [3543, 2076], [2018, 2076]], '복약안내', 0.9982784986495972), ([[1852, 2226], [2104, 2226], [2104, 2449], [1852, 2449]], '면', 0.997389353725012), ([[5350, 2170], [6402, 2170], [6402, 2503], [5350, 2503]], '조제와 사', 0.6158391177093598), ([[532, 2473], [2175, 2473], [2175, 2790], [532, 2790]], '처방전교부번호:', 0.7933749289051385), ([[2343, 2510], [4064, 2510], [4064, 2745], [2343, 2745]], "2024513'012", 0.3943121930504073), ([[5351, 2483], [6501, 2483], [6501, 2816], [5351, 2816]], '조제인 자:', 0.9684209309155178), ([[533, 2780], [2164, 2780], [2164, 3095], [533, 3095]], '처방전발 챔기관:', 0.2080252769648935), ([[3906, 3256], [5789, 3256], [5789, 3515], [3906, 3515]], '복   약  안   내', 0.5662222088384), ([[578, 3452], [3004, 3452], [3004, 3723], [578, 3723]], '손표니돈정어밀리그립(머자프럽', 0.014377421931944925), ([[3201, 3516], [4299, 3516], [4299, 3728], [3201, 3728]], '1회투약랑 5.00', 0.6553856937576866), (