/
auxiliary.py
398 lines (302 loc) 路 11.5 KB
/
auxiliary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
import re
import tempfile
import sys
import os
from os import path
import gdown
import cv2
import requests
from symspellpy import SymSpell, Verbosity
import numpy as np
import pytesseract as ocr
from PIL import Image
from sklearn.cluster import KMeans
from imutils.object_detection import non_max_suppression
def load_east_model():
    """Locate the frozen EAST text-detection model, downloading it if absent.

    The model is cached in a ``nkocr-model`` directory created inside the
    interpreter's last ``site-packages`` entry on ``sys.path``.

    Returns:
        The path of ``frozen_east_text_detection.pb``.

    Raises:
        OSError: if no ``site-packages`` directory is present on ``sys.path``.
            (The previous code indexed ``[-1]`` on a possibly-empty list and
            would raise IndexError before reaching this error.)
    """
    site_packages = [entry for entry in sys.path if 'site-packages' in entry]
    if not site_packages:
        raise OSError(
            'the default directory of Python, site-packages, is not found.')
    model_dir = site_packages[-1] + '/nkocr-model'
    if not path.isdir(model_dir):
        os.mkdir(model_dir)
    model = model_dir + '/frozen_east_text_detection.pb'
    if not path.isfile(model):
        get_model_from_s3(model)
    return model
def get_model_from_s3(output):
    """Download the EAST model file from S3 into *output* and return that path.

    Args:
        output: destination file path for the downloaded model.

    Raises:
        ConnectionError: if the download fails; the original exception is
            chained as ``__cause__`` so the real failure is not lost.
    """
    url = ('https://project-elements-nk.s3.amazonaws.com/'
           'frozen_east_text_detection.pb')
    try:
        gdown.download(url, output, quiet=False)
        return output
    except Exception as error:
        raise ConnectionError(
            'you need to be connected to some internet network to download the EAST model.') from error
def get_input_type(_input):
    """Classify *_input*: 1 for a URL, 2 for a filesystem path, 3 for an
    in-memory image (numpy array or PIL image).

    Raises:
        TypeError: when none of the predicates recognize the input.
    """
    checks = ((1, is_url), (2, is_path), (3, is_image))
    for type_code, predicate in checks:
        if predicate(_input):
            return type_code
    raise TypeError(
        'invalid input, try to send an url, path, numpy.ndarray or PIL.Image.')
def is_url(_input):
    """Return True when *_input* is a string that looks like an http/ftp URL."""
    if not isinstance(_input, str):
        return False
    url_pattern = re.compile(
        r'^(?:http|ftp)s?://' # http:// or https://
        # domain...
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'
        r'localhost|' # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
        r'(?::\d+)?' # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
    return url_pattern.match(_input) is not None
def is_path(_input):
    """Return True when *_input* is a string naming an existing regular file."""
    if not isinstance(_input, str):
        return False
    resolved = path.realpath(_input)
    return path.isfile(resolved)
def is_image(_input):
    """Return True when *_input* is a numpy array or a supported PIL image.

    Supported PIL plugins: BMP, GIF, JPEG, PNG, PPM and TIFF. The check
    compares the fully-qualified class name as a string (exactly as the
    previous implementation did), so the PIL plugin modules never need to
    be imported here; the seven pasted-string comparisons are collapsed
    into one membership test.
    """
    supported_type_names = (
        "<class 'numpy.ndarray'>",
        "<class 'PIL.BmpImagePlugin.BmpImageFile'>",
        "<class 'PIL.GifImagePlugin.GifImageFile'>",
        "<class 'PIL.JpegImagePlugin.JpegImageFile'>",
        "<class 'PIL.PngImagePlugin.PngImageFile'>",
        "<class 'PIL.PpmImagePlugin.PpmImageFile'>",
        "<class 'PIL.TiffImagePlugin.TiffImageFile'>",
    )
    return str(type(_input)) in supported_type_names
def to_opencv_type(image):
    """Return *image* as a numpy array with the channel axis reversed
    (RGB -> BGR), the layout OpenCV expects."""
    rgb_array = np.asarray(image)
    bgr_array = rgb_array[:, :, ::-1]
    return bgr_array
def remove_alpha_channel(image):
    """Keep only the first three channels of *image*, dropping any alpha."""
    color_channels = image[:, :, 0:3]
    return color_channels
def brightness_contrast_optimization(image, alpha=1.5, beta=0):
    """Adjust contrast (*alpha*, gain) and brightness (*beta*, bias) of
    *image* via OpenCV's scale-and-saturate conversion."""
    adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return adjusted
def run_kmeans(image, number_clusters):
    """Cluster the image's pixels into *number_clusters* dominant colors.

    Returns the centroid colors paired with the fraction of pixels each one
    covers, sorted most-frequent first.
    """
    pixels = image.reshape((image.shape[0] * image.shape[1], 3))
    model = KMeans(n_clusters=number_clusters)
    model.fit(pixels)
    proportions = centroid_histogram(model)
    return sort_colors(proportions, model.cluster_centers_)
def centroid_histogram(clusters):
    """Return the normalized frequency of each cluster label.

    *clusters* is a fitted estimator exposing ``labels_``; one bin is built
    per distinct label and the counts are normalized to sum to 1.
    """
    bin_edges = np.arange(0, len(np.unique(clusters.labels_)) + 1)
    counts, _ = np.histogram(clusters.labels_, bins=bin_edges)
    frequencies = counts.astype('float')
    return frequencies / frequencies.sum()
def sort_colors(histogram, centroids):
    """Pair each centroid color (as a uint8 RGB tuple) with its pixel share
    and return the pairs sorted by share, largest first."""
    color_shares = {
        tuple(center.astype('uint8').tolist()): share
        for share, center in zip(histogram, centroids)
    }
    return sorted(color_shares.items(), key=lambda item: item[1], reverse=True)
def image_resize(image,
                 width=None,
                 height=None,
                 inter=cv2.INTER_AREA):
    """Resize *image*, preserving aspect ratio when only one side is given.

    Args:
        image: source image, shape (H, W, C).
        width: target width in pixels, or None to derive it from *height*.
        height: target height in pixels, or None to derive it from *width*.
        inter: OpenCV interpolation flag.

    Returns:
        The resized image re-sampled to 300 DPI, or the original image
        unchanged when neither dimension is given.
    """
    (_height, _width) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        proportion = height / float(_height)
        dimensions = (int(_width * proportion), height)
    elif height is None:
        proportion = width / float(_width)
        dimensions = (width, int(_height * proportion))
    else:
        # cv2.resize takes dsize as (width, height); the previous code
        # passed (height, width), silently swapping non-square targets.
        dimensions = (width, height)
    resized = cv2.resize(image, dimensions, interpolation=inter)
    resized = set_image_dpi(resized, 300)
    return resized
def set_image_dpi(image, dpi):
    """Re-save *image* with the given DPI metadata and return its pixels.

    The image is converted BGR -> RGB for PIL, downscaled so its width does
    not exceed 1024 pixels, written to a temporary PNG carrying the DPI
    metadata, and returned as a BGR numpy array.

    Args:
        image: source image in OpenCV BGR order.
        dpi: dots-per-inch value stored in the saved PNG.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(image)
    length_x, width_y = pil_image.size
    factor = min(1, float(1024.0 / length_x))
    size = int(factor * length_x), int(factor * width_y)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    resample = getattr(Image, 'LANCZOS', getattr(Image, 'ANTIALIAS', None))
    im_resized = pil_image.resize(size, resample)
    # Close the handle explicitly instead of leaking the NamedTemporaryFile
    # object (the old code rebound the variable to .name and dropped it).
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
        temp_name = temp_file.name
    im_resized.save(temp_name, dpi=(dpi, dpi))
    return np.asarray(im_resized)[:, :, ::-1]
def open_close_filter(image, method, kernel=2):
    """Apply the morphological *method* (e.g. open/close) on the inverted
    image with a square structuring element of side *kernel*, then invert
    the result back."""
    structuring_element = cv2.getStructuringElement(
        cv2.MORPH_RECT, (kernel, kernel))
    inverted = 255 - image
    filtered = cv2.morphologyEx(inverted, method, structuring_element,
                                iterations=1)
    return 255 - filtered
def unsharp_mask(image,
                 kernel_size=(5, 5),
                 sigma=1.0,
                 amount=1.0,
                 threshold=0):
    """Return a sharpened version of the image, using an unsharp mask.

    Args:
        image: source image (uint8 array assumed — TODO confirm callers).
        kernel_size: Gaussian blur kernel size.
        sigma: Gaussian blur standard deviation.
        amount: sharpening strength.
        threshold: when positive, pixels whose blur difference falls below
            it keep their original value (protects low-contrast regions).
    """
    # https://homepages.inf.ed.ac.uk/rbf/HIPR2/unsharp.htm
    blurred = cv2.GaussianBlur(image, kernel_size, sigma)
    sharpened = float(amount + 1) * image - float(amount) * blurred
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)
    if threshold > 0:
        # Subtract in a signed dtype: plain uint8 subtraction wraps around
        # and would corrupt the low-contrast mask.
        difference = np.absolute(
            image.astype(np.int16) - blurred.astype(np.int16))
        low_contrast_mask = difference < threshold
        np.copyto(sharpened, image, where=low_contrast_mask)
    return sharpened
def dilate_image(image, kernel_size):
    """Dilate *image* once using a square all-ones kernel of the given side."""
    square_kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(image, square_kernel, iterations=1)
def binarize_image(image):
    """Convert *image* to grayscale and binarize it with Otsu's automatic
    threshold (the 0/255 bounds are ignored by THRESH_OTSU)."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binarized = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binarized
def east_process(image):
    """Run the full EAST-detection + OCR pipeline on *image*.

    The image is resized to the 640x640 EAST input, text regions are
    detected, de-duplicated with non-maximum suppression, OCR'd on the
    original-size image, and returned in reading order.
    """
    original = image.copy()
    (orig_height, orig_width) = get_size(image)
    (ratio_height, ratio_width) = get_ratio(orig_height, orig_width)
    resized = image_resize(image, height=640, width=640)
    (height, width) = get_size(resized)
    network = cv2.dnn.readNet(load_east_model())
    (scores, geometry) = run_east(network, resized, height, width)
    (rects, confidences) = decode_predictions(scores, geometry, 0.7)
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    (results, _) = apply_boxes(boxes, original,
                               ratio_height, ratio_width,
                               orig_height, orig_width, 0.06)
    return sort_boxes(results)
def get_size(image):
    """Return the image's (height, width) taken from its array shape."""
    height, width = image.shape[0], image.shape[1]
    return height, width
def get_ratio(height, width):
    """Return the scale of the original size relative to the 640px EAST input."""
    east_side = float(640)
    return height / east_side, width / east_side
def run_east(net, image, height, width):
    """Forward *image* through the EAST network.

    Returns the (scores, geometry) output maps from the two named layers:
    per-cell text confidence and box geometry respectively.
    """
    output_layers = [
        'feature_fusion/Conv_7/Sigmoid',
        'feature_fusion/concat_3'
    ]
    mean_subtraction = (123.68, 116.78, 103.94)
    blob = cv2.dnn.blobFromImage(
        image, 1.0, (height, width),
        mean_subtraction, swapRB=True, crop=False)
    net.setInput(blob)
    scores, geometry = net.forward(output_layers)
    return scores, geometry
def decode_predictions(scores, geometry, min_confidence):
    """Convert EAST score/geometry maps into axis-aligned boxes.

    Args:
        scores: confidence map, shape (1, 1, rows, cols).
        geometry: map of shape (1, 5, rows, cols): distances from each cell
            to the box's top, right, bottom and left edges, plus an angle.
        min_confidence: cells scoring below this are skipped.

    Returns:
        (rects, confidences): boxes as (start_x, start_y, end_x, end_y)
        tuples, and the confidence kept for each box.
    """
    rows, cols = scores.shape[2:4]
    rects = []
    confidences = []
    for row in range(rows):
        row_scores = scores[0, 0, row]
        top, right, bottom, left, row_angles = (
            geometry[0, channel, row] for channel in range(5))
        for col in range(cols):
            score = row_scores[col]
            if score < min_confidence:
                continue
            # Each output cell corresponds to a 4x4 patch of the input.
            offset_x, offset_y = col * 4.0, row * 4.0
            cos = np.cos(row_angles[col])
            sin = np.sin(row_angles[col])
            box_height = top[col] + bottom[col]
            box_width = right[col] + left[col]
            end_x = int(offset_x + (cos * right[col]) + (sin * bottom[col]))
            end_y = int(offset_y - (sin * right[col]) + (cos * bottom[col]))
            rects.append((int(end_x - box_width), int(end_y - box_height),
                          end_x, end_y))
            confidences.append(score)
    return (rects, confidences)
def apply_boxes(
        boxes,
        image,
        ratio_height,
        ratio_width,
        height,
        width,
        padding):
    """OCR every detected box on the full-size image and draw its outline.

    The boxes come from the 640x640 EAST pass, so each one is rescaled by
    the height/width ratios, padded by *padding* (a fraction of the box
    size), clamped to the image bounds, read with Tesseract, and drawn as
    a green rectangle on *image*.

    Returns:
        (results, image): a list of ((start_x, start_y, end_x, end_y), text)
        pairs and the annotated image.
    """
    results = []
    tesseract_config = ('-l por --oem 1 --psm 7')
    for (start_x, start_y, end_x, end_y) in boxes:
        start_x = int(start_x * ratio_width)
        start_y = int(start_y * ratio_height)
        end_x = int(end_x * ratio_width)
        end_y = int(end_y * ratio_height)
        pad_x = int((end_x - start_x) * padding)
        pad_y = int((end_y - start_y) * padding)
        start_x = max(0, start_x - pad_x)
        start_y = max(0, start_y - pad_y)
        end_x = min(width, end_x + (pad_x * 2))
        end_y = min(height, end_y + (pad_y * 2))
        region = image[start_y:end_y, start_x:end_x]
        text = ocr.image_to_string(region, config=tesseract_config)
        results.append(((start_x, start_y, end_x, end_y), text))
        cv2.rectangle(image, (start_x, start_y),
                      (end_x, end_y), (0, 255, 0), 2)
    return results, image
def sort_boxes(boxes):
    """Order OCR boxes in reading order: top-to-bottom, then left-to-right.

    Boxes sharing the exact same start-y are treated as one text line and
    ordered by start-x; this single stable sort on (start_y, start_x) is
    equivalent to grouping by line value and flattening.
    """
    return sorted(boxes, key=lambda box: (box[0][1], box[0][0]))
def get_image_from_url(url):
    """Fetch *url* and return the raw ``requests`` response.

    Raises:
        ConnectionError: if the HTTP request fails; the original exception
            is chained. (The previous message was copy-pasted from the
            EAST-model download and wrongly mentioned the model.)
    """
    try:
        response = requests.get(url)
    except Exception as error:
        raise ConnectionError(
            'you need to be connected to some internet network '
            'to download the image.') from error
    return response
def load_dict_to_memory(dictionary_path='./src/dictionary/dictionary.pkl'):
    """Load a pickled SymSpell dictionary into memory and return it.

    Args:
        dictionary_path: location of the pickled dictionary. Defaults to
            the project's bundled dictionary, so existing callers are
            unaffected; the path was previously hard-coded.
    """
    sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_pickle(dictionary_path)
    return sym_spell
def get_word_suggestion(symspell, input_term):
    """Return the top spelling suggestion for *input_term*.

    Terms containing digits are returned unchanged, as are terms for which
    SymSpell has no suggestion within edit distance 2.
    """
    contains_digits = len(re.findall(r'\d+', input_term)) > 0
    if not contains_digits:
        suggestions = symspell.lookup(
            input_term, Verbosity.TOP, max_edit_distance=2)
        if len(suggestions) > 0:
            return suggestions[0].term
    return input_term