-
Notifications
You must be signed in to change notification settings - Fork 1
/
ocr.py
145 lines (130 loc) · 4.59 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import json
import os
import requests
from . import *
def ocr_space_file(
    filename, overlay=False, api_key=Config.OCR_API, language="eng"
):
    """Upload a local file to the OCR.space parse endpoint.

    :param filename: Path of the file to send. It is opened in binary mode
                     and the same string is used as the multipart field name.
    :param overlay: Value sent as ``isOverlayRequired``.
                    Defaults to False.
    :param api_key: Value sent as ``apikey``.
                    Defaults to ``Config.OCR_API``.
    :param language: Language code sent as ``language``.
                    List of available language codes can be found on
                    https://ocr.space/OCRAPI
                    Defaults to 'eng'.
    :return: The response body decoded from JSON.
    """
    endpoint = "https://api.ocr.space/parse/image"
    form_fields = dict(
        isOverlayRequired=overlay,
        apikey=api_key,
        language=language,
    )
    with open(filename, "rb") as image_file:
        response = requests.post(
            endpoint,
            files={filename: image_file},
            data=form_fields,
        )
    return response.json()
def ocr_space_url(url, overlay=False, api_key=Config.OCR_API, language="eng"):
    """Ask the OCR.space parse endpoint to process a remote image.

    :param url: Image url, sent as the ``url`` form field.
    :param overlay: Value sent as ``isOverlayRequired``.
                    Defaults to False.
    :param api_key: Value sent as ``apikey``.
                    Defaults to ``Config.OCR_API``.
    :param language: Language code sent as ``language``.
                    List of available language codes can be found on
                    https://ocr.space/OCRAPI
                    Defaults to 'eng'.
    :return: The response body decoded from JSON.
    """
    endpoint = "https://api.ocr.space/parse/image"
    form_fields = dict(
        url=url,
        isOverlayRequired=overlay,
        apikey=api_key,
        language=language,
    )
    response = requests.post(endpoint, data=form_fields)
    return response.json()
def progress(current, total):
    """Log how much of a download has completed.

    Passed as ``progress_callback`` to the media download in this module.

    :param current: Amount downloaded so far.
    :param total: Total amount expected; may be 0.
    """
    # A zero total would make the division raise inside the callback;
    # report 0.0 percent instead of failing.
    percent = (current / total) * 100 if total else 0.0
    logger.info(
        "Downloaded {} of {}\nCompleted {}".format(current, total, percent)
    )
@bot.on(d3vil_cmd(pattern="ocrlang", outgoing=True))
@bot.on(sudo_cmd(pattern="ocrlang", allow_sudo=True))
async def get_ocr_languages(event):
    """Reply with the OCR language names and their codes as indented JSON.

    Forwarded messages are ignored.
    """
    if event.fwd_from:
        return
    # Language name -> OCR language code; json.dumps sorts the keys on output.
    code_by_language = {
        "Arabic": "ara",
        "Bulgarian": "bul",
        "Chinese (Simplified)": "chs",
        "Chinese (Traditional)": "cht",
        "Croatian": "hrv",
        "Czech": "cze",
        "Danish": "dan",
        "Dutch": "dut",
        "English": "eng",
        "Finnish": "fin",
        "French": "fre",
        "German": "ger",
        "Greek": "gre",
        "Hungarian": "hun",
        "Italian": "ita",
        "Japanese": "jpn",
        "Korean": "kor",
        "Polish": "pol",
        "Portuguese": "por",
        "Russian": "rus",
        "Slovenian": "slv",
        "Spanish": "spa",
        "Swedish": "swe",
        "Turkish": "tur",
    }
    listing = json.dumps(code_by_language, sort_keys=True, indent=4)
    await eor(event, listing)
@bot.on(d3vil_cmd(pattern=r"ocr (.*)", outgoing=True))
@bot.on(sudo_cmd(pattern=r"ocr (.*)", allow_sudo=True))
async def parse_ocr_space_api(event):
    """Run OCR on the media of the replied-to message and show the text.

    The language code is taken from the command argument. The media is
    downloaded into ``Config.TMP_DOWNLOAD_DIRECTORY``, sent through
    ``ocr_space_file`` and the downloaded file is removed afterwards.
    Forwarded messages are ignored.
    """
    if event.fwd_from:
        return
    d3vil = await eor(event, "Processing weit...🤓")
    os.makedirs(Config.TMP_DOWNLOAD_DIRECTORY, exist_ok=True)
    lang_code = event.pattern_match.group(1)
    downloaded_file_name = await borg.download_media(
        await event.get_reply_message(),
        Config.TMP_DOWNLOAD_DIRECTORY,
        progress_callback=progress,
    )
    # NOTE(review): presumably download_media yields None when there is no
    # reply or the reply carries no media - confirm against the Telethon docs.
    # Without this guard the open() inside ocr_space_file fails on None.
    if not downloaded_file_name:
        await eod(d3vil, "Reply to an image to read its text.")
        return

    try:
        test_file = ocr_space_file(
            filename=downloaded_file_name, language=lang_code
        )
    finally:
        # The file is only needed for the upload; remove it even if the
        # request raises so failed runs do not leave files behind.
        os.remove(downloaded_file_name)

    try:
        parsed_text = test_file["ParsedResults"][0]["ParsedText"]
        # The API reports milliseconds; the message below shows whole seconds.
        seconds = str(int(test_file["ProcessingTimeInMilliseconds"]) // 1000)
    except (KeyError, IndexError, TypeError, ValueError) as e:
        await eod(
            d3vil,
            "**Errors !!** \n`{}`\n**Report This to** {}\n\n`{}`".format(
                str(e), d3vil_grp, json.dumps(test_file, sort_keys=True, indent=4)
            ),
        )
        # Nothing was parsed, so there is no text to show; stop here instead
        # of falling through to the final edit with an unbound variable.
        return

    await d3vil.edit(
        "Read Document in {} seconds. \n{}".format(seconds, parsed_text)
    )
    # The final state of the message is the bare recognised text.
    await d3vil.edit(parsed_text)
# Help entries for the two commands defined in this module.
(
    CmdHelp("ocr")
    .add_command(
        "ocr",
        "<reply to a img> <lang code>",
        "Reads and sends you the text written in replied image in selected language",
    )
    .add_command(
        "ocrlang",
        None,
        "Gives the list of supported languages of OCR.",
    )
    .add()
)