This repository was archived by the owner on Apr 9, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathunicode_lookup.py
95 lines (84 loc) · 2.76 KB
/
unicode_lookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import time
import difflib
from util.hook import *
from util import web
from util import database
from util import output
# In-memory lookup tables, filled by gen_db() or restored from the cache in setup().
# List of every character name (optionally suffixed with its alternate name).
uc_names = []
# Maps a code point string (first UnicodeData.txt field) to its name in uc_names.
cp_names = {}
# Maps a name to the full list of ';'-separated UnicodeData.txt fields for it.
uc = {}
@hook(cmds=['u'], ex='u SPACE')
def u(code, input, search=False):
    """Look up unicode information."""
    # NOTE(review): the docstring above is deliberately left as the original
    # one-liner in case the bot surfaces it at runtime -- confirm before expanding.
    #
    # code   -- bot object; only .reply() and .say() are used here.
    # input  -- trigger object; group(2) is the text after the command.
    # search -- when True, report the candidate names instead of one character.
    raw = input.group(2)
    if not raw:
        # A missing argument used to be repr()'d into the literal query "None".
        return code.reply('You gave me zero length input.')

    # Escape non-ASCII input so a pasted character becomes its hex code point
    # (e.g. a snowman becomes "\u2603"). Python 2's repr() does this for unicode
    # strings; Python 3 needs ascii() for the same result.
    try:
        escape = ascii
    except NameError:
        escape = repr
    text = escape(raw)
    # Drop only the leading unicode-literal prefix, not every "u" in the query.
    if text.startswith('u'):
        text = text[1:]
    arg = text.replace('\\u', '').replace('\'', '')
    if not arg:
        return code.reply('You gave me zero length input.')
    query = arg.upper()

    # First try the query as a (fuzzy) character name.
    found = difflib.get_close_matches(query, uc_names)
    if not found and query in uc_names:
        # Keep `found` a list so indexing/joining below works on whole names.
        found = [query]

    if found:
        if search:
            return code.say('{b}Possible matches:{b} %s' % ', '.join(found))
        char = uc[found[0]]
    elif query in cp_names:
        # Try the codepoint search too...
        char = uc[cp_names[query]]
    elif len(arg) < 7:
        # Short queries are reported as if they were a code point.
        return code.reply('No results found for U+%s' % query)
    else:
        return code.reply('No results found for "%s"' % query)

    # char is the UnicodeData.txt field list: char[0] code point, char[1] name.
    msg = u'{} (U+{}) '.format(char[1], char[0])
    msg += '"' + u_converter('\\u' + char[0]) + '"'
    code.say(msg)
def _parse_unicode_data(text):
    """Split the text of UnicodeData.txt into the module's three lookup tables.

    Returns a ``(records, names, codepoints)`` tuple: ``records`` maps each
    name to its full field list, ``names`` lists the names in file order, and
    ``codepoints`` maps the code point field to the name.
    """
    records, names, codepoints = {}, [], {}
    for line in text.splitlines():
        fields = line.split(';')
        # Blank or truncated lines have no name / alternate-name columns.
        if len(fields) < 11:
            continue
        name = fields[1]
        # Field 10 is the Unicode 1.0 name (per UAX #44); append it when present
        # so entries such as "<control>" become searchable by that name.
        if fields[10]:
            name = name + ' ' + str(fields[10])
        records[name] = fields
        names.append(name)
        codepoints[fields[0]] = name
    return records, names, codepoints


def gen_db(botname):
    """Download UnicodeData.txt, rebuild the lookup tables and cache them.

    The module-level ``uc``, ``uc_names`` and ``cp_names`` are refilled, then
    stored together with the current timestamp through ``database.set`` under
    the 'unicodedata' key for ``botname``.
    """
    # http://www.unicode.org/reports/tr44/#UnicodeData.txt
    output.info('Downloading Unicode data')
    text = web.text('http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
    records, names, codepoints = _parse_unicode_data(text)

    # Swap the contents in place: this resets any previous data (so a second
    # call cannot duplicate names) while keeping the same container objects.
    uc.clear()
    uc.update(records)
    uc_names[:] = names
    cp_names.clear()
    cp_names.update(codepoints)

    database.set(botname, {'uc': uc, 'uc_names': uc_names,
                           'cp_names': cp_names, 'time': int(time.time())}, 'unicodedata')
def setup(code):
    """Load the Unicode tables from the cache, regenerating them when needed.

    The tables are rebuilt with gen_db() when no cached entry exists for
    ``code.default`` or when the cached entry is more than six days old;
    otherwise the cached tables are bound to the module-level names.
    """
    global uc_names, cp_names, uc
    max_age = 6 * 24 * 60 * 60  # six days, in seconds
    now = int(time.time())
    cached = database.get(code.default, 'unicodedata')

    # Nothing stored yet: build from scratch.
    if not cached:
        gen_db(code.default)
        return

    # Stored data is too old: rebuild instead of loading it.
    age = int(now - int(cached['time']))
    if age > max_age:
        gen_db(code.default)
        return

    uc = cached['uc']
    uc_names = cached['uc_names']
    cp_names = cached['cp_names']
@hook(cmds=['us'], ex='us SPACE')
def us(code, input):
    # Search form of the `u` command: delegates to u() with search enabled so
    # matching names are listed rather than a single character being shown.
    search_mode = True
    u(code, input, search=search_mode)
def u_converter(string):
    """Turn backslash-u separated hex code points into the characters they name.

    ``string`` is split on the two-character sequence backslash + "u"; every
    non-empty piece is parsed as a whole as a hexadecimal code point. Pieces
    that are not valid hex, or that fall outside the range the interpreter can
    represent, are skipped. Returns the concatenation of the decoded
    characters ('' when nothing could be decoded).
    """
    # unichr() only exists on Python 2; on Python 3 chr() does the same job.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    decoded = []
    # '\\u' is a literal backslash followed by "u" on both Python 2 and 3
    # (a bare '\u' is a syntax error in a Python 3 string literal).
    for piece in string.split('\\u'):
        if not piece:
            continue
        try:
            decoded.append(to_char(int(piece, 16)))
        except (ValueError, OverflowError):
            # Not hex, or not a representable code point: ignore this piece.
            continue
    return ''.join(decoded)