-
Notifications
You must be signed in to change notification settings - Fork 21
/
utils.py
203 lines (172 loc) · 5.9 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import logging
__author__ = 'leifj'
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509 import load_pem_x509_certificate, load_der_x509_certificate
from defusedxml import lxml
from lxml import etree as etree
from xmlsec.PyCryptoShim import RSAobjShim
from xmlsec.int_to_bytes import int_to_bytes
from xmlsec.exceptions import XMLSigException
from six.moves import html_entities as htmlentitydefs
import six
import re
from io import BytesIO
from base64 import b64encode, standard_b64decode
def parse_xml(data, remove_whitespace=True, remove_comments=True, schema=None):
    """
    Parse XML data into an lxml.etree structure.

    The parser is an ``etree.XMLParser`` configured only with the three
    options below; every other parser setting is left at the lxml default.

    :param data: XML as string
    :param remove_whitespace: boolean, forwarded as ``remove_blank_text``
    :param remove_comments: boolean, forwarded as ``remove_comments``
    :param schema: forwarded as ``schema`` (None by default)
    :returns: XML as lxml.etree
    """
    parser_options = {
        'remove_blank_text': remove_whitespace,
        'remove_comments': remove_comments,
        'schema': schema,
    }
    xml_parser = etree.XMLParser(**parser_options)
    return etree.XML(data, xml_parser)
def pem2b64(pem):
    """
    Strip the header and footer of a .pem and return the base64 body.

    BEWARE: Won't work with explanatory strings above the header -- the
    first and last lines of the stripped input are dropped unconditionally.

    :param pem: the PEM data, either as ASCII bytes or as text
    :returns: the lines between header and footer, joined with newlines
    :raises UnicodeDecodeError: if ``pem`` is bytes holding non-ASCII data
    """
    # XXX try to use cryptography parser to support things like
    # https://tools.ietf.org/html/rfc7468#section-5.2
    if isinstance(pem, bytes):
        # Only bytes need decoding; text input is used as-is (it has no
        # .decode() on Python 3).
        pem = pem.decode('ascii')
    # splitlines() also copes with CRLF line endings, so no stray '\r'
    # characters end up in the returned base64 text.
    return '\n'.join(pem.strip().splitlines()[1:-1])
def b642pem(data):
    """
    Wrap base64 data in a PEM certificate header and footer.

    The payload is cut into lines of at most 64 bytes. There is no newline
    after the footer.

    :param data: the base64 payload as bytes
    :returns: the PEM block as bytes
    """
    width = 64
    # An empty payload still yields one (empty) line between header and footer.
    chunks = [data[start:start + width] for start in range(0, len(data), width)] or [data[:0]]
    return b"-----BEGIN CERTIFICATE-----\n" + b"\n".join(chunks) + b"\n" + b"-----END CERTIFICATE-----"
def _cert2dict(cert):
    """
    Build cert_dict similar to old rsa_x509_pem backend. Shouldn't
    be used by new code.

    The returned dict has the keys ``type``, ``pem``, ``body`` (base64 of
    the DER encoding), ``modulus``, ``publicExponent``, ``subject`` and
    ``cert`` (the certificate wrapped in ``RSAobjShim``).

    :param cert: A cryptography.x509.Certificate object
    :returns: the dict described above
    :raises XMLSigException: if the certificate's public key is not RSA
    """
    public_key = cert.public_key()
    if not isinstance(public_key, rsa.RSAPublicKey):
        raise XMLSigException("We don't support non-RSA public keys at the moment.")

    pem_bytes = cert.public_bytes(encoding=serialization.Encoding.PEM)
    der_bytes = cert.public_bytes(encoding=serialization.Encoding.DER)
    numbers = public_key.public_numbers()
    return {
        'type': "X509 CERTIFICATE",
        'pem': pem_bytes,
        'body': b64encode(der_bytes),
        'modulus': numbers.n,
        'publicExponent': numbers.e,
        'subject': cert.subject,
        'cert': RSAobjShim(cert),
    }
def pem2cert(pem):
    """
    Load a PEM certificate and return a cert_dict similar to the old
    rsa_x509_pem backend. Shouldn't be used by new code.

    :param pem: The certificate in PEM form, handed unchanged to
        ``load_pem_x509_certificate``
    :returns: the dict built by ``_cert2dict``
    """
    return _cert2dict(load_pem_x509_certificate(pem, backend=default_backend()))
def b642cert(data):
    """
    Load a base64-encoded DER certificate and return a cert_dict similar
    to the old rsa_x509_pem backend. Shouldn't be used by new code.

    :param data: The certificate as base64 string (i.e. pem without header/footer)
    :returns: the dict built by ``_cert2dict``
    """
    der_bytes = standard_b64decode(data)
    certificate = load_der_x509_certificate(der_bytes, backend=default_backend())
    return _cert2dict(certificate)
def unescape_xml_entities(text):
    """
    Replace decimal character references and named HTML entities in a text
    string with the characters they stand for.

    The following are returned unchanged:

    * hexadecimal character references (``&#x...;``)
    * the predefined XML entities ``&amp;``, ``&lt;``, ``&gt;``, ``&quot;``
      and ``&apos;``, so that these named entities stay escaped
    * references that cannot be resolved (unknown entity names, numbers
      that are not valid code points)

    :param text: The HTML (or XML) source text.
    :return: The text with all resolvable references replaced.
    """
    try:
        to_char = unichr  # noqa: F821 -- Python 2 builtin
    except NameError:
        to_char = chr  # Python 3: chr() already returns text

    # Every regex match has the form '&name;', so the entities to preserve
    # must be listed in that form too.
    keep_escaped = ('&amp;', '&lt;', '&gt;', '&quot;', '&apos;')

    def fixup(m):
        txt = m.group(0)
        if txt.startswith("&#"):
            # character reference
            if txt.startswith("&#x"):
                return txt  # hexadecimal references are deliberately kept
            try:
                return to_char(int(txt[2:-1]))
            except (ValueError, OverflowError):
                return txt  # not a number / not a valid code point
        # named entity
        if txt in keep_escaped:
            return txt
        try:
            return to_char(htmlentitydefs.name2codepoint[txt[1:-1]])
        except KeyError:
            return txt  # unknown name: leave as is

    return re.sub(r"&#?\w+;", fixup, text)
def delete_elt(elt):
    """
    Remove ``elt`` from its parent while keeping the text that follows it.

    Before the element is removed, its tail text is appended to the tail of
    the previous sibling or, when there is no previous sibling, to the text
    of the parent.

    :param elt: the element to delete; must provide ``getparent()``,
        ``getprevious()`` and ``tail`` (presumably an lxml element)
    :raises XMLSigException: if ``elt`` has no parent
    """
    parent = elt.getparent()
    if parent is None:
        raise XMLSigException("Cannot delete root")

    tail = elt.tail
    if tail is not None:
        previous = elt.getprevious()
        if previous is not None:
            # The tail now follows the previous sibling.
            previous.tail = (previous.tail or '') + tail
        else:
            # elt is the first child: the tail becomes part of the
            # parent's leading text.
            parent.text = (parent.text or '') + tail

    parent.remove(elt)
def root_elt(t):
    """
    Return the root of ``t``.

    :param t: any object; typically an element tree or an element
    :returns: ``t.getroot()`` when ``t`` has a ``getroot`` attribute that
        itself has ``__call__``, otherwise ``t`` unchanged
    """
    getroot = getattr(t, 'getroot', None)
    if getroot is None or not hasattr(getroot, '__call__'):
        return t
    return getroot()
def number_of_bits(num):
    """
    Return the number of binary digits in ``bin(num)``; zero counts as
    one digit.

    In python >= 2.7, there is num.bit_length().

    NOTE: This function appears unused, so it might go away.

    :param num: a non-negative integer (checked with an assert)
    :returns: the length of the binary representation of ``num``
    """
    assert num >= 0
    # bin() prefixes its result with '0b'; drop that and count what is left.
    binary_digits = bin(num)[2:]
    return len(binary_digits)
def b64d(s):
    """
    Decode base64 data; shorthand for ``base64.standard_b64decode``.

    :param s: the base64 encoded input
    :returns: the decoded bytes
    """
    decoded = standard_b64decode(s)
    return decoded
def b64e(s):
    """
    Base64 encode ``s``.

    :param s: the data to encode; an integer is converted with
        ``int_to_bytes`` first, anything else is passed to ``b64encode``
        as-is
    :returns: the result of ``b64encode``
    """
    payload = int_to_bytes(s) if isinstance(s, six.integer_types) else s
    return b64encode(payload)
def serialize(t, stream=None):
    """
    Serialize ``t`` to XML (UTF-8, with XML declaration) and emit it.

    :param t: the element or tree handed to ``etree.tostring``
    :param stream: target passed to ``open()`` for writing; when None the
        XML is printed instead
    """
    import io  # local import: the module itself only imports BytesIO from io

    xml = six.text_type(etree.tostring(t, encoding='utf-8', xml_declaration=True), 'utf-8')
    if stream is None:
        print(xml)
        return
    # The XML declaration announces UTF-8, so the file has to be written as
    # UTF-8 instead of whatever the platform default encoding happens to be.
    with io.open(stream, 'w', encoding='utf-8') as xml_out:
        xml_out.write(xml)
def unicode_to_bytes(u):
    """
    Encode text as UTF-8.

    :param u: the text to encode
    :returns: ``u`` encoded as UTF-8
    """
    if not six.PY2:
        return bytes(u, encoding='utf-8')
    return u.encode('utf-8')
def etree_to_string(obj):
    """
    Serialize an element with ``etree.tostring``, using the encoding
    ``'UTF-8'`` on Python 2 and ``'unicode'`` otherwise.

    :param obj: etree element
    :type obj: lxml.etree.Element
    :return: serialized element
    :rtype: six.string_types
    """
    target_encoding = 'UTF-8' if six.PY2 else 'unicode'
    return etree.tostring(obj, encoding=target_encoding)