This repository has been archived by the owner on Sep 18, 2019. It is now read-only.
/
test_decoding.py
76 lines (67 loc) · 2.95 KB
/
test_decoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# coding: utf8
"""
Tests for decoding bytes to Unicode
-----------------------------------
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
import pytest
from tinycss.decoding import decode
def params(css, encoding, use_bom=False, expect_error=False, **kwargs):
    """Nicer syntax to make a tuple.

    Packs one test case into the positional tuple layout used by the
    ``parametrize`` table: ``(css, encoding, use_bom, expect_error, kwargs)``.
    Any extra keyword arguments are collected, unchanged, into the trailing
    ``kwargs`` dict.
    """
    return css, encoding, use_bom, expect_error, kwargs
@pytest.mark.parametrize(('css', 'encoding', 'use_bom', 'expect_error',
                          'kwargs'), [
    params('𐂃', 'utf8'),
    params('é', 'latin1', expect_error=True),
    params('é', 'latin1', protocol_encoding='ISO-8859-1'),
    params('é', 'latin1', linking_encoding='ISO-8859-1'),
    params('é', 'latin1', document_encoding='ISO-8859-1'),
    params('é', 'latin1', protocol_encoding='utf8',
           document_encoding='latin1'),
    params('@charset "utf8"; é', 'latin1', expect_error=True),
    params('@charset "uùùùùtf8"; é', 'latin1', expect_error=True),
    params('@charset "utf8"; é', 'latin1', document_encoding='latin1'),
    params('é', 'latin1', linking_encoding='utf8',
           document_encoding='latin1'),
    params('@charset "utf-32"; 𐂃', 'utf-32-be'),
    params('@charset "ISO-8859-1"; é', 'latin1'),
    params('@charset "ISO-8859-8"; é', 'latin1', expect_error=True),
    params('𐂃', 'utf-16-le', expect_error=True),  # no BOM
    params('𐂃', 'utf-16-le', use_bom=True),
    params('𐂃', 'utf-32-be', expect_error=True),
    params('𐂃', 'utf-32-be', use_bom=True),
    params('𐂃', 'utf-32-be', document_encoding='utf-32-be'),
    params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'),
    params('@charset "utf-32-le"; 𐂃', 'utf-32-be',
           use_bom=True, expect_error=True),
    # protocol_encoding takes precedence over @charset
    params('@charset "ISO-8859-8"; é', 'latin1',
           protocol_encoding='ISO-8859-1'),
    params('@charset "ISO-8859-1"; é', 'latin1',
           protocol_encoding='utf8'),
    # @charset takes precedence over document_encoding
    params('@charset "ISO-8859-1"; é', 'latin1',
           document_encoding='ISO-8859-8'),
    # @charset takes precedence over linking_encoding
    params('@charset "ISO-8859-1"; é', 'latin1',
           linking_encoding='ISO-8859-8'),
    # linking_encoding takes precedence over document_encoding
    params('é', 'latin1',
           linking_encoding='ISO-8859-1', document_encoding='ISO-8859-8'),
])
def test_decode(css, encoding, use_bom, expect_error, kwargs):
    """Round-trip ``css`` through ``encoding`` and ``decode()``.

    The text is encoded with ``encoding`` (prefixed with U+FEFF when
    ``use_bom`` is true) and the resulting bytes are handed to ``decode()``
    together with ``kwargs``.

    An "error" here means either that ``decode()`` raised
    ``UnicodeDecodeError`` or that it returned text different from ``css``;
    both outcomes make ``result != css`` true.
    """
    source = '\ufeff' + css if use_bom else css
    css_bytes = source.encode(encoding)
    try:
        # Only the decoded text is checked; the reported encoding is ignored.
        result, _ = decode(css_bytes, **kwargs)
    except UnicodeDecodeError as exc:
        # Keep the exception as the result: it never compares equal to the
        # expected text, so the assertions below treat it as a failure.
        result = exc
    if expect_error:
        assert result != css, 'Unexpected unicode success'
    else:
        assert result == css, 'Unexpected unicode error'