-
-
Notifications
You must be signed in to change notification settings - Fork 663
/
targets.py
248 lines (192 loc) · 9.58 KB
/
targets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
"""
weasyprint.formatting_structure.targets
---------------------------------------
Handle target-counter, target-counters and target-text.
The TargetCollector is a structure providing required targets'
counter_values and stuff needed to build pending targets later,
when the layout of all targeted anchors has been done.
:copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import copy
from ..logger import LOGGER
class TargetLookupItem(object):
    """Record describing one anchor referenced by ``target-*`` content.

    Instances are stored in the TargetCollector's ``target_lookup_items``,
    keyed by anchor name. They track whether the anchor is still pending and
    hold the page based counter values of the anchor during pagination.

    """

    def __init__(self, state='pending'):
        # Page counters of the target box, filled in during pagination.
        self.cached_page_counter_values = {}

        # Position of the anchor during pagination (page_number - 1).
        self.page_maker_index = None

        # Callbacks to run again when pending targets are checked, indexed
        # by (source_box, css_token).
        self.parse_again_functions = {}

        # Box the anchor resolves to. target-counter and target-counters
        # read its .cached_counter_values, target-text needs it for
        # TEXT_CONTENT_EXTRACTORS.
        self.target_box = None

        # Lookup state; the collector uses 'pending', 'up-to-date' and
        # 'undefined'.
        self.state = state
class CounterLookupItem(object):
    """Record describing a box whose content needs page based counters.

    Instances are stored in the TargetCollector's ``counter_lookup_items``.

    """

    def __init__(self, parse_again, missing_counters, missing_target_counters):
        # Page counters of the targeting box, filled in during pagination.
        self.cached_page_counter_values = {}

        # Flag read by remake_page; starts cleared.
        self.pending = False

        # Position of the box during pagination (page_number - 1).
        self.page_maker_index = None

        # Counters, and counters of targets, that could not be resolved yet.
        self.missing_target_counters = missing_target_counters
        self.missing_counters = missing_counters

        # Callback to run again in order to check the pending counter.
        self.parse_again = parse_again
class TargetCollector(object):
    """Collector of HTML targets used by CSS content with ``target-*``."""

    def __init__(self):
        # Names of the anchors already met while parsing, in order.
        self.existing_anchors = []

        # Raised by lookup_target() when a needed target has not been stored
        # yet; check_pending_targets() reads it to decide whether the
        # registered parse_again functions have to be called.
        self.had_pending_targets = False

        # While True, compute_content_list() collects missing page counters
        # in CounterLookupItems. Once False, it mixes in the
        # TargetLookupItem's cached_page_counter_values instead.
        # check_pending_targets() switches it to False.
        self.collecting = True

        # Lookup items for page counters and for targets.
        self.counter_lookup_items = {}
        self.target_lookup_items = {}

    def anchor_name_from_token(self, anchor_token):
        """Return the anchor name held by a string or url token.

        A ``'string'`` token yields its value without the leading ``'#'``,
        a ``'url'`` token yields the name of its ``'internal'`` target.
        Any other token gives ``None``.

        """
        token_type = anchor_token[0]
        if token_type == 'string':
            text = anchor_token[1]
            if text.startswith('#'):
                return text[1:]
        elif token_type == 'url':
            link = anchor_token[1]
            if link[0] == 'internal':
                return link[1]
        return None

    def collect_anchor(self, anchor_name):
        """Remember ``anchor_name`` as an anchor existing in the document.

        Empty or non-string names are ignored. A name that is already known
        is not stored again, a warning is logged instead.

        """
        if not anchor_name or not isinstance(anchor_name, str):
            return
        if anchor_name in self.existing_anchors:
            LOGGER.warning('Anchor defined twice: %s', anchor_name)
            return
        self.existing_anchors.append(anchor_name)

    def collect_computed_target(self, anchor_token):
        """Register the internal target named by ``anchor_token``.

        A pending TargetLookupItem is created for the anchor name unless one
        is already registered. Tokens giving no anchor name are ignored.

        """
        name = self.anchor_name_from_token(anchor_token)
        if not name:
            return
        if name not in self.target_lookup_items:
            self.target_lookup_items[name] = TargetLookupItem()

    def lookup_target(self, anchor_token, source_box, css_token, parse_again):
        """Return the TargetLookupItem matching ``anchor_token``.

        When the target has already been stored, the returned item is
        'up-to-date'. When it is still 'pending' and its anchor exists,
        ``parse_again`` is registered for ``(source_box, css_token)`` so that
        the content can be computed again later. In all other cases the item
        is 'undefined' and an error is logged.

        """
        name = self.anchor_name_from_token(anchor_token)
        found = self.target_lookup_items.get(name)
        if found is None:
            # Unregistered target: hand back a throwaway undefined item.
            found = TargetLookupItem('undefined')

        if found.state == 'pending':
            if name not in self.existing_anchors:
                # No such anchor in the document, stop waiting for it.
                found.state = 'undefined'
            else:
                self.had_pending_targets = True
                found.parse_again_functions.setdefault(
                    (source_box, css_token), parse_again)

        if found.state == 'undefined':
            LOGGER.error(
                'Content discarded: target points to undefined anchor "%s"',
                anchor_token)

        return found

    def store_target(self, anchor_name, target_counter_values, target_box):
        """Attach ``target_box`` to the pending item of ``anchor_name``.

        Nothing happens unless a TargetLookupItem in 'pending' state is
        registered for this anchor; such an item becomes 'up-to-date'.

        """
        pending_item = self.target_lookup_items.get(anchor_name)
        if not pending_item or pending_item.state != 'pending':
            return

        pending_item.state = 'up-to-date'
        pending_item.target_box = target_box

        # Keep the counter values on the box itself, as
        # compute_content_list does. Values already present are kept.
        # TODO: remove attribute or set a default value in Box class
        if hasattr(target_box, 'cached_counter_values'):
            return
        target_box.cached_counter_values = copy.deepcopy(
            target_counter_values)

    def collect_missing_counters(self, parent_box, css_token,
                                 parse_again_function, missing_counters,
                                 missing_target_counters):
        """Register the counters ``parent_box`` misses during formatting.

        The collected ``missing_counters`` are used again during pagination.
        ``parent_box`` gets a ``missing_link`` attribute referring to itself,
        needed to connect the paginated boxes to their originating box.

        """
        # Counters are only collected before pagination starts.
        if not self.collecting:
            return

        # Empty miss-lists are not worth an item.
        if not (missing_counters or missing_target_counters):
            return

        # TODO: remove attribute or set a default value in Box class
        if not hasattr(parent_box, 'missing_link'):
            parent_box.missing_link = parent_box

        key = (parent_box, css_token)
        # An item already registered under this key is kept as is.
        self.counter_lookup_items.setdefault(key, CounterLookupItem(
            parse_again_function, missing_counters, missing_target_counters))

    def check_pending_targets(self):
        """Run the registered parse_again functions, then stop collecting."""
        if self.had_pending_targets:
            for lookup_item in self.target_lookup_items.values():
                for parse_again in lookup_item.parse_again_functions.values():
                    parse_again()
            self.had_pending_targets = False

        # From now on we are paginating.
        self.collecting = False

    def cache_target_page_counters(self, anchor_name, page_counter_values,
                                   page_maker_index, page_maker):
        """Store target's current ``page_maker_index`` and page counter values.

        When the page counter values of the target changed, the boxes whose
        content depends on them are updated too.

        """
        # Page counters are only stored while paginating.
        if self.collecting:
            return

        target_item = self.target_lookup_items.get(anchor_name)
        if not target_item or target_item.state != 'up-to-date':
            return

        target_item.page_maker_index = page_maker_index
        if target_item.cached_page_counter_values != page_counter_values:
            target_item.cached_page_counter_values = copy.deepcopy(
                page_counter_values)
            self._spread_page_counter_change(
                anchor_name, page_counter_values, page_maker)

    def _spread_page_counter_change(self, anchor_name, page_counter_values,
                                    page_maker):
        """Update boxes affected by a change in an anchor's page counters."""
        for (_, css_token), counter_item in self.counter_lookup_items.items():
            # Only items needing counters in their content are concerned.
            if css_token != 'content':
                continue

            # Skip items missing no target counter for this anchor.
            wanted_counters = counter_item.missing_target_counters.get(
                anchor_name)
            if wanted_counters is None:
                continue

            # Box not attached to a known page: leave a pending marker for
            # remake_page.
            index = counter_item.page_maker_index
            if index is None or index >= len(page_maker):
                counter_item.pending = True
                continue

            # TODO: Is the item at all interested in the new
            # page_counter_values? It probably is and this check is a
            # brake.
            if any(page_counter_values.get(counter_name) is not None
                   for counter_name in wanted_counters):
                # The last element of the page's entry is its remake state.
                page_maker[index][-1]['content_changed'] = True
                counter_item.parse_again(
                    counter_item.cached_page_counter_values)
            # Hint: the box's own cached page counters trigger a
            # separate 'content_changed'.