forked from scrapy/scrapy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
processor.py
115 lines (87 loc) · 3.23 KB
/
processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
This module provides some commonly used processors for Item Loaders.
See documentation in docs/topics/loaders.rst
"""
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.datatypes import MergeDict
from .common import wrap_loader_context
class MapCompose(object):
    """Processor that maps a chain of functions over a collection of values.

    The input is first normalised with ``arg_to_iter``. Every function is
    then applied, in the order given, to each value produced by the previous
    step; each individual result is again passed through ``arg_to_iter`` and
    its items are gathered into a flat list that feeds the next function.

    Keyword arguments passed to the constructor are stored as the default
    loader context.
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.default_loader_context = default_loader_context

    def __call__(self, value, loader_context=None):
        """Run ``value`` through every function and return the final values.

        When ``loader_context`` is truthy it is combined with the defaults via
        ``MergeDict(loader_context, defaults)`` (presumably the first mapping
        wins on key clashes -- confirm in ``scrapy.utils.datatypes``);
        otherwise the defaults are used on their own.
        """
        values = arg_to_iter(value)
        context = (
            MergeDict(loader_context, self.default_loader_context)
            if loader_context
            else self.default_loader_context
        )
        # All functions are wrapped up front, before any of them is applied.
        steps = [wrap_loader_context(fn, context) for fn in self.functions]
        for step in steps:
            # Flatten the per-item results into the input of the next step.
            values = [
                result
                for item in values
                for result in arg_to_iter(step(item))
            ]
        return values
class Compose(object):
    """Processor that feeds a single value through a chain of functions.

    Unlike a per-item mapping, the whole value is handed to the first
    function, its return value to the second, and so on. Keyword arguments
    passed to the constructor are stored as the default loader context; the
    ``stop_on_none`` keyword (default ``True``) is additionally read from
    them and controls whether the chain is abandoned once the value is
    ``None``.
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.default_loader_context = default_loader_context
        # ``stop_on_none`` stays inside the default context as well.
        self.stop_on_none = default_loader_context.get('stop_on_none', True)

    def __call__(self, value, loader_context=None):
        """Return ``value`` after passing it through every function in order.

        A truthy ``loader_context`` is combined with the defaults via
        ``MergeDict(loader_context, defaults)``; otherwise the defaults are
        used unchanged. If ``stop_on_none`` is set and the running value is
        ``None`` before a function is due to run, that ``None`` is returned
        and the remaining functions are skipped.
        """
        context = self.default_loader_context
        if loader_context:
            context = MergeDict(loader_context, context)
        # All functions are wrapped up front, before any of them is applied.
        pipeline = [wrap_loader_context(fn, context) for fn in self.functions]
        for stage in pipeline:
            if self.stop_on_none and value is None:
                return value
            value = stage(value)
        return value
class Filter(object):
    """Processor that keeps only the values accepted by a predicate.

    The default predicate rejects ``None`` and the empty string and keeps
    everything else (including other falsy values such as ``0``). Passing
    ``None`` as the predicate keeps only truthy values. The result is always
    a tuple.

    >>> Filter()(['A', 0, '', 0.0, None, -1])
    ('A', 0, 0.0, -1)
    >>> Filter(None)(['A', 0, '', 0.00, None, -1])
    ('A', -1)
    >>> Filter(lambda s: len(str(s)) > 1)(['A', 0, '', 0.00, None, -1])
    (0.0, None, -1)
    """

    def __init__(self, function=lambda v: v is not None and v != ''):
        self.function = function

    def __call__(self, values):
        """Return a tuple of the items in ``values`` that pass the predicate."""
        # A ``None`` predicate means "keep truthy items", as with ``filter``.
        keep = bool if self.function is None else self.function
        return tuple(item for item in values if keep(item))
class Slice(object):
    """Processor that returns the ``[begin:end]`` slice of its input.

    Both bounds default to ``None``, which yields a slice covering the whole
    input.

    >>> Slice()([2, 3, 5, 7])
    [2, 3, 5, 7]
    >>> Slice(None, 3)([2, 3, 5, 7])
    [2, 3, 5]
    """

    def __init__(self, begin=None, end=None):
        self.begin = begin
        self.end = end

    def __call__(self, values):
        """Return ``values[begin:end]``."""
        window = slice(self.begin, self.end)
        return values[window]
class TakeFirst(object):
    """Processor that returns the first value accepted by a predicate.

    The default predicate accepts anything that is neither ``None`` nor the
    empty string. Passing ``None`` as the predicate selects the first truthy
    value instead. When no value is accepted the result is ``None``.

    >>> [TakeFirst()(c) for c in ((0,'A'), ('', 0), (None, 'A'))]
    [0, 0, 'A']
    >>> [TakeFirst(None)(c) for c in ((0,'A'), ('', 0), (None, 'A'))]
    ['A', None, 'A']
    >>> [TakeFirst(lambda s: len(str(s)) > 1)(c) for c in ((0,'A'), ('', 0), (None, 'A'))]
    [None, None, None]
    """

    def __init__(self, function=lambda v: v is not None and v != ''):
        # A ``None`` predicate is shorthand for plain truthiness.
        if function is None:
            function = bool
        self.function = function

    def __call__(self, values):
        """Return the first accepted item of ``values``, or ``None``."""
        for candidate in values:
            if not self.function(candidate):
                continue
            return candidate
        return None
class Identity(object):
    """Processor that hands its input back untouched."""

    def __call__(self, values):
        """Return ``values`` itself, without copying or modifying it."""
        return values
class Join(object):
    """Processor that concatenates values with a separator.

    The separator defaults to a single space.
    """

    def __init__(self, separator=u' '):
        self.separator = separator

    def __call__(self, values):
        """Return the items of ``values`` joined by the separator."""
        glue = self.separator
        return glue.join(values)