/
spider.py
49 lines (36 loc) · 907 Bytes
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# coding: utf8
""" Base Spider"""
from conf.settings import Settings
from http.request import Request
from engine import Engine
class Spider(object):
    """Common base class for spiders.

    On construction it guarantees a ``start_urls`` attribute, builds a
    ``Settings`` object from ``custom_settings`` and then runs the
    ``initialize`` hook. Subclasses are expected to override ``parse``
    (the base version raises ``NotImplementedError``) and may override
    ``initialize``, ``start_requests`` and ``process_item``.
    """

    # Value handed to ``Settings(...)`` in ``__init__``; ``None`` unless a
    # subclass (or instance) overrides it.
    custom_settings = None

    def __init__(self):
        """Set up ``start_urls`` and ``settings``, then call ``initialize``."""
        # Only supply an empty default when neither the class nor the
        # instance already defines start_urls.
        if not hasattr(self, "start_urls"):
            self.start_urls = []

        # Settings are built before the hook so initialize() can read them.
        self.settings = Settings(self.custom_settings)

        self.initialize()

    def initialize(self):
        """Hook run at the end of ``__init__``; does nothing by default."""

    def start_requests(self):
        """Yield one ``Request`` per entry of ``self.start_urls``, in order."""
        for start_url in self.start_urls:
            yield Request(start_url)

    def start(self):
        """Create an ``Engine`` bound to this spider and start it."""
        Engine(self).start()

    def parse(self, response):
        """Handle a response; must be overridden by subclasses.

        NOTE(review): presumably invoked by the engine with the fetched
        response -- confirm against ``Engine``.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    def process_item(self, item):
        """Hook for handling an item; the base version does nothing.

        NOTE(review): presumably receives items produced by ``parse`` --
        confirm against ``Engine``.
        """