-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
60 lines (52 loc) · 1.7 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
var request = require('request');
var config = require('./config');
var washHTML = require('./washHTML');
var writeFile = require('./writeFile');
var lib = require('./lib');
var async = require('async');
// Run the output-file setup step before any page is fetched
// (presumably creates the result file — confirm in ./writeFile).
writeFile.createFile();

// Reset the `curpage` parameter of the request URL to 1 so the crawl
// starts from a known state (exact rewriting rules live in ./lib).
var firstPageUrl = lib.changeURLPar(config.options.url, 'curpage', 1);
config.options.url = firstPageUrl;
/**
 * Page crawler.
 *
 * Fetches every page from `config.startPage` through `config.endPage`
 * (inclusive), passes each successful response body to `washHTML.getDate`,
 * runs at most `config.asyncLimit` requests at a time and waits
 * `config.delay` milliseconds after each page before taking the next one.
 */
var spider = {
    /**
     * Copies the crawl settings from `config` onto the spider, resets the
     * page queue and builds it, which in turn starts the crawl.
     */
    init: function () {
        this.startPage = config.startPage;
        this.endPage = config.endPage;
        // Stored for reference only; nothing in this object reads it.
        this.intervalTime = config.intervalTime;
        this.totalPages = [];
        this.initTotalPages();
    },

    /**
     * Runs `getHtml` once per queued page with bounded concurrency and logs
     * either the error or the completion message when every page is done.
     */
    asyncLimit: function () {
        var self = this;
        // Walk a snapshot: getHtml() consumes this.totalPages with shift(),
        // so the live queue must not be the collection mapLimit iterates.
        var pending = self.totalPages.slice();
        async.mapLimit(pending, config.asyncLimit, function (option, callback) {
            self.getHtml(config, callback);
        }, function (err) {
            if (err) {
                console.log(err);
                return;
            }
            console.log('爬取完成');
        });
    },

    /**
     * Fills the queue with every page number in [startPage, endPage] and
     * then starts the crawl.
     */
    initTotalPages: function () {
        for (var i = this.startPage; i <= this.endPage; i++) {
            this.totalPages.push(i);
        }
        this.asyncLimit();
    },

    /**
     * Takes the next page number off the queue, requests it and processes
     * the response.
     *
     * @param {*} option - Unused; kept so existing call sites keep working.
     * @param {Function} callback - Invoked with no arguments once the page
     *   has been handled (successfully or not) and the delay has elapsed.
     *   It is always invoked exactly once so the surrounding mapLimit can
     *   finish.
     * @returns {(boolean|undefined)} `false` when the queue was already
     *   empty, otherwise `undefined`.
     */
    getHtml: function (option, callback) {
        var page = this.totalPages.shift();
        if (page === undefined) {
            // Nothing left to fetch; still report back so the run can end.
            callback();
            return false;
        }
        // NOTE(review): config.options is shared by all in-flight requests;
        // this relies on request() reading the URL at call time — confirm.
        config.options.url = lib.changeURLPar(config.options.url, 'curpage', page);
        request(config.options, function (err, response, body) {
            console.log('正在爬取第' + page + '页');
            if (err) {
                console.log(err);
                console.log('爬取第' + page + '页出错');
            } else {
                // Only parse when a response actually arrived.
                washHTML.getDate(body);
            }
            // A failed page does not abort the crawl: the delay still
            // applies and the callback is called without an error.
            setTimeout(function () {
                if (!err) {
                    console.log('第' + page + '页已完成,人工延迟:' + config.delay + '毫秒');
                }
                callback();
            }, config.delay);
        });
    }
}
// Entry point: init() reads the settings from config, builds the page queue
// and, through initTotalPages() -> asyncLimit(), starts the crawl.
spider.init();