Skip to content

JerrZhang/ztx

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

5 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

node scrawl

node spiders

Installation

npm install --save ztx

Usage

create spiders

  • Create a spiders directory in the project's root directory
mkdir spiders
  • Create spider files in that directory; each spider class extends ztx.Spider
const Spider = require('ztx').Spider;
const import$ = require('ztx').import$;
const core = require('ztx').Core;

module.exports = class CnblogsSpider extends Spider {
    constructor() {
        super()
        this.name = "test cnblogs";
        this.start_urls = "https://www.cnblogs.com/cnblogs";
        //爬虫中间件 用于获取和处理parse 后的数据
        this.use((ctx, next) => {
            console.log(ctx.value);
            // next();
        });
    }

    * parse(res) {
        let $ = import$.load(res);
        let titles = $('.post-list-item .PostTitle');
        for (let i = 0; i < titles.length; i++) {
            yield $(titles[i]).text();
        }


        let $next = $('#pager>a').last();
        let nextUrl = $next.attr('href');
        //获取下一个链接的地址
        if (nextUrl) {
            core.nextRequest(nextUrl, this.parse);
        }

    }
}
  • start ztx
// Take the Core export of ztx and start it.
const { Core: app } = require('ztx');

app.start();

About

node爬虫框架ztx 简单易用

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published