Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Crawl multiple domains using one or more entry URLs.
JavaScript
Branch: master

Fetching latest commit…

Cannot retrieve the latest commit at this time

Failed to load latest commit information.
.gitignore
LICENSE
README.md
index.js
package.json
spider-error.js
spiderweb.js

README.md

Spiderweb

Crawl multiple domains using one or more entry URLs.

Installation

npm install spiderweb
// Usage example: crawl multiple domains starting from one or more entry URLs.
var Spiderweb = require('spiderweb'),
    urls, options, spiderweb;

// Entry URLs the crawl starts from.
urls = ['bar.com', 'foo.bar.com'];

// Crawler options. NOTE(review): exact semantics are defined by the spiderweb
// package; the excludedUrls entries look like wildcard patterns - confirm
// against the library.
options = {
    strictSSL: false,
    images: true,
    excludedUrls: ['*biz.foo.com*', '*/admin/*']
};

spiderweb = new Spiderweb(urls, options);

// Override the page handler on this instance to log each entry's URL, then
// delegate to the default handler from the prototype. The override must be
// attached to `spiderweb` (the instance created above); no `spider` variable
// exists in this script.
spiderweb.pageHandler = function(err, resp, body, entry) {
    console.log(entry.url);
    Spiderweb.prototype.pageHandler.apply(this, arguments);
};

// Start the crawl; the callback receives an error (if any) and a log whose
// length is reported as the number of page errors.
spiderweb.start(function(err, log) {
    if (err) {
        console.log('ERROR: ', err);
    }

    // Guard against a missing log (e.g. when the callback is invoked with
    // only an error) so the summary never throws.
    if (log && log.length) {
        console.log('DONE: ' + log.length + ' page errors');
    }
    else {
        console.log('DONE: no errors');
    }
});
Something went wrong with that request. Please try again.