diff --git a/index.js b/index.js index 252ed43..419144b 100644 --- a/index.js +++ b/index.js @@ -22,10 +22,7 @@ const getText = $ => const isXmlUrl = url => REGEX_URL_XML.test(path.extname(url)) -const xmlUrls = async ( - url, - { cheerioOpts = {}, whitelist = false, ...opts } = {} -) => { +const xmlUrls = async (url, { cheerioOpts = {}, whitelist = false, ...opts } = {}) => { const { origin: baseUrl } = new URL(url) const { html } = await getHTML(url, opts) const $ = cheerio.load(html, { xmlMode: true, ...cheerioOpts }) @@ -38,9 +35,7 @@ const xmlUrls = async ( const iterator = async (set, url) => { const match = !isEmpty(whitelist) && matcher([url], concat(whitelist)) if (!isEmpty(match)) return set - const urls = isXmlUrl(url) - ? await xmlUrls(url, opts) - : [normalizeUrl(baseUrl, url)] + const urls = isXmlUrl(url) ? await xmlUrls(url, opts) : [normalizeUrl(baseUrl, url)] return new Set([...set, ...urls]) } diff --git a/package.json b/package.json index ed84ee4..1d1e3a6 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "nyc": "latest", "prettier-standard": "latest", "puppeteer": "latest", + "signal-exit": "latest", "simple-git-hooks": "latest", "standard": "latest", "standard-markdown": "latest", @@ -72,6 +73,12 @@ "update:check": "ncu -- --error-level 2" }, "license": "MIT", + "ava": { + "files": [ + "test/**/*", + "!test/util.js" + ] + }, "commitlint": { "extends": [ "@commitlint/config-conventional" @@ -79,7 +86,7 @@ }, "lint-staged": { "package.json": [ - "finepack" + "finepack --sort-ignore-object-at ava" ], "*.js": [ "prettier-standard" diff --git a/test/index.js b/test/index.js index fc4338c..d857a19 100644 --- a/test/index.js +++ b/test/index.js @@ -3,6 +3,7 @@ const test = require('ava') const xmlUrls = require('..') +const { getBrowserless } = require('./util') /** * tests files at: https://gist.github.com/Kikobeats/317550e76f1cbd399cebe3bddc0c146b */ @@ -16,25 +17,27 @@ const fixtures = { } test('Get all URLs from a plain sitemap', async t => { - const urls = await xmlUrls(fixtures.sitemap, { prerender: false }) + const urls = await xmlUrls(fixtures.sitemap, { prerender: false, getBrowserless }) t.snapshot(urls) }) test('Remove duplicates', async t => { const urls = await xmlUrls(fixtures.sitemaWithDuplicates, { - prerender: false + prerender: false, + getBrowserless }) t.snapshot(urls) }) test('Get all URLs from more than one sitemap', async t => { const urls = await xmlUrls([fixtures.sitemap, fixtures.sitemap], { - prerender: false + prerender: false, + getBrowserless }) t.snapshot(urls) }) test('Get all URLs from a sitemap of sitemaps', async t => { - const urls = await xmlUrls(fixtures.sitemapOfSitemaps, { prerender: false }) + const urls = await xmlUrls(fixtures.sitemapOfSitemaps, { prerender: false, getBrowserless }) t.snapshot(urls) }) diff --git a/test/util.js b/test/util.js new file mode 100644 index 0000000..159041a --- /dev/null +++ b/test/util.js @@ -0,0 +1,11 @@ +'use strict' + +const createBrowserless = require('browserless') +const onExit = require('signal-exit') + +const browserlessFactory = createBrowserless() +onExit(browserlessFactory.close) + +module.exports = { + getBrowserless: () => browserlessFactory +} diff --git a/test/whitelist.js b/test/whitelist.js index f73e9ea..c4b562f 100644 --- a/test/whitelist.js +++ b/test/whitelist.js @@ -3,11 +3,14 @@ const test = require('ava') const xmlUrls = require('..') +const { getBrowserless } = require('./util') + test('Exclude urls based on pattern', async t => { const url = 'https://rawgit.com/Kikobeats/317550e76f1cbd399cebe3bddc0c146b/raw/40a311a2930d19755332818ee4a8a14e59b6ef04/sitemap.xml' const urls = await xmlUrls(url, { prerender: false, + getBrowserless, whitelist: ['*examples*'] })