Skip to content

Commit

Permalink
test: add getBrowserless util
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Jul 1, 2021
1 parent 828db1f commit 6502208
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 12 deletions.
9 changes: 2 additions & 7 deletions index.js
Expand Up @@ -22,10 +22,7 @@ const getText = $ =>

const isXmlUrl = url => REGEX_URL_XML.test(path.extname(url))

const xmlUrls = async (
url,
{ cheerioOpts = {}, whitelist = false, ...opts } = {}
) => {
const xmlUrls = async (url, { cheerioOpts = {}, whitelist = false, ...opts } = {}) => {
const { origin: baseUrl } = new URL(url)
const { html } = await getHTML(url, opts)
const $ = cheerio.load(html, { xmlMode: true, ...cheerioOpts })
Expand All @@ -38,9 +35,7 @@ const xmlUrls = async (
const iterator = async (set, url) => {
const match = !isEmpty(whitelist) && matcher([url], concat(whitelist))
if (!isEmpty(match)) return set
const urls = isXmlUrl(url)
? await xmlUrls(url, opts)
: [normalizeUrl(baseUrl, url)]
const urls = isXmlUrl(url) ? await xmlUrls(url, opts) : [normalizeUrl(baseUrl, url)]
return new Set([...set, ...urls])
}

Expand Down
9 changes: 8 additions & 1 deletion package.json
Expand Up @@ -45,6 +45,7 @@
"nyc": "latest",
"prettier-standard": "latest",
"puppeteer": "latest",
"signal-exit": "latest",
"simple-git-hooks": "latest",
"standard": "latest",
"standard-markdown": "latest",
Expand Down Expand Up @@ -72,14 +73,20 @@
"update:check": "ncu -- --error-level 2"
},
"license": "MIT",
"ava": {
"files": [
"test/**/*",
"!test/util.js"
]
},
"commitlint": {
"extends": [
"@commitlint/config-conventional"
]
},
"lint-staged": {
"package.json": [
"finepack"
"finepack --sort-ignore-object-at ava"
],
"*.js": [
"prettier-standard"
Expand Down
11 changes: 7 additions & 4 deletions test/index.js
Expand Up @@ -3,6 +3,7 @@
const test = require('ava')
const xmlUrls = require('..')

const { getBrowserless } = require('./util')
/**
* test files at: https://gist.github.com/Kikobeats/317550e76f1cbd399cebe3bddc0c146b
*/
Expand All @@ -16,25 +17,27 @@ const fixtures = {
}

test('Get all URLs from a plain sitemap', async t => {
const urls = await xmlUrls(fixtures.sitemap, { prerender: false })
const urls = await xmlUrls(fixtures.sitemap, { prerender: false, getBrowserless })
t.snapshot(urls)
})

test('Remove duplicates', async t => {
const urls = await xmlUrls(fixtures.sitemaWithDuplicates, {
prerender: false
prerender: false,
getBrowserless
})
t.snapshot(urls)
})

test('Get all URLs from more than one sitemap', async t => {
const urls = await xmlUrls([fixtures.sitemap, fixtures.sitemap], {
prerender: false
prerender: false,
getBrowserless
})
t.snapshot(urls)
})

test('Get all URLs from a sitemap of sitemaps', async t => {
const urls = await xmlUrls(fixtures.sitemapOfSitemaps, { prerender: false })
const urls = await xmlUrls(fixtures.sitemapOfSitemaps, { prerender: false, getBrowserless })
t.snapshot(urls)
})
11 changes: 11 additions & 0 deletions test/util.js
@@ -0,0 +1,11 @@
'use strict'

const createBrowserless = require('browserless')
const onExit = require('signal-exit')

// Build one browserless factory when this module is first loaded; every
// caller of `getBrowserless` below receives this same object.
const sharedFactory = createBrowserless()

// Hand the factory's `close` function to signal-exit's `onExit`.
// NOTE(review): presumably this shuts the factory down when the test
// process exits — confirm against the browserless and signal-exit docs.
onExit(sharedFactory.close)

/**
 * Return the module-level browserless factory created above.
 */
const getBrowserless = () => sharedFactory

module.exports = { getBrowserless }
3 changes: 3 additions & 0 deletions test/whitelist.js
Expand Up @@ -3,11 +3,14 @@
const test = require('ava')
const xmlUrls = require('..')

const { getBrowserless } = require('./util')

test('Exclude urls based on pattern', async t => {
const url =
'https://rawgit.com/Kikobeats/317550e76f1cbd399cebe3bddc0c146b/raw/40a311a2930d19755332818ee4a8a14e59b6ef04/sitemap.xml'
const urls = await xmlUrls(url, {
prerender: false,
getBrowserless,
whitelist: ['*examples*']
})

Expand Down

0 comments on commit 6502208

Please sign in to comment.