From d850c226349821b683815e1620db3a1dd64ec5ec Mon Sep 17 00:00:00 2001 From: Nick Reese Date: Tue, 18 May 2021 14:00:50 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20wire=20up=20sitewide=20a?= =?UTF-8?q?uditing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/seo-check/README.md | 32 ++++- packages/seo-check/Tester.js | 177 +++++++++++++++++---------- packages/seo-check/index.js | 50 +++++++- packages/seo-check/package-lock.json | 131 -------------------- packages/seo-check/package.json | 3 +- packages/seo-check/rules.js | 51 ++++---- 6 files changed, 212 insertions(+), 232 deletions(-) delete mode 100644 packages/seo-check/package-lock.json diff --git a/packages/seo-check/README.md b/packages/seo-check/README.md index ff0bcf7..4c486cf 100644 --- a/packages/seo-check/README.md +++ b/packages/seo-check/README.md @@ -2,10 +2,21 @@ Checks the generated HTML for common SEO issues along with tips. -**ALPHA** +Works in single page mode and site wide mode. + +Pro users can easily use this plugin to fire off an email to the marketing/content team any time an SEO issue is encountered. ## Working Checks +### Sitewide + +These are only checked when Elder.js runs in build mode. + +- [x] check for orphaned pages (no incoming internal links) +- [x] check for broken internal links. +- [x] check for duplicate title tags +- [x] check for duplicate meta descriptions + ### Canonical - [x] canonical tag exists @@ -33,14 +44,14 @@ Checks the generated HTML for common SEO issues along with tips. - [x] Meta description is less than than 120 chars - [x] Meta description is longer than 300 chars (sometimes things go REALLY wrong and this helps catch it.) -- [x] Meta description includes ~20% of the keywords of the title - tag. (useful in my experience.) +- [x] Meta description includes at least one of the keywords of the title + tag. ### HTags - [x] h1 Exists on page - [x] only a single h1 per page. 
-- [x] h1 has 10% of the words in the title tag +- [x] h1 has at least one word from your title tag - [x] h1 is less than 70 chars - [x] h1 is more than than 10 chars - [x] H2 or H3 don't exist if an H1 is missing. @@ -68,6 +79,8 @@ Checks the generated HTML for common SEO issues along with tips. - [x] Internal links have trailing slash - [x] Internal links are not `nofollow` - [x] Notifies if there are more than 50 outbound links on the page. +- [x] check for trailing `index.html` +- [x] internal fully formed links include 'https' ### Misc @@ -87,6 +100,17 @@ Once installed, open your `elder.config.js` and configure the plugin by adding ` ```javascript plugins: { '@elderjs/plugin-seo-check': { + display: ['errors', 'warnings'], // what level of reporting would you like. + handleSiteResults: async (results) => { // default. + // 'results' represents all of the issues found for the site wide build. + // power users can use this async function to post the issues to an endpoint or send an email + // so that the content or marketing team can address the issues. 
+ if (Object.keys(results).length > 0) { + console.log(results); + } else { + console.log(`No SEO issues detected.`); + } + }, }, } diff --git a/packages/seo-check/Tester.js b/packages/seo-check/Tester.js index fc0c3d4..777df5e 100644 --- a/packages/seo-check/Tester.js +++ b/packages/seo-check/Tester.js @@ -32,8 +32,8 @@ const emptyRule = { info: [], }; -const Tester = function (rules, siteWide = false) { - this.internalLinks = new Set(); +const Tester = function (rules, display, siteWide) { + this.internalLinks = []; //[[link, linkedFrom]] this.pagesSeen = new Set(); this.currentUrl = ''; @@ -44,9 +44,14 @@ const Tester = function (rules, siteWide = false) { this.currentRule = JSON.parse(JSON.stringify(emptyRule)); this.results = []; + this.siteResults = { + duplicateTitles: [], + duplicateMetaDescriptions: [], + }; const logMetaDescription = (meta) => { if (this.metaDescriptions.has(meta)) { + this.siteResults.duplicateMetaDescriptions.push([this.metaDescriptions.get(meta), this.currentUrl]); } else { this.metaDescriptions.set(meta, this.currentUrl); } @@ -54,6 +59,7 @@ const Tester = function (rules, siteWide = false) { const logTitleTag = (title) => { if (this.titleTags.has(title)) { + this.siteResults.duplicateTitles.push([this.titleTags.get(title), this.currentUrl]); } else { this.titleTags.set(title, this.currentUrl); } @@ -87,11 +93,6 @@ const Tester = function (rules, siteWide = false) { }; }; - const tester = { - test: runTest(70, 'errors'), - lint: runTest(40, 'warnings'), - }; - const startRule = ({ validator, test, testData, ...payload }) => { if (this.currentRule.errors.length > 0) throw Error( @@ -109,70 +110,118 @@ const Tester = function (rules, siteWide = false) { this.currentRule = JSON.parse(JSON.stringify(emptyRule)); }; - return async (html, url) => { - this.currentUrl = url; - this.pagesSeen.add(url); - - const $ = cheerio.load(html); - - const result = { - html: $attributes($, 'html'), - title: $attributes($, 'title'), - meta: $attributes($, 
'head meta'), - ldjson: $attributes($, 'script[type="application/ld+json"]'), - h1s: $attributes($, 'h1'), - h2s: $attributes($, 'h2'), - h3s: $attributes($, 'h3'), - h4s: $attributes($, 'h4'), - h5s: $attributes($, 'h5'), - h6s: $attributes($, 'h6'), - canonical: $attributes($, '[rel="canonical"]'), - imgs: $attributes($, 'img'), - aTags: $attributes($, 'a'), - linkTags: $attributes($, 'link'), - ps: $attributes($, 'p'), - }; - - if (siteWide) { - if (result.title[0] && result.title[0].innerText) { - logTitleTag(result.title[0].innerText); + return { + test: async (html, url) => { + try { + this.currentUrl = url; + this.pagesSeen.add(url); + + const $ = cheerio.load(html); + + const result = { + html: $attributes($, 'html'), + title: $attributes($, 'title'), + meta: $attributes($, 'head meta'), + ldjson: $attributes($, 'script[type="application/ld+json"]'), + h1s: $attributes($, 'h1'), + h2s: $attributes($, 'h2'), + h3s: $attributes($, 'h3'), + h4s: $attributes($, 'h4'), + h5s: $attributes($, 'h5'), + h6s: $attributes($, 'h6'), + canonical: $attributes($, '[rel="canonical"]'), + imgs: $attributes($, 'img'), + aTags: $attributes($, 'a'), + linkTags: $attributes($, 'link'), + ps: $attributes($, 'p'), + }; + + if (siteWide) { + if (result.title[0] && result.title[0].innerText) { + logTitleTag(result.title[0].innerText); + } + const metaDescription = result.meta.find((m) => m.name && m.name.toLowerCase() === 'description'); + if (metaDescription) { + logMetaDescription(metaDescription.content); + } + result.aTags + .filter((a) => !!a.href) + .filter((a) => !a.href.includes('http')) + .filter((a) => { + if (this.currentUrl !== '/') { + return !a.href.endsWith(this.currentUrl); + } + return true; + }) + .filter((a) => a.href !== this.currentUrl) + .map((a) => a.href) + .forEach((a) => this.internalLinks.push([a, this.currentUrl])); + } + + for (let i = 0; i < rules.length; i++) { + const rule = rules[i]; + startRule(rule); + await rule.validator( + { result, response: { 
url } }, + { + test: runTest(70, 'errors'), + lint: runTest(40, 'warnings'), + }, + ); + finishRule(); + } + + const validDisplay = ['warnings', 'errors']; + const out = display + .filter((d) => validDisplay.includes(d)) + .reduce((out, key) => { + return [ + ...out, + ...this.results + .filter((r) => !r.success) + .sort((a, b) => a.priority > b.priority) + .reduce((o, ruleResult) => { + return [...o, ...ruleResult[key].map((r) => ({ ...r, level: key }))]; + }, []), + ]; + }, []); + + if (siteWide) { + this.siteResults[url] = out; + } else { + if (out.length > 0) { + // eslint-disable-next-line node/no-unsupported-features/node-builtins + console.table(out); + } + } + + this.results = []; + } catch (e) { + console.error(e); } - const metaDescription = result.meta.find((m) => m.name && m.name.toLowerCase() === 'description'); - if (metaDescription) { - logMetaDescription(metaDescription.content); + }, + siteResults: async () => { + this.siteResults.orphanPages = []; + for (const page of this.pagesSeen.values()) { + if (!this.internalLinks.find((il) => il[0] === page)) this.siteResults.orphanPages.push(page); } - result.aTags.filter((a) => !a.href.includes('http')).forEach((a) => this.internalLinks.add(a.href)); - } + this.siteResults.brokenInternalLinks = []; + for (const [link, linker] of this.internalLinks) { + if (!this.pagesSeen.has(link)) this.siteResults.brokenInternalLinks.push({ link, linker }); + } - for (let i = 0; i < rules.length; i++) { - const rule = rules[i]; - startRule(rule); - await rule.validator({ result, response: { url } }, tester); - finishRule(); - } + const results = Object.keys(this.siteResults).reduce((out, key) => { + if (Array.isArray(this.siteResults[key]) && this.siteResults[key].length > 0) { + out[key] = this.siteResults[key]; + } + return out; + }, {}); - const out = ['errors', 'warnings'].reduce((out, key) => { - return [ - ...out, - ...this.results - .filter((r) => !r.success) - .sort((a, b) => a.priority > b.priority) - 
.reduce((o, ruleResult) => { - return [...o, ...ruleResult[key].map((r) => ({ ...r, level: key }))]; - }, []), - ]; - }, []); - - console.table(out); - - this.results = []; + return results; + }, }; }; // eslint-disable-next-line jest/no-export module.exports = Tester; - -// accept rules one time. -// offer a function that tests all of the rules for a url. -// if in build mode test site wide rules. diff --git a/packages/seo-check/index.js b/packages/seo-check/index.js index 59dd25a..f7e0fe0 100644 --- a/packages/seo-check/index.js +++ b/packages/seo-check/index.js @@ -1,3 +1,7 @@ +const glob = require('tiny-glob'); +const path = require('path'); +const fs = require('fs-extra'); + const Tester = require('./Tester'); const rules = require('./rules'); @@ -8,21 +12,55 @@ const plugin = { init: (plugin) => { // used to store the data in the plugin's closure so it is persisted between loads - plugin.test = new Tester(rules, plugin.config.display, plugin.settings.build); + plugin.tester = new Tester(rules, plugin.config.display, plugin.settings.context === 'build'); return plugin; }, config: { - display: ['errors', 'warnings'], + display: ['errors', 'warnings'], // what level of reporting would you like. + handleSiteResults: async (results) => { + // 'results' represents all of the issues found for the site wide build. + // power users can use this async function to post the issues to an endpoint or send an email + // so that the content or marketing team can address the issues. 
+ if (Object.keys(results).length > 0) { + console.log(results); + } else { + console.log(`No SEO issues detected.`); + } + }, }, hooks: [ { hook: 'html', name: 'evaluateHtml', - description: 'Lints the elder.js response html', - run: async ({ request, plugin, htmlString }) => { - if (notProd) { - await plugin.test(htmlString, request.permalink); + description: 'Check the elder.js response html for common SEO issues.', + run: async ({ request, plugin, htmlString, settings }) => { + if (notProd && settings.context !== 'build') { + await plugin.tester.test(htmlString, request.permalink); + } + }, + }, + { + hook: 'buildComplete', + name: 'siteWideSeoCheck', + description: 'test', + run: async ({ settings, plugin, allRequests }) => { + if (settings.context === 'build') { + const files = await glob(`${settings.distDir}/**/*.html`); + const publicFolder = path.relative(settings.rootDir, settings.distDir); + + for (let i = 0; i < files.length; i++) { + const file = files[i]; + + const html = fs.readFileSync(path.resolve(file), { encoding: 'utf-8' }); + + const relPermalink = file.replace('index.html', '').replace(publicFolder, ''); + await plugin.tester.test(html, relPermalink); + } + + const results = await plugin.tester.siteResults(); + + plugin.config.handleSiteResults(results); } }, }, diff --git a/packages/seo-check/package-lock.json b/packages/seo-check/package-lock.json deleted file mode 100644 index 3b58df7..0000000 --- a/packages/seo-check/package-lock.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "name": "seo-check", - "version": "0.1.29", - "lockfileVersion": 1, - "requires": true, - "dependencies": { - "boolbase": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", - "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" - }, - "cheerio": { - "version": "1.0.0-rc.9", - "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.9.tgz", - "integrity": 
"sha512-QF6XVdrLONO6DXRF5iaolY+odmhj2CLj+xzNod7INPWMi/x9X4SOylH0S/vaPpX+AUU6t04s34SQNh7DbkuCng==", - "requires": { - "cheerio-select": "^1.4.0", - "dom-serializer": "^1.3.1", - "domhandler": "^4.2.0", - "htmlparser2": "^6.1.0", - "parse5": "^6.0.1", - "parse5-htmlparser2-tree-adapter": "^6.0.1", - "tslib": "^2.2.0" - } - }, - "cheerio-select": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.4.0.tgz", - "integrity": "sha512-sobR3Yqz27L553Qa7cK6rtJlMDbiKPdNywtR95Sj/YgfpLfy0u6CGJuaBKe5YE/vTc23SCRKxWSdlon/w6I/Ew==", - "requires": { - "css-select": "^4.1.2", - "css-what": "^5.0.0", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0", - "domutils": "^2.6.0" - } - }, - "css-select": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.1.2.tgz", - "integrity": "sha512-nu5ye2Hg/4ISq4XqdLY2bEatAcLIdt3OYGFc9Tm9n7VSlFBcfRv0gBNksHRgSdUDQGtN3XrZ94ztW+NfzkFSUw==", - "requires": { - "boolbase": "^1.0.0", - "css-what": "^5.0.0", - "domhandler": "^4.2.0", - "domutils": "^2.6.0", - "nth-check": "^2.0.0" - } - }, - "css-what": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-5.0.0.tgz", - "integrity": "sha512-qxyKHQvgKwzwDWC/rGbT821eJalfupxYW2qbSJSAtdSTimsr/MlaGONoNLllaUPZWf8QnbcKM/kPVYUQuEKAFA==" - }, - "dom-serializer": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.3.2.tgz", - "integrity": "sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==", - "requires": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.0", - "entities": "^2.0.0" - } - }, - "domelementtype": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.2.0.tgz", - "integrity": "sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==" - }, - "domhandler": { - "version": "4.2.0", - "resolved": 
"https://registry.npmjs.org/domhandler/-/domhandler-4.2.0.tgz", - "integrity": "sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA==", - "requires": { - "domelementtype": "^2.2.0" - } - }, - "domutils": { - "version": "2.6.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.6.0.tgz", - "integrity": "sha512-y0BezHuy4MDYxh6OvolXYsH+1EMGmFbwv5FKW7ovwMG6zTPWqNPq3WF9ayZssFq+UlKdffGLbOEaghNdaOm1WA==", - "requires": { - "dom-serializer": "^1.0.1", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0" - } - }, - "entities": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", - "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==" - }, - "htmlparser2": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-6.1.0.tgz", - "integrity": "sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==", - "requires": { - "domelementtype": "^2.0.1", - "domhandler": "^4.0.0", - "domutils": "^2.5.2", - "entities": "^2.0.0" - } - }, - "nth-check": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz", - "integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==", - "requires": { - "boolbase": "^1.0.0" - } - }, - "parse5": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" - }, - "parse5-htmlparser2-tree-adapter": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz", - "integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==", - "requires": { - "parse5": "^6.0.1" - } - 
}, - "tslib": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz", - "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==" - } - } -} diff --git a/packages/seo-check/package.json b/packages/seo-check/package.json index ec9e2b6..33eabdd 100644 --- a/packages/seo-check/package.json +++ b/packages/seo-check/package.json @@ -32,6 +32,7 @@ "url": "https://github.com/Elderjs/plugins/issues" }, "dependencies": { - "cheerio": "^1.0.0-rc.9" + "cheerio": "^1.0.0-rc.9", + "tiny-glob": "^0.2.9" } } diff --git a/packages/seo-check/rules.js b/packages/seo-check/rules.js index 7e02985..e372382 100644 --- a/packages/seo-check/rules.js +++ b/packages/seo-check/rules.js @@ -5,6 +5,16 @@ const seoLint = { internalLinksTrailingSlash: true, }; +const cleanString = (str) => + str + .toLowerCase() + .replace('|', '') + .replace('-', '') + .replace('.', '') + .replace(':', '') + .replace('!', '') + .replace('?', ''); + const rules = [ { name: 'Canonical Tag', @@ -156,13 +166,11 @@ const rules = [ ); if (payload.result.title[0]) { - const titleArr = payload.result.title[0].innerText - .toLowerCase() + const titleArr = cleanString(payload.result.title[0].innerText) .split(' ') .filter((i) => [':', '|', '-'].indexOf(i) === -1); - const compareArr = metas[0].content - .toLowerCase() + const compareArr = cleanString(metas[0].content) .split(' ') .filter((i) => [':', '|', '-'].indexOf(i) === -1); @@ -171,8 +179,8 @@ const rules = [ tester.lint( 70, assert.ok, - matches.length > Math.floor(titleArr.length * 0.2), - 'Meta description should include at least 20% of the words in the title tag.', + matches.length >= 1, + 'Meta description should include at least 1 of the words in the title tag.', ); } } @@ -203,7 +211,7 @@ const rules = [ }, }, validator: async (payload, tester) => { - const { h1s, h2s, h3s, h4s, h5s, h6s, title } = payload.result; + const { h1s, h2s, h3s, h4s, h5s, h6s, title, html } = 
payload.result; tester.test( 90, assert.ok, @@ -213,8 +221,7 @@ const rules = [ let titleArr; if (title[0]) { - titleArr = title[0].innerText - .toLowerCase() + titleArr = cleanString(title[0].innerText) .split(' ') .filter((i) => [':', '|', '-'].indexOf(i) === -1); } @@ -233,19 +240,15 @@ const rules = [ ); if (titleArr) { - const compareArr = h1s[0].innerText - .toLowerCase() + const compareArr = cleanString(h1s[0].innerText) .split(' ') .filter((i) => [':', '|', '-'].indexOf(i) === -1); const matches = titleArr.filter((t) => compareArr.indexOf(t) !== -1); - tester.lint( - 70, - assert.ok, - matches.length > Math.floor(titleArr.length * 0.1), - `H1 tag should have at least 10% of the words in the title tag.`, - ); + if (matches.length < 1) console.log(titleArr, compareArr); + + tester.lint(70, assert.ok, matches.length >= 1, `H1 tag should have at least 1 word from your title tag.`); } } else { tester.test(assert.ok, h2s.length === 0, `No h1 tag, but h2 tags are defined.`); @@ -253,7 +256,9 @@ const rules = [ } let usesKeywords = false; - tester.lint(60, assert.ok, h2s.length >= 1, 'Page is missing an h2 tag.'); + if (html[0].innerText.length > 3000) { + tester.lint(60, assert.ok, h2s.length >= 1, 'Page is missing an h2 tag.'); + } h2s.forEach((h2) => { tester.test(80, assert.notEqual, h2.innerText.length, 0, 'H2 tags should not be empty'); tester.lint( @@ -267,8 +272,7 @@ const rules = [ `H2 tag is shorter than the recommended limit of 10. (${h2.innerText})`, ); - const compareArr = h2.innerText - .toLowerCase() + const compareArr = cleanString(h2.innerText) .split(' ') .filter((i) => [':', '|', '-'].indexOf(i) === -1); @@ -279,12 +283,7 @@ const rules = [ }); if (h2s.length > 0) { - tester.lint( - 70, - assert.ok, - usesKeywords, - `None of your h2 tags use a single word from your title tag. Investigate.`, - ); + tester.lint(70, assert.ok, usesKeywords, `None of your h2 tags use a single word from your title tag.`); } usesKeywords = false;