Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new_audit(crawlable-anchors): adds an anchor-href audit #10662

Merged
merged 18 commits into from
May 20, 2020
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lighthouse-cli/test/cli/__snapshots__/index-test.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,9 @@ Object {
Object {
"path": "seo/link-text",
},
Object {
"path": "seo/crawlable-anchors",
},
Object {
"path": "seo/is-crawlable",
},
Expand Down Expand Up @@ -1117,6 +1120,11 @@ Object {
"id": "link-text",
"weight": 1,
},
Object {
"group": "seo-crawl",
"id": "crawlable-anchors",
"weight": 1,
},
Object {
"group": "seo-crawl",
"id": "is-crawlable",
Expand Down
96 changes: 96 additions & 0 deletions lighthouse-core/audits/seo/crawlable-anchors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/**
* @license Copyright 2020 The Lighthouse Authors. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const Audit = require('../audit.js');
const i18n = require('../../lib/i18n/i18n.js');

const UIStrings = {
/** Title of a Lighthouse audit that provides detail on whether links have potentially-crawlable href attributes. This descriptive title is shown when all links on the page are potentially-crawlable. */
title: 'Links have potentially-crawlable href attributes',
/** Descriptive title of a Lighthouse audit that provides detail on whether links have potentially-crawlable href attributes. This descriptive title is shown when there are href attributes which are not crawlable by search engines. */
failureTitle: 'Links do not have crawlable href attributes',
/** Description of a Lighthouse audit that tells the user why href attributes on links should be crawlable. This is displayed after a user expands the section to see more. 'Learn More' becomes link text to additional documentation. */
description: 'Search engines may use href attributes on links to crawl websites. Ensure that the `href` attribute of anchor elements links to an appropriate destination, so more pages of the site can be discovered. [Learn More](https://support.google.com/webmasters/answer/9112205)',
umaar marked this conversation as resolved.
Show resolved Hide resolved
/** Label for a column in a data table; entries will be the HTML anchor elements that failed the audit. Anchors are DOM elements that are links. */
columnFailingLink: 'Uncrawlable Link',
};

const str_ = i18n.createMessageInstanceIdFn(__filename, UIStrings);

class CrawlableAnchors extends Audit {
/**
* @return {LH.Audit.Meta}
*/
static get meta() {
return {
id: 'crawlable-anchors',
title: str_(UIStrings.title),
failureTitle: str_(UIStrings.failureTitle),
description: str_(UIStrings.description),
requiredArtifacts: ['AnchorElements'],
};
}

/**
* @param {LH.Artifacts} artifacts
* @return {LH.Audit.Product}
*/
static audit({AnchorElements: anchorElements}) {
const failingAnchors = anchorElements.filter(({
rawHref,
listeners = [],
onclick = '',
name = '',
}) => {
onclick = onclick.replace( /\s/g, '');
patrickhulce marked this conversation as resolved.
Show resolved Hide resolved
name = name.trim();
rawHref = rawHref.replace( /\s/g, '');

const windowLocationRegExp = /window\.location=/;
const windowOpenRegExp = /window\.open\(/;
const javaScriptVoidRegExp = /javascript:void(\(|)0(\)|)/;
patrickhulce marked this conversation as resolved.
Show resolved Hide resolved

if (rawHref.startsWith('file:')) return true;
if (windowLocationRegExp.test(onclick)) return true;
if (windowOpenRegExp.test(onclick)) return true;

const hasClickHandler = listeners.some(({type}) => type === 'click');
if (hasClickHandler || name.trim().length > 0) return;

if (rawHref === '') return true;
if (javaScriptVoidRegExp.test(rawHref)) return true;
});

/** @type {LH.Audit.Details.Table['headings']} */
const headings = [{
key: 'node',
itemType: 'node',
text: str_(UIStrings.columnFailingLink),
}];

/** @type {LH.Audit.Details.Table['items']} */
const itemsToDisplay = failingAnchors.map(anchor => {
return {
node: {
type: 'node',
umaar marked this conversation as resolved.
Show resolved Hide resolved
path: anchor.devtoolsNodePath || '',
selector: anchor.selector || '',
nodeLabel: anchor.nodeLabel || '',
snippet: anchor.outerHTML || '',
},
};
});

return {
score: Number(failingAnchors.length === 0),
details: Audit.makeTableDetails(headings, itemsToDisplay),
};
}
}

module.exports = CrawlableAnchors;
module.exports.UIStrings = UIStrings;
2 changes: 2 additions & 0 deletions lighthouse-core/config/default-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ const defaultConfig = {
'seo/http-status-code',
'seo/font-size',
'seo/link-text',
'seo/crawlable-anchors',
'seo/is-crawlable',
'seo/robots-txt',
'seo/tap-targets',
Expand Down Expand Up @@ -563,6 +564,7 @@ const defaultConfig = {
{id: 'meta-description', weight: 1, group: 'seo-content'},
{id: 'http-status-code', weight: 1, group: 'seo-crawl'},
{id: 'link-text', weight: 1, group: 'seo-content'},
{id: 'crawlable-anchors', weight: 1, group: 'seo-crawl'},
{id: 'is-crawlable', weight: 1, group: 'seo-crawl'},
{id: 'robots-txt', weight: 1, group: 'seo-crawl'},
{id: 'image-alt', weight: 1, group: 'seo-content'},
Expand Down
45 changes: 43 additions & 2 deletions lighthouse-core/gather/gatherers/anchor-elements.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ function collectAnchorElements() {
if (node instanceof HTMLAnchorElement) {
return {
href: node.href,
rawHref: node.getAttribute('href') || '',
onclick: node.getAttribute('onclick') || '',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still a little nervous about pathological sites with ridiculously long onclick handlers. I imagine they have to be out there :) @patrickhulce do you think the string length limit you mentioned before for a different thing would be ok to add to these?

(and if someday we need onclick to always be parseable js (or whatever) so it can't be truncated we can always lift the restriction then)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't imagine the risk is anywhere near what a addEventListener handler is though.

I'm fine with a .slice(0, 1024) or whatever to err on the side of caution though if you think this is a legit issue 🤷

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with a .slice(0, 1024) or whatever to err on the side of caution though if you think this is a legit issue 🤷

I would feel better, yes :) It's a public artifact and if it turns out badly we can't walk it back so easily.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now doing a onclick.slice(0, 1024)

name: node.name,
text: node.innerText, // we don't want to return hidden text, so use innerText
rel: node.rel,
target: node.target,
Expand All @@ -59,6 +62,8 @@ function collectAnchorElements() {

return {
href: resolveURLOrEmpty(node.href.baseVal),
rawHref: node.getAttribute('href') || '',
patrickhulce marked this conversation as resolved.
Show resolved Hide resolved
onclick: node.getAttribute('onclick') || '',
text: node.textContent || '',
rel: '',
target: node.target.baseVal || '',
Expand All @@ -70,6 +75,26 @@ function collectAnchorElements() {
});
}

/**
* @param {LH.Gatherer.PassContext['driver']} driver
* @param {string} devtoolsNodePath
*/
async function getEventListeners(driver, devtoolsNodePath) {
const {nodeId} = await driver.sendCommand('DOM.pushNodeByPathToFrontend', {
path: devtoolsNodePath,
});

const {object: {objectId = ''}} = await driver.sendCommand('DOM.resolveNode', {
nodeId,
});

const response = await driver.sendCommand('DOMDebugger.getEventListeners', {
objectId,
});

return response.listeners.map(({type}) => ({type}));
}

class AnchorElements extends Gatherer {
/**
* @param {LH.Gatherer.PassContext} passContext
Expand All @@ -87,8 +112,24 @@ class AnchorElements extends Gatherer {
return (${collectAnchorElements})();
})()`;

/** @type {Array<LH.Artifacts.AnchorElement>} */
return driver.evaluateAsync(expression, {useIsolation: true});
/** @type {LH.Artifacts['AnchorElements']} */
const anchors = await driver.evaluateAsync(expression, {useIsolation: true});
await driver.sendCommand('DOM.enable');

// DOM.getDocument is necessary for pushNodesByBackendIdsToFrontend to properly retrieve nodeIds if the `DOM` domain was enabled before this gatherer, invoke it to be safe.
await driver.sendCommand('DOM.getDocument', {depth: -1, pierce: true});
const anchorsWithEventListeners = anchors.map(async anchor => {
const listeners = await getEventListeners(driver, anchor.devtoolsNodePath);

return {
...anchor,
listeners,
};
});

const result = await Promise.all(anchorsWithEventListeners);
await driver.sendCommand('DOM.disable');
return result;
}
}

Expand Down
12 changes: 12 additions & 0 deletions lighthouse-core/lib/i18n/locales/en-US.json
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,18 @@
"lighthouse-core/audits/seo/canonical.js | title": {
"message": "Document has a valid `rel=canonical`"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | columnFailingLink": {
"message": "Uncrawlable Link"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | description": {
"message": "Search engines may use href attributes on links to crawl websites. Ensure that the `href` attribute of anchor elements links to an appropriate destination, so more pages of the site can be discovered. [Learn More](https://support.google.com/webmasters/answer/9112205)"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | failureTitle": {
"message": "Links do not have crawlable href attributes"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | title": {
"message": "Links have potentially-crawlable href attributes"
},
"lighthouse-core/audits/seo/font-size.js | description": {
"message": "Font sizes less than 12px are too small to be legible and require mobile visitors to “pinch to zoom” in order to read. Strive to have >60% of page text ≥12px. [Learn more](https://web.dev/font-size)."
},
Expand Down
12 changes: 12 additions & 0 deletions lighthouse-core/lib/i18n/locales/en-XL.json
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,18 @@
"lighthouse-core/audits/seo/canonical.js | title": {
"message": "D̂óĉúm̂én̂t́ ĥáŝ á v̂ál̂íd̂ `rel=canonical`"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | columnFailingLink": {
"message": "Ûńĉŕâẃl̂áb̂ĺê Ĺîńk̂"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | description": {
"message": "Ŝéâŕĉh́ êńĝín̂éŝ ḿâý ûśê h́r̂éf̂ át̂t́r̂íb̂út̂éŝ ón̂ ĺîńk̂ś t̂ó ĉŕâẃl̂ ẃêb́ŝít̂éŝ. Én̂śûŕê t́ĥát̂ t́ĥé `href` ât́t̂ŕîb́ût́ê óf̂ án̂ćĥór̂ él̂ém̂én̂t́ŝ ĺîńk̂ś t̂ó âń âṕp̂ŕôṕr̂íât́ê d́êśt̂ín̂át̂íôń, ŝó m̂ór̂é p̂áĝéŝ óf̂ t́ĥé ŝít̂é ĉán̂ b́ê d́îśĉóv̂ér̂éd̂. [Ĺêár̂ń M̂ór̂é](https://support.google.com/webmasters/answer/9112205)"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | failureTitle": {
"message": "L̂ín̂ḱŝ d́ô ńôt́ ĥáv̂é ĉŕâẃl̂áb̂ĺê h́r̂éf̂ át̂t́r̂íb̂út̂éŝ"
},
"lighthouse-core/audits/seo/crawlable-anchors.js | title": {
"message": "L̂ín̂ḱŝ h́âv́ê ṕôt́êńt̂íâĺl̂ý-ĉŕâẃl̂áb̂ĺê h́r̂éf̂ át̂t́r̂íb̂út̂éŝ"
},
"lighthouse-core/audits/seo/font-size.js | description": {
"message": "F̂ón̂t́ ŝíẑéŝ ĺêśŝ t́ĥán̂ 12ṕx̂ ár̂é t̂óô śm̂ál̂ĺ t̂ó b̂é l̂éĝíb̂ĺê án̂d́ r̂éq̂úîŕê ḿôb́îĺê v́îśît́ôŕŝ t́ô “ṕîńĉh́ t̂ó ẑóôḿ” îń ôŕd̂ér̂ t́ô ŕêád̂. Śt̂ŕîv́ê t́ô h́âv́ê >60% óf̂ ṕâǵê t́êx́t̂ ≥12ṕx̂. [Ĺêár̂ń m̂ór̂é](https://web.dev/font-size)."
},
Expand Down
143 changes: 143 additions & 0 deletions lighthouse-core/test/audits/seo/crawlable-anchors-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
* @license Copyright 2020 The Lighthouse Authors. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const assert = require('assert');
const CrawlableAnchorsAudit = require('../../../audits/seo/crawlable-anchors.js');

/* eslint-env jest */

function runAudit({
rawHref = '',
onclick = '',
name = '',
listeners = onclick.trim().length ? [{type: 'click'}] : [],
}) {
const {score} = CrawlableAnchorsAudit.audit({
AnchorElements: [{
rawHref,
name,
listeners,
onclick,
}],
});

return score;
}

describe('SEO: Crawlable anchors audit', () => {
it('allows crawlable anchors', () => {
assert.equal(runAudit({rawHref: '#top'}), 1, 'hash fragment identifier');
assert.equal(runAudit({rawHref: 'mailto:name@example.com'}), 1, 'email link with a mailto URI');
assert.equal(runAudit({rawHref: 'https://example.com'}), 1, 'absolute HTTPs URL');
assert.equal(runAudit({rawHref: 'foo'}), 1, 'relative URL');
assert.equal(runAudit({rawHref: '/foo'}), 1, 'relative URL');
assert.equal(runAudit({rawHref: '#:~:text=string'}), 1, 'hyperlink with a text fragment');
assert.equal(runAudit({rawHref: 'ftp://myname@host.dom'}), 1, 'an FTP hyperlink');
assert.equal(runAudit({rawHref: 'http://172.217.20.78'}), 1, 'IP address based link');
assert.equal(runAudit({rawHref: '//example.com'}), 1, 'protocol relative link');
assert.equal(runAudit({rawHref: 'tel:5555555'}), 1, 'email link with a tel URI');
assert.equal(runAudit({rawHref: '#'}), 1, 'link with only a hash symbol');
assert.equal(runAudit({
rawHref: '?query=string',
}), 1, 'relative link which specifies a query string');
});

it('allows anchors which use a name attribute', () => {
assert.equal(runAudit({name: 'name'}), 1, 'link with a name attribute');
});

it('handles anchor elements which use event listeners', () => {
const auditResultClickPresent = runAudit({
listeners: [{type: 'click'}],
});
assert.equal(auditResultClickPresent, 1, 'presence of a click handler is a pass');

const auditResultJavaScriptURI = runAudit({
rawHref: 'javascript:void(0)',
listeners: [{type: 'click'}],
});
const assertionMessage = 'hyperlink with a `javascript:` URI and a click handler';
assert.equal(auditResultJavaScriptURI, 1, assertionMessage);

const auditResultNonClickListener = runAudit({
listeners: [{type: 'nope'}],
});
assert.equal(auditResultNonClickListener, 0, 'no click event is a fail');

const auditResultMixtureOfListeners = runAudit({
listeners: [{type: 'nope'}, {type: 'another'}, {type: 'click'}],
});
assert.equal(auditResultMixtureOfListeners, 1, 'at least one click listener is a pass');
});

it('disallows uncrawlable anchors', () => {
assert.equal(runAudit({}), 0, 'link with no meaningful attributes and no event handlers');
assert.equal(runAudit({rawHref: 'file:///image.png'}), 0, 'hyperlink with a `file:` URI');
assert.equal(runAudit({name: ' '}), 0, 'name attribute with only space characters');
assert.equal(runAudit({rawHref: ' '}), 0, 'href attribute with only space characters');
const assertionMessage = 'onclick attribute with only space characters';
assert.equal(runAudit({rawHref: ' ', onclick: ' '}), 0, assertionMessage);
});

it('handles javascript:void expressions in the onclick attribute', () => {
const expectedAuditFailures = [
'javascript:void(0)',
'javascript: void(0)',
'javascript : void(0)',
'javascript : void ( 0 )',
'javascript: void 0',
'javascript:void 0',
// The audit logic removes all whitespace from the string and considers this a fail
'javascript:void0',
];

for (const javaScriptVoidVariation of expectedAuditFailures) {
const auditResult = runAudit({rawHref: javaScriptVoidVariation});
assert.equal(auditResult, 0, 'javascript:void failing variations');
umaar marked this conversation as resolved.
Show resolved Hide resolved
}

const expectedAuditPasses = [
'javascript:void',
'javascript:void()',
'javascript:0',
];

for (const javaScriptVoidVariation of expectedAuditPasses) {
const auditResult = runAudit({rawHref: javaScriptVoidVariation});
assert.equal(auditResult, 1, 'javascript:void passing variations');
}
});

it('handles window.location and window.open assignments in an onclick attribute', () => {
const expectedAuditFailures = [
'window.location=',
'window.location =',
'window.open()',
`window.open('')`,
'window.open(`http://example.com`)',
'window.open ( )',
`window.open('foo', 'name', 'resizable)`,
];

for (const onclickVariation of expectedAuditFailures) {
const auditResult = runAudit({onclick: onclickVariation});
assert.equal(auditResult, 0, 'URL changing onclick strings');
}

const expectedAuditPasses = [
'windowAlocation',
'window.location.href',
'window.Location =',
'windowLopen()',
];

for (const onclickVariation of expectedAuditPasses) {
const auditResult = runAudit({onclick: onclickVariation});
assert.equal(auditResult, 1, 'onclick strings which do not change the URL');
}
});
});
Loading