From 8d6e48a8bccf1bc649123f712e66a6b2c40e7245 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 23 Jan 2019 11:50:52 -0600 Subject: [PATCH 1/9] core(canonical): move canonical audit to LinkElements --- .../test/fixtures/seo/seo-failure-cases.html | 4 +- .../test/fixtures/seo/seo-tester.html | 5 + lighthouse-core/audits/seo/canonical.js | 226 ++++++++---------- lighthouse-core/computed/main-resource.js | 7 +- .../gather/gatherers/link-elements.js | 89 ++++++- .../simulator/network-analyzer.js | 14 +- .../test/audits/seo/canonical-test.js | 156 ++++++------ .../gather/gatherers/link-elements-test.js | 93 +++++++ types/artifacts.d.ts | 14 +- 9 files changed, 384 insertions(+), 224 deletions(-) create mode 100644 lighthouse-core/test/gather/gatherers/link-elements-test.js diff --git a/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html b/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html index b3713a90040d..5e8471f6eb05 100644 --- a/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html +++ b/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html @@ -14,8 +14,8 @@ - - + + diff --git a/lighthouse-cli/test/fixtures/seo/seo-tester.html b/lighthouse-cli/test/fixtures/seo/seo-tester.html index 6f31dcae289e..8eed1643184a 100644 --- a/lighthouse-cli/test/fixtures/seo/seo-tester.html +++ b/lighthouse-cli/test/fixtures/seo/seo-tester.html @@ -27,6 +27,11 @@ + + + + +

SEO

Anchor text

diff --git a/lighthouse-core/audits/seo/canonical.js b/lighthouse-core/audits/seo/canonical.js index b0032aed2a65..32fafb7f848e 100644 --- a/lighthouse-core/audits/seo/canonical.js +++ b/lighthouse-core/audits/seo/canonical.js @@ -6,44 +6,8 @@ 'use strict'; const Audit = require('../audit'); -const LinkHeader = require('http-link-header'); const URL = require('../../lib/url-shim'); const MainResource = require('../../computed/main-resource.js'); -const LINK_HEADER = 'link'; - -/** - * @param {string} headerValue - * @returns {Array} - */ -function getCanonicalLinksFromHeader(headerValue) { - const linkHeader = LinkHeader.parse(headerValue); - - return linkHeader.get('rel', 'canonical').map(c => c.uri); -} - -/** - * @param {string} headerValue - * @returns {Array} - */ -function getHreflangsFromHeader(headerValue) { - const linkHeader = LinkHeader.parse(headerValue); - - return linkHeader.get('rel', 'alternate').map(h => h.uri); -} - -/** - * Returns true if given string is a valid absolute or relative URL - * @param {string} url - * @returns {boolean} - */ -function isValidRelativeOrAbsoluteURL(url) { - try { - new URL(url, 'https://example.com/'); - return true; - } catch (e) { - return false; - } -} /** * Returns a primary domain for provided URL (e.g. http://www.example.com -> example.com). @@ -66,7 +30,7 @@ class Canonical extends Audit { failureTitle: 'Document does not have a valid `rel=canonical`', description: 'Canonical links suggest which URL to show in search results. ' + '[Learn more](https://developers.google.com/web/tools/lighthouse/audits/canonical).', - requiredArtifacts: ['Canonical', 'Hreflang', 'URL'], + requiredArtifacts: ['LinkElements', 'URL'], }; } @@ -75,102 +39,102 @@ class Canonical extends Audit { * @param {LH.Audit.Context} context * @return {Promise} */ - static audit(artifacts, context) { + static async audit(artifacts, context) { const devtoolsLog = artifacts.devtoolsLogs[Audit.DEFAULT_PASS]; - return MainResource.request({devtoolsLog, URL: artifacts.URL}, context) - .then(mainResource => { - const baseURL = new URL(mainResource.url); - /** @type {Array} */ - let canonicals = []; - /** @type {Array} */ - let hreflangs = []; - - mainResource.responseHeaders && mainResource.responseHeaders - .filter(h => h.name.toLowerCase() === LINK_HEADER) - .forEach(h => { - canonicals = canonicals.concat(getCanonicalLinksFromHeader(h.value)); - hreflangs = hreflangs.concat(getHreflangsFromHeader(h.value)); - }); - - for (const canonical of artifacts.Canonical) { - if (canonical !== null) { - canonicals.push(canonical); - } - } - // we should only fail if there are multiple conflicting URLs - // see: https://github.com/GoogleChrome/lighthouse/issues/3178#issuecomment-381181762 - canonicals = Array.from(new Set(canonicals)); - - artifacts.Hreflang.forEach(({href}) => hreflangs.push(href)); - - hreflangs = hreflangs - .filter(href => isValidRelativeOrAbsoluteURL(href)) - .map(href => (new URL(href, baseURL)).href); // normalize URLs - - if (canonicals.length === 0) { - return { - rawValue: true, - notApplicable: true, - }; - } - - if (canonicals.length > 1) { - return { - rawValue: false, - explanation: `Multiple conflicting URLs (${canonicals.join(', ')})`, - }; - } - - const canonical = canonicals[0]; - - if (!isValidRelativeOrAbsoluteURL(canonical)) { - return { - rawValue: false, - explanation: `Invalid URL (${canonical})`, - }; - } - - if (!URL.isValid(canonical)) { - return { - rawValue: false, - explanation: `Relative URL (${canonical})`, - }; - } - - const canonicalURL = new URL(canonical); - - // cross-language or cross-country canonicals are a common issue - if (hreflangs.includes(baseURL.href) && hreflangs.includes(canonicalURL.href) && - baseURL.href !== canonicalURL.href) { - return { - rawValue: false, - explanation: `Points to another hreflang location (${baseURL.href})`, - }; - } - - // bing and yahoo don't allow canonical URLs pointing to different domains, it's also - // a common mistake to publish a page with canonical pointing to e.g. a test domain or localhost - if (getPrimaryDomain(canonicalURL) !== getPrimaryDomain(baseURL)) { - return { - rawValue: false, - explanation: `Points to a different domain (${canonicalURL})`, - }; - } - - // another common mistake is to have canonical pointing from all pages of the website to its root - if (canonicalURL.origin === baseURL.origin && - canonicalURL.pathname === '/' && baseURL.pathname !== '/') { - return { - rawValue: false, - explanation: 'Points to a root of the same origin', - }; - } - - return { - rawValue: true, - }; - }); + const mainResource = await MainResource.request({devtoolsLog, URL: artifacts.URL}, context); + const baseURL = new URL(mainResource.url); + + /** @type {Set} */ + const uniqueCanonicalURLs = new Set(); + /** @type {string[]} */ + const hreflangURLs = []; + + /** @type {LH.Artifacts.LinkElement|undefined} */ + let invalidCanonicalLink; + /** @type {LH.Artifacts.LinkElement|undefined} */ + let relativeCanonicallink; + for (const link of artifacts.LinkElements) { + if (link.source === 'body') continue; + + if (link.rel === 'canonical') { + if (!link.hrefRaw) continue; + + if (!link.href) invalidCanonicalLink = link; + else if (!URL.isValid(link.hrefRaw)) relativeCanonicallink = link; + else uniqueCanonicalURLs.add(link.href); + } else if (link.rel === 'alternate') { + if (link.href && link.hreflang) hreflangURLs.push(link.href); + } + } + + // the canonical link is totally invalid + if (invalidCanonicalLink) { + return { + rawValue: false, + explanation: `Invalid URL (${invalidCanonicalLink.hrefRaw})`, + }; + } + + // the canonical link is valid, but it's relative which isn't allowed + if (relativeCanonicallink) { + return { + rawValue: false, + explanation: `Relative URL (${relativeCanonicallink.hrefRaw})`, + }; + } + + /** @type {string[]} */ + const canonicalURLs = Array.from(uniqueCanonicalURLs); + + // there's no canonical URL at all, we're done + if (canonicalURLs.length === 0) { + return { + rawValue: true, + notApplicable: true, + }; + } + + // we have multiple conflicting canonical URls, we're done + if (canonicalURLs.length > 1) { + return { + rawValue: false, + explanation: `Multiple conflicting URLs (${canonicalURLs.join(', ')})`, + }; + } + + const canonicalURL = new URL(canonicalURLs[0]); + + // cross-language or cross-country canonicals are a common issue + if (hreflangURLs.includes(baseURL.href) && hreflangURLs.includes(canonicalURL.href) && + baseURL.href !== canonicalURL.href) { + return { + rawValue: false, + explanation: `Points to another hreflang location (${baseURL.href})`, + }; + } + + // bing and yahoo don't allow canonical URLs pointing to different domains, it's also + // a common mistake to publish a page with canonical pointing to e.g. a test domain or localhost + if (getPrimaryDomain(canonicalURL) !== getPrimaryDomain(baseURL)) { + return { + rawValue: false, + explanation: `Points to a different domain (${canonicalURL})`, + }; + } + + // another common mistake is to have canonical pointing from all pages of the website to its root + if (canonicalURL.origin === baseURL.origin && + canonicalURL.pathname === '/' && baseURL.pathname !== '/') { + return { + rawValue: false, + explanation: 'Points to a root of the same origin', + }; + } + + return { + rawValue: true, + }; } } diff --git a/lighthouse-core/computed/main-resource.js b/lighthouse-core/computed/main-resource.js index 0dfc0bd4746d..6181f1d31d47 100644 --- a/lighthouse-core/computed/main-resource.js +++ b/lighthouse-core/computed/main-resource.js @@ -6,7 +6,7 @@ 'use strict'; const makeComputedArtifact = require('./computed-artifact.js'); -const URL = require('../lib/url-shim.js'); +const NetworkAnalyzer = require('../lib/dependency-graph/simulator/network-analyzer.js'); const NetworkRecords = require('./network-records.js'); /** @@ -22,10 +22,7 @@ class MainResource { static async compute_(data, context) { const finalUrl = data.URL.finalUrl; const requests = await NetworkRecords.request(data.devtoolsLog, context); - // equalWithExcludedFragments is expensive, so check that the finalUrl starts with the request first - const mainResource = requests.find(request => finalUrl.startsWith(request.url) && - URL.equalWithExcludedFragments(request.url, finalUrl)); - + const mainResource = NetworkAnalyzer.findMainDocument(requests, finalUrl); if (!mainResource) { throw new Error('Unable to identify the main resource'); } diff --git a/lighthouse-core/gather/gatherers/link-elements.js b/lighthouse-core/gather/gatherers/link-elements.js index 9283265e00c7..53296f4c224d 100644 --- a/lighthouse-core/gather/gatherers/link-elements.js +++ b/lighthouse-core/gather/gatherers/link-elements.js @@ -6,31 +6,110 @@ 'use strict'; const Gatherer = require('./gatherer.js'); -const getElementsInDocumentString = require('../../lib/page-functions.js').getElementsInDocumentString; // eslint-disable-line max-len +const NetworkAnalyzer = require('../../lib/dependency-graph/simulator/network-analyzer.js'); +const LinkHeader = require('http-link-header'); +const getElementsInDocumentString = require('../../lib/page-functions.js') + .getElementsInDocumentString; // eslint-disable-line max-len + +/** + * + * @param {string} url + * @param {string} finalUrl + * @return {string|null} + */ +function normalizeUrlOrNull(url, finalUrl) { + try { + return new URL(url, finalUrl).href; + } catch (_) { + return null; + } +} + +/** + * @param {string|undefined} value + * @return {LH.Artifacts.LinkElement['crossOrigin']} + */ +function getCrossoriginFromHeader(value) { + if (value === 'anonymous') return 'anonymous'; + if (value === 'use-credentials') return 'use-credentials'; + return null; +} class LinkElements extends Gatherer { /** * @param {LH.Gatherer.PassContext} passContext * @return {Promise} */ - async afterPass(passContext) { - const driver = passContext.driver; - + static getLinkElementsInDOM(passContext) { // We'll use evaluateAsync because the `node.getAttribute` method doesn't actually normalize // the values like access from JavaScript does. - return driver.evaluateAsync(`(() => { + return passContext.driver.evaluateAsync(`(() => { ${getElementsInDocumentString}; return getElementsInDocument('link').map(link => { return { rel: link.rel, href: link.href, + hrefRaw: link.href, + hreflang: link.hreflang, as: link.as, crossOrigin: link.crossOrigin, + source: link.closest('head') ? 'head' : 'body', }; }); })()`, {useIsolation: true}); } + + /** + * @param {LH.Gatherer.PassContext} passContext + * @param {LH.Gatherer.LoadData} loadData + * @return {LH.Artifacts['LinkElements']} + */ + static getLinkElementsInHeaders(passContext, loadData) { + const finalUrl = passContext.baseArtifacts.URL.finalUrl; + const records = loadData.networkRecords; + const mainDocument = NetworkAnalyzer.findMainDocument(records, finalUrl); + if (!mainDocument) return []; + + /** @type {LH.Artifacts['LinkElements']} */ + const linkElements = []; + + for (const header of mainDocument.responseHeaders) { + if (header.name.toLowerCase() !== 'link') continue; + + for (const link of LinkHeader.parse(header.value).refs) { + linkElements.push({ + rel: link.rel || '', + href: normalizeUrlOrNull(link.uri, finalUrl), + hrefRaw: link.uri || '', + hreflang: link.hreflang || '', + as: link.as || '', + crossOrigin: getCrossoriginFromHeader(link.crossorigin), + source: 'headers', + }); + } + } + + return linkElements; + } + + /** + * @param {LH.Gatherer.PassContext} passContext + * @param {LH.Gatherer.LoadData} loadData + * @return {Promise} + */ + async afterPass(passContext, loadData) { + const fromDOM = await LinkElements.getLinkElementsInDOM(passContext); + const fromHeaders = LinkElements.getLinkElementsInHeaders(passContext, loadData); + const linkElements = fromDOM.concat(fromHeaders); + + for (const link of linkElements) { + // Normalize the rel for easy consumption/filtering + link.rel = link.rel.toLowerCase(); + } + + return linkElements; + } } module.exports = LinkElements; diff --git a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js index 7f2dd6b0d7fe..6ca8201306bc 100644 --- a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js +++ b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js @@ -6,7 +6,8 @@ 'use strict'; const INITIAL_CWD = 14 * 1024; -const NetworkRequest = require('../../network-request'); +const NetworkRequest = require('../../network-request.js'); +const URL = require('../../url-shim.js'); // Assume that 40% of TTFB was server response time by default for static assets const DEFAULT_SERVER_RESPONSE_PERCENTAGE = 0.4; @@ -443,9 +444,18 @@ class NetworkAnalyzer { /** * @param {Array} records + * @param {string} [finalURL] * @return {LH.Artifacts.NetworkRequest} */ - static findMainDocument(records) { + static findMainDocument(records, finalURL) { + // Try to find an exact match with the final URL first if we have one + if (finalURL) { + // equalWithExcludedFragments is expensive, so check that the finalUrl starts with the request first + const mainResource = records.find(request => finalURL.startsWith(request.url) && + URL.equalWithExcludedFragments(request.url, finalURL)); + if (mainResource) return mainResource; + } + const documentRequests = records.filter(record => record.resourceType === NetworkRequest.TYPES.Document); // The main document is the earliest document request, using position in networkRecords array to break ties. diff --git a/lighthouse-core/test/audits/seo/canonical-test.js b/lighthouse-core/test/audits/seo/canonical-test.js index 484b6023399d..41ded04a7567 100644 --- a/lighthouse-core/test/audits/seo/canonical-test.js +++ b/lighthouse-core/test/audits/seo/canonical-test.js @@ -12,18 +12,26 @@ const networkRecordsToDevtoolsLog = require('../../network-records-to-devtools-l /* eslint-env jest */ describe('SEO: Document has valid canonical link', () => { + function link(overrides) { + if (overrides.href && !overrides.hrefRaw) overrides.hrefRaw = overrides.href; + return { + rel: '', + href: null, + hrefRaw: '', + hreflang: '', + source: 'head', + ...overrides, + }; + } + it('succeeds when there are no canonical links', () => { const finalUrl = 'https://example.com/'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: [], - Hreflang: [], + LinkElements: [], }; const context = {computedCache: new Map()}; @@ -34,19 +42,15 @@ describe('SEO: Document has valid canonical link', () => { it('fails when there are multiple canonical links', () => { const finalUrl = 'http://www.example.com/'; - const mainResource = { - url: finalUrl, - responseHeaders: [{ - name: 'Link', - value: '; rel="canonical"', - }], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://www.example.com'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://www.example.com'}), + link({rel: 'canonical', source: 'headers', href: 'https://example.com'}), + ], }; const context = {computedCache: new Map()}; @@ -58,61 +62,56 @@ describe('SEO: Document has valid canonical link', () => { it('fails when canonical url is invalid', () => { const finalUrl = 'http://www.example.com'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https:// example.com'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: null, hrefRaw: 'https:// example.com'}), + ], }; const context = {computedCache: new Map()}; return CanonicalAudit.audit(artifacts, context).then(auditResult => { - assert.equal(auditResult.rawValue, false); - assert.ok(auditResult.explanation.includes('Invalid'), auditResult.explanation); + const {rawValue, explanation} = auditResult; + assert.equal(rawValue, false); + assert.ok(explanation.includes('Invalid') && explanation.includes('https:// '), explanation); }); }); it('fails when canonical url is relative', () => { const finalUrl = 'https://example.com/de/'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['/'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'headers', href: 'https://www.example.com', hrefRaw: '/'}), + ], }; const context = {computedCache: new Map()}; return CanonicalAudit.audit(artifacts, context).then(auditResult => { - assert.equal(auditResult.rawValue, false); - assert.ok(auditResult.explanation.includes('Relative'), auditResult.explanation); + const {rawValue, explanation} = auditResult; + assert.equal(rawValue, false); + assert.ok(explanation.includes('Relative') && explanation.includes('/'), explanation); }); }); it('fails when canonical points to a different hreflang', () => { - const finalUrl = 'https://example.com'; - const mainResource = { - url: finalUrl, - responseHeaders: [{ - name: 'Link', - value: '; rel="alternate"; hreflang="xx"', - }], - }; + const finalUrl = 'https://example.com/'; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://example.com/fr/'], - Hreflang: [{href: 'https://example.com/fr/'}], + LinkElements: [ + link({rel: 'alternate', source: 'headers', href: 'https://example.com/', hreflang: 'xx'}), + link({rel: 'canonical', source: 'head', href: 'https://example.com/fr'}), + link({rel: 'alternate', source: 'head', href: 'https://example.com/fr', hreflang: 'fr'}), + ], }; const context = {computedCache: new Map()}; @@ -124,16 +123,14 @@ describe('SEO: Document has valid canonical link', () => { it('fails when canonical points to a different domain', () => { const finalUrl = 'http://localhost.test'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://example.com/'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://example.com'}), + ], }; const context = {computedCache: new Map()}; @@ -145,16 +142,14 @@ describe('SEO: Document has valid canonical link', () => { it('fails when canonical points to the root while current URL is not the root', () => { const finalUrl = 'https://example.com/articles/cats-and-you'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://example.com/'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://example.com'}), + ], }; const context = {computedCache: new Map()}; @@ -166,19 +161,15 @@ describe('SEO: Document has valid canonical link', () => { it('succeeds when there are multiple identical canonical links', () => { const finalUrl = 'http://www.example.com/'; - const mainResource = { - url: finalUrl, - responseHeaders: [{ - name: 'Link', - value: '; rel="canonical"', - }], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://www.example.com'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://example.com'}), + link({rel: 'canonical', source: 'headers', href: 'https://example.com'}), + ], }; const context = {computedCache: new Map()}; @@ -189,16 +180,14 @@ describe('SEO: Document has valid canonical link', () => { it('succeeds when valid canonical is provided via meta tag', () => { const finalUrl = 'http://example.com/articles/cats-and-you?utm_source=twitter'; - const mainResource = { - url: finalUrl, - responseHeaders: [], - }; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://example.com/articles/cats-and-you'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://example.com/articles/cats-and-you'}), + ], }; const context = {computedCache: new Map()}; @@ -208,20 +197,33 @@ describe('SEO: Document has valid canonical link', () => { }); it('succeeds when valid canonical is provided via header', () => { - const finalUrl = 'http://example.com/articles/cats-and-you?utm_source=twitter'; - const mainResource = { - url: finalUrl, - responseHeaders: [{ - name: 'Link', - value: '; rel="canonical"', - }], + const finalUrl = 'http://example.com/articles/cats?utm_source=twitter'; + const mainResource = {url: finalUrl}; + const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); + const artifacts = { + devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, + URL: {finalUrl}, + LinkElements: [ + link({rel: 'canonical', source: 'headers', href: 'https://example.com/articles/cats'}), + ], }; + + const context = {computedCache: new Map()}; + return CanonicalAudit.audit(artifacts, context).then(auditResult => { + assert.equal(auditResult.rawValue, true); + }); + }); + + it('succeeds when invalid canonical is provided in body', () => { + const finalUrl = 'http://example.com/articles/cats-and-you?utm_source=twitter'; + const mainResource = {url: finalUrl}; const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]); const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: [], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'body', href: 'https://foo.com'}), + ], }; const context = {computedCache: new Map()}; diff --git a/lighthouse-core/test/gather/gatherers/link-elements-test.js b/lighthouse-core/test/gather/gatherers/link-elements-test.js new file mode 100644 index 000000000000..f0991b78a69b --- /dev/null +++ b/lighthouse-core/test/gather/gatherers/link-elements-test.js @@ -0,0 +1,93 @@ +/** + * @license Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + */ +'use strict'; + +/* eslint-env jest */ + +const LinkElements = require('../../../gather/gatherers/link-elements.js'); + +describe('Link Elements gatherer', () => { + let linkElementsInDOM; + let passContext; + let loadData; + + function link(overrides) { + if (overrides.href && !overrides.hrefRaw) overrides.hrefRaw = overrides.href; + return { + rel: '', + href: null, + hrefRaw: '', + hreflang: '', + as: '', + crossOrigin: null, + ...overrides, + }; + } + + function setMainDocumentHeaders(headers) { + const url = 'https://example.com'; + loadData.networkRecords.push({url, responseHeaders: headers, resourceType: 'Document'}); + passContext.baseArtifacts.URL.finalUrl = url; + } + + beforeEach(() => { + linkElementsInDOM = []; + const driver = {evaluateAsync: () => Promise.resolve(linkElementsInDOM)}; + passContext = {driver, baseArtifacts: {URL: {finalUrl: ''}}}; + loadData = {networkRecords: [{url: '', responseHeaders: [], resourceType: 'Document'}]}; + }); + + it('returns elements from DOM', async () => { + linkElementsInDOM = [ + link({source: 'head', rel: 'preconnect', href: 'https://cdn.example.com'}), + link({source: 'head', rel: 'styleSheeT', href: 'https://example.com/a.css'}), + link({source: 'body', rel: 'ICON', href: 'https://example.com/a.png'}), + ]; + + const result = await new LinkElements().afterPass(passContext, loadData); + expect(result).toEqual([ + link({source: 'head', rel: 'preconnect', href: 'https://cdn.example.com'}), + link({source: 'head', rel: 'stylesheet', href: 'https://example.com/a.css'}), + link({source: 'body', rel: 'icon', href: 'https://example.com/a.png'}), + ]); + }); + + it('returns elements from headers', async () => { + setMainDocumentHeaders([ + {name: 'Link', value: '; rel=prefetch; as=image'}, + {name: 'link', value: '; rel=preconnect; crossorigin=anonymous'}, + {name: 'Link', value: '; rel="preload",; rel="canonical"'}, + {name: 'LINK', value: '; rel=alternate; hreflang=xx'}, + ]); + + const result = await new LinkElements().afterPass(passContext, loadData); + expect(result).toEqual([ + link({source: 'headers', rel: 'prefetch', href: 'https://example.com/', as: 'image'}), + link({source: 'headers', rel: 'preconnect', href: 'https://example.com/', crossOrigin: 'anonymous'}), // eslint-disable-line max-len + link({source: 'headers', rel: 'preload', href: 'https://example.com/style.css'}), + link({source: 'headers', rel: 'canonical', href: 'https://example.com/', hrefRaw: '/'}), + link({source: 'headers', rel: 'alternate', href: 'https://example.com/', hreflang: 'xx'}), + ]); + }); + + it('combines elements from headers and DOM', async () => { + linkElementsInDOM = [ + link({source: 'head', rel: 'styleSheeT', href: 'https://example.com/a.css'}), + link({source: 'body', rel: 'ICON', href: 'https://example.com/a.png'}), + ]; + + setMainDocumentHeaders([ + {name: 'Link', value: '; rel=prefetch; as=image'}, + ]); + + const result = await new LinkElements().afterPass(passContext, loadData); + expect(result).toEqual([ + link({source: 'head', rel: 'stylesheet', href: 'https://example.com/a.css'}), + link({source: 'body', rel: 'icon', href: 'https://example.com/a.png'}), + link({source: 'headers', rel: 'prefetch', href: 'https://example.com/', as: 'image'}), + ]); + }); +}); diff --git a/types/artifacts.d.ts b/types/artifacts.d.ts index e2c3e55b19b9..291e841d16e6 100644 --- a/types/artifacts.d.ts +++ b/types/artifacts.d.ts @@ -170,10 +170,20 @@ declare global { /** @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/link#Attributes */ export interface LinkElement { - rel: string - href: string + /** The `rel` attribute of the link, normalized to lower case. @see https://developer.mozilla.org/en-US/docs/Web/HTML/Link_types */ + rel: 'alternate'|'canonical'|'dns-prefetch'|'preconnect'|'preload'|'stylesheet'|string; + /** The `href` attribute of the link or `null` if it was invalid in the header. */ + href: string | null + /** The raw value of the `href` attribute. Only different from `href` when source is 'header' */ + hrefRaw: string + /** The `hreflang` attribute of the link */ + hreflang: string + /** The `as` attribute of the link */ as: string + /** The `crossOrigin` attribute of the link */ crossOrigin: 'anonymous'|'use-credentials'|null + /** Where the link was found, either in the DOM or in the headers of the main document */ + source: 'head'|'body'|'headers' } export interface Font { From 59cccb7a98df879118e7d74d6404a4406ef79b68 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 23 Jan 2019 11:53:07 -0600 Subject: [PATCH 2/9] remove canonical gatherer --- .../test/cli/__snapshots__/index-test.js.snap | 3 --- lighthouse-core/config/default-config.js | 1 - .../gather/gatherers/seo/canonical.js | 24 ------------------- 3 files changed, 28 deletions(-) delete mode 100644 lighthouse-core/gather/gatherers/seo/canonical.js diff --git a/lighthouse-cli/test/cli/__snapshots__/index-test.js.snap b/lighthouse-cli/test/cli/__snapshots__/index-test.js.snap index 064368476e80..3c68fbd99ea1 100644 --- a/lighthouse-cli/test/cli/__snapshots__/index-test.js.snap +++ b/lighthouse-cli/test/cli/__snapshots__/index-test.js.snap @@ -1101,9 +1101,6 @@ Object { Object { "path": "seo/embedded-content", }, - Object { - "path": "seo/canonical", - }, Object { "path": "seo/robots-txt", }, diff --git a/lighthouse-core/config/default-config.js b/lighthouse-core/config/default-config.js index 5fe0e35898d2..15e5414a344e 100644 --- a/lighthouse-core/config/default-config.js +++ b/lighthouse-core/config/default-config.js @@ -107,7 +107,6 @@ const defaultConfig = { 'seo/crawlable-links', 'seo/hreflang', 'seo/embedded-content', - 'seo/canonical', 'seo/robots-txt', ], }, diff --git a/lighthouse-core/gather/gatherers/seo/canonical.js b/lighthouse-core/gather/gatherers/seo/canonical.js deleted file mode 100644 index a9105434eb41..000000000000 --- a/lighthouse-core/gather/gatherers/seo/canonical.js +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @license Copyright 2017 Google Inc. All Rights Reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - */ -'use strict'; - -const Gatherer = require('../gatherer'); - -class Canonical extends Gatherer { - /** - * @param {LH.Gatherer.PassContext} passContext - * @return {Promise} - */ - afterPass(passContext) { - const driver = passContext.driver; - - return driver.querySelectorAll('head link[rel="canonical" i]') - .then(nodes => Promise.all(nodes.map(node => node.getAttribute('href')))); - } -} - -module.exports = Canonical; - From dfd6cea6085d762db79e5e71f23f9077f6392687 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 23 Jan 2019 12:01:39 -0600 Subject: [PATCH 3/9] tests --- lighthouse-core/gather/gatherers/link-elements.js | 1 + .../lib/dependency-graph/simulator/network-analyzer.js | 1 + lighthouse-core/test/computed/main-resource-test.js | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lighthouse-core/gather/gatherers/link-elements.js b/lighthouse-core/gather/gatherers/link-elements.js index 53296f4c224d..fa1dc7603386 100644 --- a/lighthouse-core/gather/gatherers/link-elements.js +++ b/lighthouse-core/gather/gatherers/link-elements.js @@ -6,6 +6,7 @@ 'use strict'; const Gatherer = require('./gatherer.js'); +const URL = require('../../lib/url-shim.js'); const NetworkAnalyzer = require('../../lib/dependency-graph/simulator/network-analyzer.js'); const LinkHeader = require('http-link-header'); const getElementsInDocumentString = require('../../lib/page-functions.js') diff --git a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js index 6ca8201306bc..cc083fd74959 100644 --- a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js +++ b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js @@ -458,6 +458,7 @@ class NetworkAnalyzer { const documentRequests = records.filter(record => record.resourceType === NetworkRequest.TYPES.Document); + if (!documentRequests.length) throw new Error('Unable to identify the main resource'); // The main document is the earliest document request, using position in networkRecords array to break ties. return documentRequests.reduce((min, r) => (r.startTime < min.startTime ? r : min)); } diff --git a/lighthouse-core/test/computed/main-resource-test.js b/lighthouse-core/test/computed/main-resource-test.js index b2276ad4744a..dd13870b5f53 100644 --- a/lighthouse-core/test/computed/main-resource-test.js +++ b/lighthouse-core/test/computed/main-resource-test.js @@ -31,7 +31,7 @@ describe('MainResource computed artifact', () => { it('thows when main resource can\'t be found', () => { const networkRecords = [ - {url: 'https://example.com'}, + {url: 'https://example.com', resourceType: 'Script'}, ]; const URL = {finalUrl: 'https://m.example.com'}; const devtoolsLog = networkRecordsToDevtoolsLog(networkRecords); From a781c23bcc63778b019ad1af9cc6ed1baa795fb1 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 23 Jan 2019 12:06:29 -0600 Subject: [PATCH 4/9] proper URL shim inclusion --- lighthouse-core/gather/gatherers/link-elements.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lighthouse-core/gather/gatherers/link-elements.js b/lighthouse-core/gather/gatherers/link-elements.js index fa1dc7603386..70ff15893536 100644 --- a/lighthouse-core/gather/gatherers/link-elements.js +++ b/lighthouse-core/gather/gatherers/link-elements.js @@ -6,7 +6,7 @@ 'use strict'; const Gatherer = require('./gatherer.js'); -const URL = require('../../lib/url-shim.js'); +const URL = require('../../lib/url-shim.js').URL; const NetworkAnalyzer = require('../../lib/dependency-graph/simulator/network-analyzer.js'); const LinkHeader = require('http-link-header'); const getElementsInDocumentString = require('../../lib/page-functions.js') From ef613c77d6772843fef17ade69cf20e467d2a849 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 23 Jan 2019 12:20:41 -0600 Subject: [PATCH 5/9] better url and smoke fix --- lighthouse-cli/test/fixtures/seo/seo-failure-cases.html | 2 +- lighthouse-cli/test/smokehouse/seo/expectations.js | 2 +- lighthouse-core/gather/gatherers/link-elements.js | 2 +- lighthouse-core/test/gather/gatherers/link-elements-test.js | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html b/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html index 5e8471f6eb05..f9aded0c5ac7 100644 --- a/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html +++ b/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html @@ -19,7 +19,7 @@ - +

SEO

diff --git a/lighthouse-cli/test/smokehouse/seo/expectations.js b/lighthouse-cli/test/smokehouse/seo/expectations.js index a1f6269dcab9..3ed3fd6a4c31 100644 --- a/lighthouse-cli/test/smokehouse/seo/expectations.js +++ b/lighthouse-cli/test/smokehouse/seo/expectations.js @@ -136,7 +136,7 @@ module.exports = [ }, 'canonical': { score: 0, - explanation: 'Multiple conflicting URLs (https://example.com, https://example.com/)', + explanation: 'Multiple conflicting URLs (https://example.com/other, https://example.com/)', }, }, }, diff --git a/lighthouse-core/gather/gatherers/link-elements.js b/lighthouse-core/gather/gatherers/link-elements.js index 70ff15893536..73fa83d91108 100644 --- a/lighthouse-core/gather/gatherers/link-elements.js +++ b/lighthouse-core/gather/gatherers/link-elements.js @@ -67,7 +67,7 @@ class LinkElements extends Gatherer { * @return {LH.Artifacts['LinkElements']} */ static getLinkElementsInHeaders(passContext, loadData) { - const finalUrl = passContext.baseArtifacts.URL.finalUrl; + const finalUrl = passContext.url; const records = loadData.networkRecords; const mainDocument = NetworkAnalyzer.findMainDocument(records, finalUrl); if (!mainDocument) return []; diff --git a/lighthouse-core/test/gather/gatherers/link-elements-test.js b/lighthouse-core/test/gather/gatherers/link-elements-test.js index f0991b78a69b..14a009b7c7e9 100644 --- a/lighthouse-core/test/gather/gatherers/link-elements-test.js +++ b/lighthouse-core/test/gather/gatherers/link-elements-test.js @@ -30,13 +30,13 @@ describe('Link Elements gatherer', () => { function setMainDocumentHeaders(headers) { const url = 'https://example.com'; loadData.networkRecords.push({url, responseHeaders: headers, resourceType: 'Document'}); - passContext.baseArtifacts.URL.finalUrl = url; + passContext.url = url; } beforeEach(() => { linkElementsInDOM = []; const driver = {evaluateAsync: () => Promise.resolve(linkElementsInDOM)}; - passContext = {driver, baseArtifacts: {URL: {finalUrl: ''}}}; + passContext = {driver, url: ''}; loadData = {networkRecords: [{url: '', responseHeaders: [], resourceType: 'Document'}]}; }); From db93a43283e30a2b61478ee30b4c539adb7a3c58 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Mon, 11 Feb 2019 16:03:33 -0600 Subject: [PATCH 6/9] cleanup canonical audit --- lighthouse-core/audits/seo/canonical.js | 95 ++++++++++++++++++++----- 1 file changed, 77 insertions(+), 18 deletions(-) diff --git a/lighthouse-core/audits/seo/canonical.js b/lighthouse-core/audits/seo/canonical.js index 32fafb7f848e..6942ea505736 100644 --- a/lighthouse-core/audits/seo/canonical.js +++ b/lighthouse-core/audits/seo/canonical.js @@ -16,9 +16,20 @@ const MainResource = require('../../computed/main-resource.js'); * @returns {string} */ function getPrimaryDomain(url) { - return url.hostname.split('.').slice(-2).join('.'); + return url.hostname + .split('.') + .slice(-2) + .join('.'); } +/** + * @typedef CanonicalURLData + * @property {Set} uniqueCanonicalURLs + * @property {Set} hreflangURLs + * @property {LH.Artifacts.LinkElement|undefined} invalidCanonicalLink + * @property {LH.Artifacts.LinkElement|undefined} relativeCanonicallink + */ + class Canonical extends Audit { /** * @return {LH.Audit.Meta} @@ -28,7 +39,8 @@ class Canonical extends Audit { id: 'canonical', title: 'Document has a valid `rel=canonical`', failureTitle: 'Document does not have a valid `rel=canonical`', - description: 'Canonical links suggest which URL to show in search results. ' + + description: + 'Canonical links suggest which URL to show in search results. ' + '[Learn more](https://developers.google.com/web/tools/lighthouse/audits/canonical).', requiredArtifacts: ['LinkElements', 'URL'], }; @@ -36,19 +48,13 @@ class Canonical extends Audit { /** * @param {LH.Artifacts} artifacts - * @param {LH.Audit.Context} context - * @return {Promise} + * @return {CanonicalURLData} */ - static async audit(artifacts, context) { - const devtoolsLog = artifacts.devtoolsLogs[Audit.DEFAULT_PASS]; - - const mainResource = await MainResource.request({devtoolsLog, URL: artifacts.URL}, context); - const baseURL = new URL(mainResource.url); - + static collectCanonicalURLs(artifacts) { /** @type {Set} */ const uniqueCanonicalURLs = new Set(); - /** @type {string[]} */ - const hreflangURLs = []; + /** @type {Set} */ + const hreflangURLs = new Set(); /** @type {LH.Artifacts.LinkElement|undefined} */ let invalidCanonicalLink; @@ -64,10 +70,20 @@ class Canonical extends Audit { else if (!URL.isValid(link.hrefRaw)) relativeCanonicallink = link; else uniqueCanonicalURLs.add(link.href); } else if (link.rel === 'alternate') { - if (link.href && link.hreflang) hreflangURLs.push(link.href); + if (link.href && link.hreflang) hreflangURLs.add(link.href); } } + return {uniqueCanonicalURLs, hreflangURLs, invalidCanonicalLink, relativeCanonicallink}; + } + + /** + * @param {CanonicalURLData} canonicalURLData + * @return {string|LH.Audit.Product} + */ + static findValidCanonicaURLOrFinish(canonicalURLData) { + const {uniqueCanonicalURLs, invalidCanonicalLink, relativeCanonicallink} = canonicalURLData; + // the canonical link is totally invalid if (invalidCanonicalLink) { return { @@ -103,11 +119,24 @@ class Canonical extends Audit { }; } - const canonicalURL = new URL(canonicalURLs[0]); + return canonicalURLs[0]; + } + + /** + * @param {CanonicalURLData} canonicalURLData + * @param {URL} canonicalURL + * @param {URL} baseURL + * @return {LH.Audit.Product|undefined} + */ + static findCommonCanonicalURLMistakes(canonicalURLData, canonicalURL, baseURL) { + const {hreflangURLs} = canonicalURLData; // cross-language or cross-country canonicals are a common issue - if (hreflangURLs.includes(baseURL.href) && hreflangURLs.includes(canonicalURL.href) && - baseURL.href !== canonicalURL.href) { + if ( + hreflangURLs.has(baseURL.href) && + hreflangURLs.has(canonicalURL.href) && + baseURL.href !== canonicalURL.href + ) { return { rawValue: false, explanation: `Points to another hreflang location (${baseURL.href})`, @@ -124,13 +153,43 @@ class Canonical extends Audit { } // another common mistake is to have canonical pointing from all pages of the website to its root - if (canonicalURL.origin === baseURL.origin && - canonicalURL.pathname === '/' && baseURL.pathname !== '/') { + if ( + canonicalURL.origin === baseURL.origin && + canonicalURL.pathname === '/' && + baseURL.pathname !== '/' + ) { return { rawValue: false, explanation: 'Points to a root of the same origin', }; } + } + + /** + * @param {LH.Artifacts} artifacts + * @param {LH.Audit.Context} context + * @return {Promise} + */ + static async audit(artifacts, context) { + const devtoolsLog = artifacts.devtoolsLogs[Audit.DEFAULT_PASS]; + + const mainResource = await MainResource.request({devtoolsLog, URL: artifacts.URL}, context); + const baseURL = new URL(mainResource.url); + + const canonicalURLData = Canonical.collectCanonicalURLs(artifacts); + const canonicalURLOrAuditProduct = Canonical.findValidCanonicaURLOrFinish(canonicalURLData); + // We didn't find a valid canonical URL, we found an error result so go ahead and return it. + if (typeof canonicalURLOrAuditProduct === 'object') return canonicalURLOrAuditProduct; + + // We found a valid canonical URL, so we'll just check for common mistakes. + const canonicalURL = new URL(canonicalURLOrAuditProduct); + const mistakeAuditProduct = Canonical.findCommonCanonicalURLMistakes( + canonicalURLData, + canonicalURL, + baseURL + ); + + if (mistakeAuditProduct) return mistakeAuditProduct; return { rawValue: true, From 4f20c9eb48c0e59f32b4df916fca7e4da9d41cdd Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Wed, 27 Feb 2019 12:13:39 -0800 Subject: [PATCH 7/9] canonical fixes --- lighthouse-core/test/audits/seo/canonical-test.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lighthouse-core/test/audits/seo/canonical-test.js b/lighthouse-core/test/audits/seo/canonical-test.js index 07aac31535df..05e1920caa28 100644 --- a/lighthouse-core/test/audits/seo/canonical-test.js +++ b/lighthouse-core/test/audits/seo/canonical-test.js @@ -57,7 +57,7 @@ describe('SEO: Document has valid canonical link', () => { return CanonicalAudit.audit(artifacts, context).then(auditResult => { assert.equal(auditResult.rawValue, false); expect(auditResult.explanation) - .toBeDisplayString('Multiple conflicting URLs (https://example.com, https://www.example.com)'); + .toBeDisplayString('Multiple conflicting URLs (https://www.example.com, https://example.com)'); }); }); @@ -77,7 +77,7 @@ describe('SEO: Document has valid canonical link', () => { return CanonicalAudit.audit(artifacts, context).then(auditResult => { const {rawValue, explanation} = auditResult; assert.equal(rawValue, false); - assert.ok(explanation.includes('Invalid') && explanation.includes('https:// '), explanation); + expect(explanation).toBeDisplayString('Invalid URL (https:// example.com)'); }); }); @@ -97,7 +97,7 @@ describe('SEO: Document has valid canonical link', () => { return CanonicalAudit.audit(artifacts, context).then(auditResult => { const {rawValue, explanation} = auditResult; assert.equal(rawValue, false); - assert.ok(explanation.includes('Relative') && explanation.includes('/'), explanation); + expect(explanation).toBeDisplayString('Relative URL (/)'); }); }); @@ -153,8 +153,9 @@ describe('SEO: Document has valid canonical link', () => { const artifacts = { devtoolsLogs: {[CanonicalAudit.DEFAULT_PASS]: devtoolsLog}, URL: {finalUrl}, - Canonical: ['https://example.com/'], - Hreflang: [], + LinkElements: [ + link({rel: 'canonical', source: 'head', href: 'https://example.com'}), + ], }; const context = {computedCache: new Map()}; From 5f8e312872ef373302180ded4a65f95950073084 Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Thu, 7 Mar 2019 14:33:59 -0600 Subject: [PATCH 8/9] feedback --- lighthouse-core/audits/seo/canonical.js | 29 +++++++------ .../gather/gatherers/link-elements.js | 7 +++- .../simulator/network-analyzer.js | 1 + .../test/audits/seo/canonical-test.js | 4 ++ .../gather/gatherers/link-elements-test.js | 41 ++++++++----------- types/artifacts.d.ts | 4 +- 6 files changed, 46 insertions(+), 40 deletions(-) diff --git a/lighthouse-core/audits/seo/canonical.js b/lighthouse-core/audits/seo/canonical.js index 6041145e812c..e1bd050c485b 100644 --- a/lighthouse-core/audits/seo/canonical.js +++ b/lighthouse-core/audits/seo/canonical.js @@ -71,10 +71,10 @@ class Canonical extends Audit { } /** - * @param {LH.Artifacts} artifacts + * @param {LH.Artifacts.LinkElement[]} linkElements * @return {CanonicalURLData} */ - static collectCanonicalURLs(artifacts) { + static collectCanonicalURLs(linkElements) { /** @type {Set} */ const uniqueCanonicalURLs = new Set(); /** @type {Set} */ @@ -84,14 +84,19 @@ class Canonical extends Audit { let invalidCanonicalLink; /** @type {LH.Artifacts.LinkElement|undefined} */ let relativeCanonicallink; - for (const link of artifacts.LinkElements) { + for (const link of linkElements) { + // Links in the body aren't canonical references for SEO, skip them if (link.source === 'body') continue; if (link.rel === 'canonical') { + // Links that don't have an href aren't canonical references for SEO, skip them if (!link.hrefRaw) continue; + // Links that had an hrefRaw but didn't have a valid href were invalid, flag them if (!link.href) invalidCanonicalLink = link; + // Links that had a valid href but didn't have a valid hrefRaw must have been relatively resolved, flag them else if (!URL.isValid(link.hrefRaw)) relativeCanonicallink = link; + // Otherwise, it was a valid canonical URL else uniqueCanonicalURLs.add(link.href); } else if (link.rel === 'alternate') { if (link.href && link.hreflang) hreflangURLs.add(link.href); @@ -103,9 +108,9 @@ class Canonical extends Audit { /** * @param {CanonicalURLData} canonicalURLData - * @return {string|LH.Audit.Product} + * @return {LH.Audit.Product|undefined} */ - static findValidCanonicaURLOrFinish(canonicalURLData) { + static findInvalidCanonicalURLReason(canonicalURLData) { const {uniqueCanonicalURLs, invalidCanonicalLink, relativeCanonicallink} = canonicalURLData; // the canonical link is totally invalid @@ -142,8 +147,6 @@ class Canonical extends Audit { explanation: str_(UIStrings.explanationConflict, {urlList: canonicalURLs.join(', ')}), }; } - - return canonicalURLs[0]; } /** @@ -199,14 +202,14 @@ class Canonical extends Audit { const mainResource = await MainResource.request({devtoolsLog, URL: artifacts.URL}, context); const baseURL = new URL(mainResource.url); + const canonicalURLData = Canonical.collectCanonicalURLs(artifacts.LinkElements); - const canonicalURLData = Canonical.collectCanonicalURLs(artifacts); - const canonicalURLOrAuditProduct = Canonical.findValidCanonicaURLOrFinish(canonicalURLData); - // We didn't find a valid canonical URL, we found an error result so go ahead and return it. - if (typeof canonicalURLOrAuditProduct === 'object') return canonicalURLOrAuditProduct; + // First we'll check that there was a single valid canonical URL. + const invalidURLAuditProduct = Canonical.findInvalidCanonicalURLReason(canonicalURLData); + if (invalidURLAuditProduct) return invalidURLAuditProduct; - // We found a valid canonical URL, so we'll just check for common mistakes. - const canonicalURL = new URL(canonicalURLOrAuditProduct); + // There was a single valid canonical URL, so now we'll just check for common mistakes. + const canonicalURL = new URL([...canonicalURLData.uniqueCanonicalURLs][0]); const mistakeAuditProduct = Canonical.findCommonCanonicalURLMistakes( canonicalURLData, canonicalURL, diff --git a/lighthouse-core/gather/gatherers/link-elements.js b/lighthouse-core/gather/gatherers/link-elements.js index 73fa83d91108..156dad46c254 100644 --- a/lighthouse-core/gather/gatherers/link-elements.js +++ b/lighthouse-core/gather/gatherers/link-elements.js @@ -12,6 +12,12 @@ const LinkHeader = require('http-link-header'); const getElementsInDocumentString = require('../../lib/page-functions.js') .getElementsInDocumentString; // eslint-disable-line max-len +/** + * @fileoverview + * This gatherer collects all the effect `link` elements, both in the page and declared in the + * headers of the main resource. + */ + /** * * @param {string} url @@ -70,7 +76,6 @@ class LinkElements extends Gatherer { const finalUrl = passContext.url; const records = loadData.networkRecords; const mainDocument = NetworkAnalyzer.findMainDocument(records, finalUrl); - if (!mainDocument) return []; /** @type {LH.Artifacts['LinkElements']} */ const linkElements = []; diff --git a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js index cc083fd74959..70baf743b39e 100644 --- a/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js +++ b/lighthouse-core/lib/dependency-graph/simulator/network-analyzer.js @@ -454,6 +454,7 @@ class NetworkAnalyzer { const mainResource = records.find(request => finalURL.startsWith(request.url) && URL.equalWithExcludedFragments(request.url, finalURL)); if (mainResource) return mainResource; + // TODO: beacon !mainResource to Sentry, https://github.com/GoogleChrome/lighthouse/issues/7041 } const documentRequests = records.filter(record => record.resourceType === diff --git a/lighthouse-core/test/audits/seo/canonical-test.js b/lighthouse-core/test/audits/seo/canonical-test.js index 05e1920caa28..425314a4175c 100644 --- a/lighthouse-core/test/audits/seo/canonical-test.js +++ b/lighthouse-core/test/audits/seo/canonical-test.js @@ -12,6 +12,10 @@ const networkRecordsToDevtoolsLog = require('../../network-records-to-devtools-l /* eslint-env jest */ describe('SEO: Document has valid canonical link', () => { + /** + * @param {Partial} overrides + * @return {LH.Artifacts.LinkElement} + */ function link(overrides) { if (overrides.href && !overrides.hrefRaw) overrides.hrefRaw = overrides.href; return { diff --git a/lighthouse-core/test/gather/gatherers/link-elements-test.js b/lighthouse-core/test/gather/gatherers/link-elements-test.js index 14a009b7c7e9..92b9efcf67ed 100644 --- a/lighthouse-core/test/gather/gatherers/link-elements-test.js +++ b/lighthouse-core/test/gather/gatherers/link-elements-test.js @@ -10,10 +10,10 @@ const LinkElements = require('../../../gather/gatherers/link-elements.js'); describe('Link Elements gatherer', () => { - let linkElementsInDOM; - let passContext; - let loadData; - + /** + * @param {Partial} overrides + * @return {LH.Artifact.LinkElement} + */ function link(overrides) { if (overrides.href && !overrides.hrefRaw) overrides.hrefRaw = overrides.href; return { @@ -27,27 +27,22 @@ describe('Link Elements gatherer', () => { }; } - function setMainDocumentHeaders(headers) { + function getPassData({linkElementsInDOM = [], headers = []}) { const url = 'https://example.com'; - loadData.networkRecords.push({url, responseHeaders: headers, resourceType: 'Document'}); - passContext.url = url; - } - - beforeEach(() => { - linkElementsInDOM = []; + const loadData = {networkRecords: [{url, responseHeaders: headers, resourceType: 'Document'}]}; const driver = {evaluateAsync: () => Promise.resolve(linkElementsInDOM)}; - passContext = {driver, url: ''}; - loadData = {networkRecords: [{url: '', responseHeaders: [], resourceType: 'Document'}]}; - }); + const passContext = {driver, url}; + return [passContext, loadData]; + } it('returns elements from DOM', async () => { - linkElementsInDOM = [ + const linkElementsInDOM = [ link({source: 'head', rel: 'preconnect', href: 'https://cdn.example.com'}), link({source: 'head', rel: 'styleSheeT', href: 'https://example.com/a.css'}), link({source: 'body', rel: 'ICON', href: 'https://example.com/a.png'}), ]; - const result = await new LinkElements().afterPass(passContext, loadData); + const result = await new LinkElements().afterPass(...getPassData({linkElementsInDOM})); expect(result).toEqual([ link({source: 'head', rel: 'preconnect', href: 'https://cdn.example.com'}), link({source: 'head', rel: 'stylesheet', href: 'https://example.com/a.css'}), @@ -56,14 +51,14 @@ describe('Link Elements gatherer', () => { }); it('returns elements from headers', async () => { - setMainDocumentHeaders([ + const headers = [ {name: 'Link', value: '; rel=prefetch; as=image'}, {name: 'link', value: '; rel=preconnect; crossorigin=anonymous'}, {name: 'Link', value: '; rel="preload",; rel="canonical"'}, {name: 'LINK', value: '; rel=alternate; hreflang=xx'}, - ]); + ]; - const result = await new LinkElements().afterPass(passContext, loadData); + const result = await new LinkElements().afterPass(...getPassData({headers})); expect(result).toEqual([ link({source: 'headers', rel: 'prefetch', href: 'https://example.com/', as: 'image'}), link({source: 'headers', rel: 'preconnect', href: 'https://example.com/', crossOrigin: 'anonymous'}), // eslint-disable-line max-len @@ -74,16 +69,16 @@ describe('Link Elements gatherer', () => { }); it('combines elements from headers and DOM', async () => { - linkElementsInDOM = [ + const linkElementsInDOM = [ link({source: 'head', rel: 'styleSheeT', href: 'https://example.com/a.css'}), link({source: 'body', rel: 'ICON', href: 'https://example.com/a.png'}), ]; - setMainDocumentHeaders([ + const headers = [ {name: 'Link', value: '; rel=prefetch; as=image'}, - ]); + ]; - const result = await new LinkElements().afterPass(passContext, loadData); + const result = await new LinkElements().afterPass(...getPassData({linkElementsInDOM, headers})); expect(result).toEqual([ link({source: 'head', rel: 'stylesheet', href: 'https://example.com/a.css'}), link({source: 'body', rel: 'icon', href: 'https://example.com/a.png'}), diff --git a/types/artifacts.d.ts b/types/artifacts.d.ts index 144597f92fc1..fed0d0479fa1 100644 --- a/types/artifacts.d.ts +++ b/types/artifacts.d.ts @@ -56,8 +56,6 @@ declare global { AppCacheManifest: string | null; /** Array of all URLs cached in CacheStorage. */ CacheContents: string[]; - /** Href values of link[rel=canonical] nodes found in HEAD (or null, if no href attribute). */ - Canonical: (string | null)[]; /** Console deprecation and intervention warnings logged by Chrome during page load. */ ChromeConsoleMessages: Crdp.Log.EntryAddedEvent[]; /** CSS coverage information for styles used by page's final state. */ @@ -69,7 +67,7 @@ declare global { DOMStats: Artifacts.DOMStats; /** Relevant attributes and child properties of all s, s and s in the page. */ EmbeddedContent: Artifacts.EmbeddedContentInfo[]; - /** All the link elements on the page. */ + /** All the link elements on the page or equivalently declared in `Link` headers. */ LinkElements: Artifacts.LinkElement[]; /** Information for font faces used in the page. */ Fonts: Artifacts.Font[]; From 59bd59c00cb7c26e1f0f8f3f1e555f2c98a6181b Mon Sep 17 00:00:00 2001 From: Patrick Hulce Date: Thu, 7 Mar 2019 18:33:24 -0600 Subject: [PATCH 9/9] add links --- lighthouse-core/audits/seo/canonical.js | 1 + types/artifacts.d.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lighthouse-core/audits/seo/canonical.js b/lighthouse-core/audits/seo/canonical.js index e1bd050c485b..1a445570f90e 100644 --- a/lighthouse-core/audits/seo/canonical.js +++ b/lighthouse-core/audits/seo/canonical.js @@ -86,6 +86,7 @@ class Canonical extends Audit { let relativeCanonicallink; for (const link of linkElements) { // Links in the body aren't canonical references for SEO, skip them + /** @see https://html.spec.whatwg.org/multipage/links.html#body-ok */ if (link.source === 'body') continue; if (link.rel === 'canonical') { diff --git a/types/artifacts.d.ts b/types/artifacts.d.ts index fed0d0479fa1..1bf036cbabe0 100644 --- a/types/artifacts.d.ts +++ b/types/artifacts.d.ts @@ -67,7 +67,7 @@ declare global { DOMStats: Artifacts.DOMStats; /** Relevant attributes and child properties of all s, s and s in the page. */ EmbeddedContent: Artifacts.EmbeddedContentInfo[]; - /** All the link elements on the page or equivalently declared in `Link` headers. */ + /** All the link elements on the page or equivalently declared in `Link` headers. @see https://html.spec.whatwg.org/multipage/links.html */ LinkElements: Artifacts.LinkElement[]; /** Information for font faces used in the page. */ Fonts: Artifacts.Font[];