Skip to content

Commit

Permalink
Merge branch 'Flimm-unicode-links' (closes #3).
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidAnson committed Apr 12, 2016
2 parents 88e912f + 86b0521 commit 4cef173
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.json
Expand Up @@ -167,7 +167,7 @@
"max-len": "off",
"max-nested-callbacks": "error",
"max-params": ["error", 5],
"max-statements": ["error", 56],
"max-statements": ["error", 58],
"max-statements-per-line": "error",
"new-cap": "error",
"new-parens": "error",
Expand Down
20 changes: 18 additions & 2 deletions checkPages.js
Expand Up @@ -24,6 +24,7 @@ module.exports = function(host, options, done) {
var request = require('request');
var requestFile = require('./requestFile.js');
var sax = require('sax');
var urijs = require('urijs');
var url = require('url');

// Global variables
Expand Down Expand Up @@ -55,6 +56,21 @@ module.exports = function(host, options, done) {
});
}

/**
 * Normalizes a URI so that international characters and domains are handled.
 *
 * @param {string} uri URI to normalize.
 * @returns {string} The input unchanged for "file:" URIs, otherwise the
 *   string form of the URI.js-normalized URI.
 */
function normalizeUri(uri) {
  var parsedByUrl = url.parse(uri);
  // "file:" URIs are passed through untouched
  if (parsedByUrl.protocol === 'file:') {
    return uri;
  }
  var parsedByUrijs = urijs(uri).normalize();
  var result = parsedByUrijs.toString();
  // URI.js removes an empty fragment; put the bare "#" back when the input had one
  var lostEmptyFragment = (parsedByUrl.hash === '#') && (parsedByUrijs.hash() === '');
  return lostEmptyFragment ? (result + '#') : result;
}

// Returns a callback to test the specified link
function testLink(page, link, retryWithGet) {
return function(callback) {
Expand Down Expand Up @@ -94,7 +110,7 @@ module.exports = function(host, options, done) {
}
var res = null;
var useGetRequest = retryWithGet || options.queryHashes;
var req = requestFor(link)(link, {
var req = requestFor(link)(normalizeUri(link), {
method: useGetRequest ? 'GET' : 'HEAD',
followRedirect: !options.noRedirects
})
Expand Down Expand Up @@ -172,7 +188,7 @@ module.exports = function(host, options, done) {
return function(callback) {
var logError = logPageError.bind(null, page);
var start = Date.now();
requestFor(page).get(page, function(err, res, body) {
requestFor(page).get(normalizeUri(page), function(err, res, body) {
var elapsed = Date.now() - start;
if (err) {
logError('Page error (' + err.message + '): ' + page + ' (' + elapsed + 'ms)');
Expand Down
3 changes: 2 additions & 1 deletion package.json
Expand Up @@ -26,7 +26,8 @@
"cheerio": "^0.20.0",
"crc-hash": "^0.2.2",
"request": "^2.70.0",
"sax": "^1.2.1"
"sax": "^1.2.1",
"urijs": "^1.17.1"
},
"devDependencies": {
"eslint": "^2.7.0",
Expand Down
31 changes: 29 additions & 2 deletions test/checkPages_test.js
Expand Up @@ -566,7 +566,7 @@ exports.checkPages = {
nock('http://169.254.1.1').head('/').reply(200);
nock('http://[::1]:80').head('/').reply(200);
nock('http://[ff02::1]:80').head('/').reply(200);
nock('http://[0000:0000:0000:0000:0000:0000:0000:0001]:80').head('/').reply(200);
nock('http://[::1]:80').head('/').reply(200);
runTest({
pageUrls: ['http://example.com/localLinks.html'],
checkLinks: true,
Expand Down Expand Up @@ -747,6 +747,33 @@ exports.checkPages = {
[]));
},

checkLinksNonAscii: function(test) {
test.expect(8);
nockFiles(['nonAscii.html']);
nock('http://example.com')
.head(encodeURI('/first/☺')).reply(200)
.get(encodeURI('/first/☺')).reply(200)
.head(encodeURI('/second/☺')).reply(200)
.get(encodeURI('/second/☺')).reply(200)
.head(encodeURI('/third/☺ ☺')).reply(200)
.get(encodeURI('/third/☺ ☺')).reply(200);
nock('http://xn--exampl-gva.com')
.head(encodeURI('/rosé')).reply(200)
.get(encodeURI('/rosé')).reply(200);
runTest({
pageUrls: ['http://example.com/nonAscii.html'],
checkLinks: true
},
testOutput(test,
['Page: http://example.com/nonAscii.html (00ms)',
'Link: http://example.com/first/☺ (00ms)',
'Link: http://example.com/second/%E2%98%BA (00ms)',
'Link: http://example.com/third/☺%20☺ (00ms)',
'Link: http://xn--exampl-gva.com/rosé (00ms)'],
[]
));
},

// checkXhtml functionality

checkXhtmlValid: function(test) {
Expand Down Expand Up @@ -1307,7 +1334,7 @@ exports.checkPages = {
nock('http://localhost').head('/').reply(200);
nock('http://[::1]:80').head('/').reply(200);
nock('http://[ff02::1]:80').head('/').reply(200);
nock('http://[0000:0000:0000:0000:0000:0000:0000:0001]:80').head('/').reply(200);
nock('http://[::1]:80').head('/').reply(200);
runTest({
pageUrls: ['test/localLinks.html'],
checkLinks: true,
Expand Down
13 changes: 13 additions & 0 deletions test/nonAscii.html
@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8">
<title>Valid Page</title>
</head>
<body>
<a href="http://example.com/first/☺">http://example.com/first/☺</a>
<a href="http://example.com/second/%E2%98%BA">http://example.com/second/%E2%98%BA</a>
<a href="http://example.com/third/☺ ☺">http://example.com/third/☺ ☺</a>
<a href="http://examplé.com/rosé">http://examplé.com/rosé</a>
</body>
</html>

0 comments on commit 4cef173

Please sign in to comment.