Skip to content

Commit

Permalink
Implement Firefox-compatible Content-Type parsing.
Browse files Browse the repository at this point in the history
Verified using the unit tests from
https://searchfox.org/mozilla-central/rev/a662f122c37704456457a526af90db4e3c0fd10e/netwerk/test/unit/test_parse_content_type.js#28-199

1. Inspect background page.
2. Run following snippet and then paste the tests from the above URL.

```
netutil = {
 parseRequestContentType() { },
 parseResponseContentType(t) {console.log(t);return ContentHandlers.parseResponseContentType(t); }
};
check = (t, c) => {
if (type) {
 console.assert(type.mimeType == t, `Expected (${t}), got (${type.mimeType})`);
 console.assert(type.charset == c, `Expected (${c}), got (${type.charset})`);
}
type = null;
}
charset = hadCharset = null;
```
  • Loading branch information
Rob--W committed Nov 15, 2017
1 parent 0e71055 commit a6b926e
Showing 1 changed file with 133 additions and 5 deletions.
138 changes: 133 additions & 5 deletions extension/content-handlers-firefox.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,28 @@ var ContentHandlers = {};
* @returns {ParsedContentType} The parsed value of `contentType`.
*/
ContentHandlers.parseResponseContentType = function(contentType) {
// TODO: Implement
let parsedContentType = {
contentType,
mimeType: '',
charset: '',
};
// JS implementation of net_ParseContentType:
// https://searchfox.org/mozilla-central/rev/8a6a6bef7c54425970aa4fb039cc6463a19c0b7f/netwerk/base/nsURLHelper.cpp#978-1030
let curTypeStart = 0;
do {
// curTypeStart points to the start of the current media-type. We want
// to look for its end.
let curTypeEnd = _findMediaDelimiter(contentType, curTypeStart, ',');
// At this point curTypeEnd points to the spot where the media-type
// starting at curTypeEnd ends. Time to parse that!
_parseMediaType(
contentType.substring(curTypeStart, curTypeEnd),
parsedContentType);
// And let's move on to the next media-type
curTypeStart = curTypeEnd + 1;
} while (curTypeStart < contentType.length);

// For now, cater for the common case ("type/subtype"):
let mimeType = contentType.split(';', 1)[0].trim().toLowerCase();
let charset = '';
return {contentType, mimeType, charset};
return parsedContentType;
};

/**
Expand Down Expand Up @@ -148,6 +163,119 @@ startxref
});
}

// Find media delimiter; skipping over quoted strings if any.
function _findMediaDelimiter(flatStr, searchStart, delimiter) {
// JS implementation of net_FindMediaDelimiter:
// https://searchfox.org/mozilla-central/rev/a662f122c37704456457a526af90db4e3c0fd10e/netwerk/base/nsURLHelper.cpp#798-831
for (;;) {
let curDelimPos = flatStr.indexOf(delimiter, searchStart);
if (curDelimPos === -1) curDelimPos = flatStr.length;
let strDelimPos = flatStr.indexOf('"', searchStart);
if (strDelimPos === -1 || strDelimPos >= curDelimPos) return curDelimPos;
// We hit the start of a quoted string. Look for its end.
searchStart = _findStringEnd(flatStr, strDelimPos);
if (searchStart === flatStr.length) return searchStart;
++searchStart;
// searchStart now points to the first char after the end of the
// string, so just go back to the top of the loop and look for
// |delimiter| again.
}
}

function _findPatternOrEnd(flatStr, searchStart, pattern) {
var flatStrLen = flatStr.length;
if (searchStart && searchStart < flatStrLen) {
flatStr = flatStr.slice(searchStart);
}
let i = flatStr.search(pattern);
return i === -1 ? flatStrLen : searchStart + i;
}

function _findStringEnd(flatStr, stringStart) {
// JS implementation of net_FindStringEnd:
// https://searchfox.org/mozilla-central/rev/a662f122c37704456457a526af90db4e3c0fd10e/netwerk/base/nsURLHelper.cpp#756-795
// Firefox's implementation allows for single quotes, but in practice only double-quotes are
// passed. This is because RFC 2616 only allows double quotes as delimiter.
console.assert(flatStr.charAt(stringStart) === '"', 'stringStart must be a double quote.');
for (let i = stringStart + 1; i < flatStr.length; ++i) {
if (flatStr[i] === '"') {
return i;
}
if (flatStr[i] === '\\') {
// Hit a backslash-escaped char. Need to skip over it.
++i;
}
}
return flatStr.length;
}

function _parseMediaType(flatStr, parsedContentType) {
// JS implementation of net_ParseMediaType:
// https://searchfox.org/mozilla-central/rev/a662f122c37704456457a526af90db4e3c0fd10e/netwerk/base/nsURLHelper.cpp#833-962
// The following parameters are implemented:
// aMediaTypeStr -> flatStr
// &aContentType -> parsedContentType.mimeType
// &aContentCharset -> parsedContentType.charset
// All others (aHadCharset, aCharsetStart, aCharsetEnd, aStrict) are not.

// Skip HTTP_LWS.
let typeStart = _findPatternOrEnd(flatStr, 0, /[^ \t]/);
let typeEnd = _findPatternOrEnd(flatStr, typeStart, /[ \t;(]/);

let charsetValue = '';
let paramStart = flatStr.indexOf(';', typeEnd);
if (paramStart !== -1) {
let curParamStart = paramStart + 1;
do {
let curParamEnd = _findMediaDelimiter(flatStr, curParamStart, ';');
let charsetPattern = /[ \t]*charset=/iy;
charsetPattern.lastIndex = curParamStart;
if (charsetPattern.test(flatStr)) {
charsetValue = flatStr.slice(charsetPattern.lastIndex, curParamEnd);
}
curParamStart = curParamEnd + 1;
} while (curParamStart < flatStr.length);
}

let charsetNeedsQuotedStringUnescaping = false;
if (charsetValue) {
if (charsetValue.charAt(0) === '"') {
charsetNeedsQuotedStringUnescaping = true;
let end = _findStringEnd(charsetValue, 0);
charsetValue = charsetValue.slice(1, end);
} else {
let end = _findPatternOrEnd(charsetValue, 0, /[ \t;(]/);
charsetValue = charsetValue.slice(0, end);
}
}

// if the server sent "*/*", it is meaningless, so do not store it.
// also, if type is the same as aContentType, then just update the
// charset. however, if charset is empty and aContentType hasn't
// changed, then don't wipe-out an existing aContentCharset. We
// also want to reject a mime-type if it does not include a slash.
// some servers give junk after the charset parameter, which may
// include a comma, so this check makes us a bit more tolerant.

let mimeType = flatStr.slice(typeStart, typeEnd).toLowerCase();
if (mimeType && mimeType.includes('/') && mimeType !== '*/*') {
let eq = parsedContentType.mimeType !== '' &&
parsedContentType.mimeType === mimeType;
if (!eq) {
parsedContentType.mimeType = mimeType;
}
if (!eq && parsedContentType.charset || charsetValue) {
if (charsetNeedsQuotedStringUnescaping) {
// parameters using the "quoted-string" syntax need
// backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
parsedContentType.charset = charsetValue.replace(/\\(.)/g, '$1');
} else {
parsedContentType.charset = charsetValue;
}
}
}
}

// Most of the following types are registered in Firefox to the "Gecko-Content-Viewers" category.
// Some of the types (in MIME_TYPES_OTHER) are stream converters ("@mozilla.org/streamconv;1"),
// content handler ("@mozilla.org/uriloader/content-handler;1") or
Expand Down

0 comments on commit a6b926e

Please sign in to comment.