Skip to content

Commit

Permalink
moved to platform core to avoid inclusion multiple times
Browse files Browse the repository at this point in the history
  • Loading branch information
thatcher committed Apr 27, 2010
1 parent 09ce067 commit aa60e10
Showing 1 changed file with 329 additions and 0 deletions.
329 changes: 329 additions & 0 deletions src/platform/core/urlparse.js
@@ -0,0 +1,329 @@
/*
* Copyright (c) 2010 Nick Galbreath
* http://code.google.com/p/stringencoders/source/browse/#svn/trunk/javascript
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

/*
* url processing in the spirit of python's urlparse module
* see `pydoc urlparse` or
* http://docs.python.org/library/urlparse.html
*
* urlsplit: break apart a URL into components
* urlunsplit: reconsistute a URL from componets
* urljoin: join an absolute and another URL
* urldefrag: remove the fragment from a URL
*
* Take a look at the tests in urlparse-test.html
*
* On URL Normalization:
*
* urlsplit only does minor normalization the components Only scheme
* and hostname are lowercased urljoin does a bit more, normalizing
* paths with "." and "..".
* urlnormalize adds additional normalization
*
* * removes default port numbers
* http://abc.com:80/ -> http://abc.com/, etc
* * normalizes path
* http://abc.com -> http://abc.com/
* and other "." and ".." cleanups
* * if file, remove query and fragment
*
* It does not do:
* * normalizes escaped hex values
* http://abc.com/%7efoo -> http://abc.com/%7Efoo
* * normalize '+' <--> '%20'
*
* Differences with Python
*
* The javascript urlsplit returns a normal object with the following
* properties: scheme, netloc, hostname, port, path, query, fragment.
* All properties are read-write.
*
* In python, the resulting object is not a dict, but a specialized,
* read-only, and has alternative tuple interface (e.g. obj[0] ==
* obj.scheme). It's not clear why such a simple function requires
* a unique datastructure.
*
* urlunsplit in javascript takes an duck-typed object,
* { scheme: 'http', netloc: 'abc.com', ...}
* while in * python it takes a list-like object.
* ['http', 'abc.com'... ]
*
* For all functions, the javascript version use
* hostname+port if netloc is missing. In python
* hostname+port were always ignored.
*
* Similar functionality in different languages:
*
* http://php.net/manual/en/function.parse-url.php
* returns assocative array but cannot handle relative URL
*
* TODO: test allowfragments more
* TODO: test netloc missing, but hostname present
*/

var urlparse = {};

// Unlike to be useful standalone
//
// NORMALIZE PATH with "../" and "./"
// http://en.wikipedia.org/wiki/URL_normalization
// http://tools.ietf.org/html/rfc3986#section-5.2.3
//
urlparse.normalizepath = function(path)
{
if (!path || path === '/') {
return '/';
}

var parts = path.split('/');

var newparts = [];
// make sure path always starts with '/'
if (parts[0]) {
newparts.push('');
}

for (var i = 0; i < parts.length; ++i) {
if (parts[i] === '..') {
if (newparts.length > 1) {
newparts.pop();
} else {
newparts.push(parts[i]);
}
} else if (parts[i] != '.') {
newparts.push(parts[i]);
}
}

path = newparts.join('/');
if (!path) {
path = '/';
}
return path;
};

//
// Does many of the normalizations that the stock
// python urlsplit/urlunsplit/urljoin neglects
//
// Doesn't do hex-escape normalization on path or query
// %7e -> %7E
// Nor, '+' <--> %20 translation
//
urlparse.urlnormalize = function(url)
{
var parts = urlparse.urlsplit(url);
switch (parts.scheme) {
case 'file':
// files can't have query strings
// and we don't bother with fragments
parts.query = '';
parts.fragment = '';
break;
case 'http':
case 'https':
// remove default port
if ((parts.scheme === 'http' && parts.port == 80) ||
(parts.scheme === 'https' && parts.port == 443)) {
parts.port = null;
// hostname is already lower case
parts.netloc = parts.hostname;
}
break;
default:
// if we don't have specific normalizations for this
// scheme, return the original url unmolested
return url;
}

// for [file|http|https]. Not sure about other schemes
parts.path = urlparse.normalizepath(parts.path);

return urlparse.urlunsplit(parts);
};

urlparse.urldefrag = function(url)
{
var idx = url.indexOf('#');
if (idx == -1) {
return [ url, '' ];
} else {
return [ url.substr(0,idx), url.substr(idx+1) ];
}
};

urlparse.urlsplit = function(url, default_scheme, allow_fragments)
{
var leftover;

if (typeof allow_fragments === 'undefined') {
allow_fragments = true;
}

// scheme (optional), host, port
var fullurl = /^([A-Za-z]+)?(:?\/\/)([0-9.\-A-Za-z]*)(?::(\d+))?(.*)$/;
// path, query, fragment
var parse_leftovers = /([^?#]*)?(?:\?([^#]*))?(?:#(.*))?$/;

var o = {};

var parts = url.match(fullurl);
if (parts) {
o.scheme = parts[1] || default_scheme || '';
o.hostname = parts[3].toLowerCase() || '';
o.port = parseInt(parts[4],10) || '';
// Probably should grab the netloc from regexp
// and then parse again for hostname/port

o.netloc = parts[3];
if (parts[4]) {
o.netloc += ':' + parts[4];
}

leftover = parts[5];
} else {
o.scheme = default_scheme || '';
o.netloc = '';
o.hostname = '';
leftover = url;
}
o.scheme = o.scheme.toLowerCase();

parts = leftover.match(parse_leftovers);

o.path = parts[1] || '';
o.query = parts[2] || '';

if (allow_fragments) {
o.fragment = parts[3] || '';
} else {
o.fragment = '';
}

return o;
};

urlparse.urlunsplit = function(o) {
var s = '';
if (o.scheme) {
s += o.scheme + '://';
}

if (o.netloc) {
if (s == '') {
s += '//';
}
s += o.netloc;
} else if (o.hostname) {
// extension. Python only uses netloc
if (s == '') {
s += '//';
}
s += o.hostname;
if (o.port) {
s += ':' + o.port;
}
}

if (o.path) {
s += o.path;
}

if (o.query) {
s += '?' + o.query;
}
if (o.fragment) {
s += '#' + o.fragment;
}
return s;
};

urlparse.urljoin = function(base, url, allow_fragments)
{
if (typeof allow_fragments === 'undefined') {
allow_fragments = true;
}

var url_parts = urlparse.urlsplit(url);

// if url parts has a scheme (i.e. absolute)
// then nothing to do
if (url_parts.scheme) {
if (! allow_fragments) {
return url;
} else {
return urlparse.urldefrag(url)[0];
}
}
var base_parts = urlparse.urlsplit(base);

// copy base, only if not present
if (!base_parts.scheme) {
base_parts.scheme = url_parts.scheme;
}

// copy netloc, only if not present
if (!base_parts.netloc || !base_parts.hostname) {
base_parts.netloc = url_parts.netloc;
base_parts.hostname = url_parts.hostname;
base_parts.port = url_parts.port;
}

// paths
if (url_parts.path.length > 0) {
if (url_parts.path.charAt(0) == '/') {
base_parts.path = url_parts.path;
} else {
// relative path.. get rid of "current filename" and
// replace. Same as var parts =
// base_parts.path.split('/'); parts[parts.length-1] =
// url_parts.path; base_parts.path = parts.join('/');
var idx = base_parts.path.lastIndexOf('/');
if (idx == -1) {
base_parts.path = url_parts.path;
} else {
base_parts.path = base_parts.path.substr(0,idx) + '/' +
url_parts.path;
}
}
}

// clean up path
base_parts.path = urlparse.normalizepath(base_parts.path);

// copy query string
base_parts.query = url_parts.query;

// copy fragments
if (allow_fragments) {
base_parts.fragment = url_parts.fragment;
} else {
base_parts.fragment = '';
}

return urlparse.urlunsplit(base_parts);
};

0 comments on commit aa60e10

Please sign in to comment.