Skip to content

Commit

Permalink
title script - looks up title of a URL
Browse files Browse the repository at this point in the history
  • Loading branch information
Ricket committed Sep 5, 2012
1 parent 7907315 commit 55847e0
Show file tree
Hide file tree
Showing 3 changed files with 377 additions and 0 deletions.
291 changes: 291 additions & 0 deletions lib/entities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/**
 * Lookup table from HTML 4 named character references (written exactly as
 * they appear in markup, including the leading '&' and trailing ';') to the
 * character each one stands for.
 *
 * decode() looks references up by key; encode() walks the table in order and
 * replaces each value with its key, so every value must appear only once.
 * '&amp;' is deliberately absent: both functions treat the ampersand
 * separately so that it is never double-processed.
 *
 * Code points follow the HTML 4.01 entity sets (Latin-1, special, symbols).
 * '&lang;' / '&rang;' keep their HTML 4 values (U+2329 / U+232A).
 */
var entities = {
    // ISO 8859-1 (Latin-1) characters
    '&nbsp;': '\u00a0',
    '&iexcl;': '\u00a1',
    '&cent;': '\u00a2',
    '&pound;': '\u00a3',
    '&curren;': '\u00a4',
    '&yen;': '\u00a5',
    '&brvbar;': '\u00a6',
    '&sect;': '\u00a7',
    '&uml;': '\u00a8',
    '&copy;': '\u00a9',
    '&ordf;': '\u00aa',
    '&laquo;': '\u00ab',
    '&not;': '\u00ac',
    '&shy;': '\u00ad',
    '&reg;': '\u00ae',
    '&macr;': '\u00af',
    '&deg;': '\u00b0',
    '&plusmn;': '\u00b1',
    '&sup2;': '\u00b2',
    '&sup3;': '\u00b3',
    '&acute;': '\u00b4',
    '&micro;': '\u00b5',
    '&para;': '\u00b6',
    '&middot;': '\u00b7',
    '&cedil;': '\u00b8',
    '&sup1;': '\u00b9',
    '&ordm;': '\u00ba',
    '&raquo;': '\u00bb',
    '&frac14;': '\u00bc',
    '&frac12;': '\u00bd',
    '&frac34;': '\u00be',
    '&iquest;': '\u00bf',
    '&Agrave;': '\u00c0',
    '&Aacute;': '\u00c1',
    '&Acirc;': '\u00c2',
    '&Atilde;': '\u00c3',
    '&Auml;': '\u00c4',
    '&Aring;': '\u00c5',
    '&AElig;': '\u00c6',
    '&Ccedil;': '\u00c7',
    '&Egrave;': '\u00c8',
    '&Eacute;': '\u00c9',
    '&Ecirc;': '\u00ca',
    '&Euml;': '\u00cb',
    '&Igrave;': '\u00cc',
    '&Iacute;': '\u00cd',
    '&Icirc;': '\u00ce',
    '&Iuml;': '\u00cf',
    '&ETH;': '\u00d0',
    '&Ntilde;': '\u00d1',
    '&Ograve;': '\u00d2',
    '&Oacute;': '\u00d3',
    '&Ocirc;': '\u00d4',
    '&Otilde;': '\u00d5',
    '&Ouml;': '\u00d6',
    '&times;': '\u00d7',
    '&Oslash;': '\u00d8',
    '&Ugrave;': '\u00d9',
    '&Uacute;': '\u00da',
    '&Ucirc;': '\u00db',
    '&Uuml;': '\u00dc',
    '&Yacute;': '\u00dd',
    '&THORN;': '\u00de',
    '&szlig;': '\u00df',
    '&agrave;': '\u00e0',
    '&aacute;': '\u00e1',
    '&acirc;': '\u00e2',
    '&atilde;': '\u00e3',
    '&auml;': '\u00e4',
    '&aring;': '\u00e5',
    '&aelig;': '\u00e6',
    '&ccedil;': '\u00e7',
    '&egrave;': '\u00e8',
    '&eacute;': '\u00e9',
    '&ecirc;': '\u00ea',
    '&euml;': '\u00eb',
    '&igrave;': '\u00ec',
    '&iacute;': '\u00ed',
    '&icirc;': '\u00ee',
    '&iuml;': '\u00ef',
    '&eth;': '\u00f0',
    '&ntilde;': '\u00f1',
    '&ograve;': '\u00f2',
    '&oacute;': '\u00f3',
    '&ocirc;': '\u00f4',
    '&otilde;': '\u00f5',
    '&ouml;': '\u00f6',
    '&divide;': '\u00f7',
    '&oslash;': '\u00f8',
    '&ugrave;': '\u00f9',
    '&uacute;': '\u00fa',
    '&ucirc;': '\u00fb',
    '&uuml;': '\u00fc',
    '&yacute;': '\u00fd',
    '&thorn;': '\u00fe',
    '&yuml;': '\u00ff',

    // Markup-significant and internationalization characters
    '&quot;': '\u0022',
    '&lt;': '\u003c',
    '&gt;': '\u003e',
    '&apos;': '\u0027',
    '&minus;': '\u2212',
    '&circ;': '\u02c6',
    '&tilde;': '\u02dc',
    '&Scaron;': '\u0160',
    '&lsaquo;': '\u2039',
    '&OElig;': '\u0152',
    '&lsquo;': '\u2018',
    '&rsquo;': '\u2019',
    '&ldquo;': '\u201c',
    '&rdquo;': '\u201d',
    '&bull;': '\u2022',
    '&ndash;': '\u2013',
    '&mdash;': '\u2014',
    '&trade;': '\u2122',
    '&scaron;': '\u0161',
    '&rsaquo;': '\u203a',
    '&oelig;': '\u0153',
    '&Yuml;': '\u0178',
    '&fnof;': '\u0192',

    // Greek letters
    '&Alpha;': '\u0391',
    '&Beta;': '\u0392',
    '&Gamma;': '\u0393',
    '&Delta;': '\u0394',
    '&Epsilon;': '\u0395',
    '&Zeta;': '\u0396',
    '&Eta;': '\u0397',
    '&Theta;': '\u0398',
    '&Iota;': '\u0399',
    '&Kappa;': '\u039a',
    '&Lambda;': '\u039b',
    '&Mu;': '\u039c',
    '&Nu;': '\u039d',
    '&Xi;': '\u039e',
    '&Omicron;': '\u039f',
    '&Pi;': '\u03a0',
    '&Rho;': '\u03a1',
    '&Sigma;': '\u03a3',
    '&Tau;': '\u03a4',
    '&Upsilon;': '\u03a5',
    '&Phi;': '\u03a6',
    '&Chi;': '\u03a7',
    '&Psi;': '\u03a8',
    '&Omega;': '\u03a9',
    '&alpha;': '\u03b1',
    '&beta;': '\u03b2',
    '&gamma;': '\u03b3',
    '&delta;': '\u03b4',
    '&epsilon;': '\u03b5',
    '&zeta;': '\u03b6',
    '&eta;': '\u03b7',
    '&theta;': '\u03b8',
    '&iota;': '\u03b9',
    '&kappa;': '\u03ba',
    '&lambda;': '\u03bb',
    '&mu;': '\u03bc',
    '&nu;': '\u03bd',
    '&xi;': '\u03be',
    '&omicron;': '\u03bf',
    '&pi;': '\u03c0',
    '&rho;': '\u03c1',
    '&sigmaf;': '\u03c2',
    '&sigma;': '\u03c3',
    '&tau;': '\u03c4',
    '&upsilon;': '\u03c5',
    '&phi;': '\u03c6',
    '&chi;': '\u03c7',
    '&psi;': '\u03c8',
    '&omega;': '\u03c9',
    '&thetasym;': '\u03d1',
    '&upsih;': '\u03d2',
    '&piv;': '\u03d6',

    // Spacing, directional marks and general punctuation
    '&ensp;': '\u2002',
    '&emsp;': '\u2003',
    '&thinsp;': '\u2009',
    '&zwnj;': '\u200c',
    '&zwj;': '\u200d',
    '&lrm;': '\u200e',
    '&rlm;': '\u200f',
    '&sbquo;': '\u201a',
    '&bdquo;': '\u201e',
    '&dagger;': '\u2020',
    '&Dagger;': '\u2021',
    '&hellip;': '\u2026',
    '&permil;': '\u2030',
    '&prime;': '\u2032',
    '&Prime;': '\u2033',
    '&oline;': '\u203e',
    '&frasl;': '\u2044',
    '&euro;': '\u20ac',

    // Letterlike symbols and arrows
    '&image;': '\u2111',
    '&weierp;': '\u2118',
    '&real;': '\u211c',
    '&alefsym;': '\u2135',
    '&larr;': '\u2190',
    '&uarr;': '\u2191',
    '&rarr;': '\u2192',
    '&darr;': '\u2193',
    '&harr;': '\u2194',
    '&crarr;': '\u21b5',
    '&lArr;': '\u21d0',
    '&uArr;': '\u21d1',
    '&rArr;': '\u21d2',
    '&dArr;': '\u21d3',
    '&hArr;': '\u21d4',

    // Mathematical operators
    '&forall;': '\u2200',
    '&part;': '\u2202',
    '&exist;': '\u2203',
    '&empty;': '\u2205',
    '&nabla;': '\u2207',
    '&isin;': '\u2208',
    '&notin;': '\u2209',
    '&ni;': '\u220b',
    '&prod;': '\u220f',
    '&sum;': '\u2211',
    '&lowast;': '\u2217',
    '&radic;': '\u221a',
    '&prop;': '\u221d',
    '&infin;': '\u221e',
    '&ang;': '\u2220',
    '&and;': '\u2227',
    '&or;': '\u2228',
    '&cap;': '\u2229',
    '&cup;': '\u222a',
    '&int;': '\u222b',
    '&there4;': '\u2234',
    '&sim;': '\u223c',
    '&cong;': '\u2245',
    '&asymp;': '\u2248',
    '&ne;': '\u2260',
    '&equiv;': '\u2261',
    '&le;': '\u2264',
    '&ge;': '\u2265',
    '&sub;': '\u2282',
    '&sup;': '\u2283',
    '&nsub;': '\u2284',
    '&sube;': '\u2286',
    '&supe;': '\u2287',
    '&oplus;': '\u2295',
    '&otimes;': '\u2297',
    '&perp;': '\u22a5',
    '&sdot;': '\u22c5',

    // Technical, geometric and miscellaneous symbols
    '&lceil;': '\u2308',
    '&rceil;': '\u2309',
    '&lfloor;': '\u230a',
    '&rfloor;': '\u230b',
    '&lang;': '\u2329',
    '&rang;': '\u232a',
    '&loz;': '\u25ca',
    '&spades;': '\u2660',
    '&clubs;': '\u2663',
    '&hearts;': '\u2665',
    '&diams;': '\u2666'
};

exports.decode = function (str) {
if (!~str.indexOf('&')) return str;

//Decode literal entities
for (var i in entities) {
str = str.replace(new RegExp(i, 'g'), entities[i]);
}

//Decode hex entities
str = str.replace(/&#x(0*[0-9a-f]{2,5});?/gi, function (m, code) {
return String.fromCharCode(parseInt(+code, 16));
});

//Decode numeric entities
str = str.replace(/&#([0-9]{2,4});?/gi, function (m, code) {
return String.fromCharCode(+code);
});

str = str.replace(/&/g, '&');

return str;
}

exports.encode = function (str) {
str = str.replace(/&/g, '&');

//IE doesn't accept '
str = str.replace(/'/g, ''');

//Encode literal entities
for (var i in entities) {
str = str.replace(new RegExp(entities[i], 'g'), i);
}

return str;
}
20 changes: 20 additions & 0 deletions lib/entities.js.LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Copyright (c) 2010 Chris O'Hara <cohara87@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
66 changes: 66 additions & 0 deletions scripts/title.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// (c) 2012 Richard Carter
// This code is licensed under the MIT license; see LICENSE.txt for details.

// This script handles the following functions:
// some url - look up the url's title and announce it

var http = require('http'),
entities = require('./lib/entities');

// Whenever a message contains something that looks like a URL, fetch the page
// and announce "<hostname> : <title>" to wherever the message came from.
// Non-200 responses are reported by status code (403 is ignored), non-HTML
// responses are skipped silently, and request errors are reported.
listen(/\b((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?]))/i, function(match, data, replyTo) {
    var url = match[0];

    // The pattern also matches scheme-less links ("www.example.com/x",
    // "example.com/x"). Without a scheme the URL parser finds no hostname,
    // so default those to plain HTTP.
    if (!/^https?:\/\//i.test(url)) {
        url = "http://" + url;
    }

    // The http module refuses https: URLs; pick the client by scheme.
    var client = /^https:/i.test(url) ? require('https') : http;
    var hostname = require('url').parse(url).hostname;

    var req = client.request(url, function(res) {
        if(res.statusCode !== 200) {
            // Ignore 403 Access Forbidden; some websites block bots with this
            // code (e.g. Wikipedia).
            if(res.statusCode !== 403) {
                irc.privmsg(replyTo, "" + res.statusCode);
            }
            req.abort();
            return;
        }

        var contentType = res.headers['content-type'];
        if(contentType && contentType.toLowerCase().indexOf("text/html") === -1) {
            // Not an HTML page
            req.abort();
            return;
        }

        // Named `body` so it does not shadow the handler's `data` argument.
        var body = "";
        var titleFound = false;

        // Decode as text up front so a multi-byte character split across two
        // chunks is not corrupted by per-chunk Buffer-to-string conversion.
        res.setEncoding('utf8');

        res.on('data', function(chunk) {
            // Chunks that arrive after the title was announced are ignored.
            if(titleFound) {
                return;
            }

            body += chunk;

            var titleMatch = /<title>([^<]+)<\/title>/i.exec(body);
            if(!titleMatch) {
                return;
            }
            titleFound = true;

            // Decode HTML entities first: a reference such as "&#10;" only
            // becomes a line break after decoding, and a raw CR/LF must never
            // reach irc.privmsg because line breaks end an IRC message.
            var title = entities.decode(titleMatch[1]);

            // Collapse every run of whitespace/control characters (including
            // single newlines and non-breaking spaces) into one space.
            title = title.replace(/[\s\x00-\x1f\x7f]+/g, " ");

            // trim front and back
            title = title.replace(/^ +| +$/g, "");

            irc.privmsg(replyTo, hostname + " : " + title);

            // The rest of the page is not needed.
            res.pause();
            res.destroy();
            req.abort();
        });

        res.on('end', function() {
            if(!titleFound) {
                irc.privmsg(replyTo, hostname + " : title not found");
                res.destroy();
            }
        });
    }).on('error', function(e) {
        irc.privmsg(replyTo, "Error looking up URL: " + e.message);
    });
    req.end();
});

0 comments on commit 55847e0

Please sign in to comment.