Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
removed utils2, separated functions, cache, config
- Loading branch information
Matthias -apoc- Hecker
committed
Feb 15, 2011
1 parent
4287bad
commit eb8f43c
Showing
13 changed files
with
316 additions
and
265 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,25 @@ | ||
|
||
I've ordered the list by importance. | ||
Readability | ||
|
||
- readability returns: "Sorry, unable to parse article content. Please view | ||
the original page instead." if it was unable to parse it. Should detect | ||
that and restore the original excerpt. | ||
- reuse the excerpt found in the feed, for instance to improve the readability | ||
detection or just include it in the feed somehow. (currently it's thrown away) | ||
that and restore the original excerpt. (maybe keep the excerpt anyways) | ||
- manage to use the readability dom tree for filtering | ||
|
||
Proxy | ||
|
||
- detect images (and maybe other content) and download it locally so that the | ||
proxy can serve it from cache. *or* make sure the image links are pointing | ||
to an absolute url. | ||
- make cache files a little bit more accessible: use directories for domains | ||
and timestamps etc. for the filename? How about creating a special | ||
directory with more usable information as directories and softlinks? | ||
- gzip the cache files | ||
- currently I build a jsdom tree for the jquery selector filtering, but this | ||
does not really make sense because readability is creating a jsdom tree | ||
anyways, it would be cool to have pre and post hooks to manipulate the | ||
dom tree that readability is using. | ||
- ... | ||
- fetch and respect the robots.txt file of the sites | ||
- rewrite the feed parser/generator using jsdom etc. I do not like the way | ||
the current implementation is building the feed xml with the &replace.. | ||
- refactor the utils2 (or maybe use another library for this anyway) | ||
- ... | ||
- sort this list | ||
proxy can serve it from cache. | ||
- caching proxy that runs readability for text/html | ||
|
||
Cache | ||
|
||
- gzip the cache files (make generic api for that) | ||
|
||
Feed Parser | ||
|
||
- rewrite the feed parser/generator using an XML DOM library | ||
|
||
Crawler | ||
|
||
- fetch and respect the robots.txt file | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
var uri = require('url'), | ||
fs = require('fs'); | ||
var func = require('./func.js'), | ||
cfg = require('./cfg.js'); | ||
|
||
/**
 * Returns the path of the cache file for the supplied url.
 *
 * The path has the form `<cache_path>/<hostname>/<hash>.<ext>`, where
 * `cache_path` comes from the configuration, `hostname` is parsed from the
 * url and `hash` is whatever func.sha1(url) returns. The per-hostname
 * directory is created as a side effect when func.file_exists reports that
 * it is missing.
 *
 * @param {string} ext - file extension to append (without the leading dot)
 * @param {string} url - url of the resource that is (or will be) cached
 * @returns {string} path of the cache file for that url
 */
function filename(ext, url)
{
    var domain = uri.parse(url).hostname,
        urlhash = func.sha1(url);
    var cache_path = cfg.get('cache_path')+'/'+domain;

    if(!func.file_exists(cache_path)) {
        console.log('create domain directory: '+cache_path);
        // Mode rwxr-xr-x. Written with parseInt because the legacy octal
        // literal 0755 is a SyntaxError in strict mode / ES modules.
        fs.mkdirSync(cache_path, parseInt('755', 8));
    }

    return cache_path + '/' + urlhash + '.' + ext;
}
exports.filename = filename; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/** | ||
* Module to load and access settings. | ||
*/ | ||
var fs = require('fs'); | ||
var func = require('./func.js'); | ||
|
||
var settings = null; | ||
|
||
// Load the configuration once: parse settings.json into `settings` and, when
// a user_settings.json file is present, combine both via func.object_merge.
// Errors raised while reading or parsing are logged and not rethrown.
function load() {
    if(settings != null) {
        // already loaded, nothing to do
        return;
    }

    console.log('load settings.json file');
    try {
        settings = JSON.parse(fs.readFileSync('settings.json', 'utf8'));
        if(!func.file_exists('user_settings.json')) {
            return;
        }

        console.log('found and load the user_settings.json file');
        var overrides_text = fs.readFileSync('user_settings.json', 'utf8');
        settings = func.object_merge(settings, JSON.parse(overrides_text));
    }
    catch (error) {
        console.log('[ERROR] loading settings: '+error);
    }
}
load(); | ||
|
||
/**
 * Returns the value stored in the settings under the given key.
 *
 * @param {string} key - name of the setting
 * @returns {*} the stored value, or undefined when the key is not set or
 *     no settings object is available
 */
function get(key) {
    // `settings` is still null when load() could not read settings.json;
    // answer with undefined instead of throwing a TypeError on every lookup.
    if(settings == null) {
        return undefined;
    }
    return settings[key];
}
exports.get = get; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.