diff --git a/main.py b/main.py index cabc583..cb08349 100755 --- a/main.py +++ b/main.py @@ -22,11 +22,38 @@ # # -# check lxml and dateutil + + +import sys + +def err (module_name, package_name): + sys.stderr.write( + "{0} module is unavaliable. Please install it\n(e.g. 'sudo apt-get install {1}')\n".format( + module_name, + package_name + )) + sys.exit(2) + +try: + import lxml +except ImportError: + err('lxml', 'python-lxml') + +try: + import dateutil +except ImportError: + err('dateutil', 'python-dateutil') + + + from plugins import * +import plugins + + -h = [disnews.DISNews, p4reviews.P4Reviews, p4news.P4News, disreviews.DISReviews, gorillavsbearnews.GorillaVsBearNews] -h = [p4news.P4News] -for p in h: - p().handle() +try: + for site in plugins.get_plugins(sys.argv[1:]): + site().handle() +except ValueError as e: + print e diff --git a/plugins/__init__.py b/plugins/__init__.py index 2afa19b..b160baf 100644 --- a/plugins/__init__.py +++ b/plugins/__init__.py @@ -22,6 +22,12 @@ # # + + +import random + + + __all__ = ( 'p4reviews', 'disreviews', @@ -29,3 +35,59 @@ 'disnews', 'gorillavsbearnews' ) + +_plugins = {} # {plugin_name : plugin_class} + + + +def register (plugin_class, *plugin_names): + for name in plugin_names: + if name in _plugins: + raise KeyError(name) + + for name in plugin_names: + _plugins[name] = plugin_class + + +def unregister(*plugin_names): + for name in plugin_names: + try: + del _plugins[name] + except KeyError: + pass + + +def have_plugin (plugin_name): + return plugin_name in _plugins + + +def get_unknown_plugins (plugin_names): + return filter( + lambda name: not have_plugin(name), + plugin_names + ) + + +def going_for_all (plugin_names): + return len(plugin_names) == 1 and 'all' == plugin_names[0] + + +def prepared (plugs): + ret = list(frozenset(plugs)) + random.shuffle(ret) # trick to decrease load on particular service + return ret + + +def get_registered_plugins (plugin_names): + return ( _plugins[name] for name in plugin_names ) + + 
+def get_plugins (plugin_names): + if going_for_all (plugin_names): + return prepared(_plugins.values()) + else: + unknown = get_unknown_plugins(plugin_names) + if len(unknown) == 0: + return prepared(get_registered_plugins(plugin_names)) + else: + raise ValueError('Unknown plugins: {0}'.format(str(unknown))) diff --git a/plugins/disnews.py b/plugins/disnews.py index 8cdafe4..57a3459 100644 --- a/plugins/disnews.py +++ b/plugins/disnews.py @@ -77,3 +77,7 @@ def get_pagecount (self): pagination = website.parser(handler.read()).cssselect('#content .pagination a') handler.close() return int(pagination[-2].text) + + + +DISNews.register() diff --git a/plugins/disreviews.py b/plugins/disreviews.py index 59a63bc..5acee0c 100644 --- a/plugins/disreviews.py +++ b/plugins/disreviews.py @@ -110,3 +110,7 @@ def get_pagecount (self): def get_sorter (self, tupl): return tupl[1] + tupl[2] + + + +DISReviews.register() diff --git a/plugins/gorillavsbearnews.py b/plugins/gorillavsbearnews.py index da529ba..5069ba8 100644 --- a/plugins/gorillavsbearnews.py +++ b/plugins/gorillavsbearnews.py @@ -79,3 +79,7 @@ def handle_element(self, element): date = self.get_date(element) return (url, title, author, date) + + + +GorillaVsBearNews.register() diff --git a/plugins/p4news.py b/plugins/p4news.py index b3d9603..fd870b7 100644 --- a/plugins/p4news.py +++ b/plugins/p4news.py @@ -89,3 +89,7 @@ def handle_element(self, element): date = self.get_date(element) return (url, title, author, date) + + + +P4News.register() diff --git a/plugins/p4reviews.py b/plugins/p4reviews.py index d818d85..8884c77 100644 --- a/plugins/p4reviews.py +++ b/plugins/p4reviews.py @@ -102,3 +102,7 @@ def get_page_data (self, url, content): def get_sorter (self, tupl): return tupl[1] + tupl[2] + + + +P4Reviews.register() diff --git a/website.py b/website.py index f158cc3..f9b6b68 100644 --- a/website.py +++ b/website.py @@ -35,9 +35,10 @@ import workerconsumerpool as wcp import pagecounter import utils +import plugins 
from lxml.html import document_fromstring as parser -from lxml.html import tostring as parser_str + def stripped (func): @@ -75,6 +76,11 @@ def get_page_content (url): return content + @classmethod + def register (cls): + plugins.register(cls, cls.__name__.lower()) + + def __init__ ( self, domain, @@ -107,8 +113,8 @@ def handle (self): print 'Handling {0}'.format(self.task_name) print '{0} pages to handle'.format(self.get_pagecount()) - #for page in xrange(self.page_counter.left_bound, self.get_pagecount()+1): - for page in xrange(self.page_counter.left_bound, 5): + for page in xrange(self.page_counter.left_bound, self.get_pagecount()+1): + #for page in xrange(self.page_counter.left_bound, 5): parse_results = None for attempt in xrange(self.tries): parse_results = self._parse_linkpage(page)