Skip to content

Commit

Permalink
Merge pull request #35 from Lab-317/feature/013-stormmedia
Browse files Browse the repository at this point in the history
add Stormmedia Parser
  • Loading branch information
BalicantaYao committed Nov 10, 2014
2 parents aa47406 + 31f5bfa commit 4e7df1b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 5 deletions.
2 changes: 1 addition & 1 deletion newsParser/NewsParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# @Author: balicanta
# @Date: 2014-10-25 00:09:39
# @Last Modified by: DinoLai
# @Last Modified time: 2014-11-09 03:49:57
# @Last Modified time: 2014-11-09 04:07:50

import sys

Expand Down
27 changes: 27 additions & 0 deletions newsParser/strategies/StormMediaGroupParseStrategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author: DinoLai
# @Date: 2014-11-08 23:06:56
# @Last Modified by: DinoLai
# @Last Modified time: 2014-11-09 01:54:31

from AbstractNewsParseStrategy import AbstractNewsParseStrategy


class StormMediaGroupParseStrategy(AbstractNewsParseStrategy):
    """Parse strategy for article pages served from stormmediagroup.com.

    Each ``get*`` method receives the parsed page (an object exposing a
    CSS ``select`` method), takes the first element matching a fixed
    selector and returns that element's ``text``.  Indexing the first
    match raises ``IndexError`` when the selector matches nothing.
    """

    def isURLMatch(self, url):
        """Return True when ``url`` contains ``.stormmediagroup.com``."""
        domain_marker = ".stormmediagroup.com"
        return domain_marker in url

    def getTitle(self, beautiful_soup_object):
        """Return the text of the first ``.innerBigNewsTitle`` element."""
        headline_nodes = beautiful_soup_object.select('.innerBigNewsTitle')
        return headline_nodes[0].text

    def getAuthor(self, beautiful_soup_object):
        """Return the text of the first link directly under ``.innerNewsInfo``."""
        byline_links = beautiful_soup_object.select('.innerNewsInfo > a')
        return byline_links[0].text

    def getContent(self, beautiful_soup_object):
        """Return the text of the first ``.newsDescBlk`` element."""
        body_nodes = beautiful_soup_object.select('.newsDescBlk')
        # NOTE(review): the author left the following call disabled; it
        # would drop a nested ``style`` element before the text is read.
        # Presumably that element's text can therefore show up in the
        # returned content -- confirm against a live page.
        # body_nodes[0].style.decompose()
        return body_nodes[0].text

    def getPublishDate(self, beautiful_soup_object):
        """Return the full text of the first ``.innerNewsInfo`` element.

        NOTE(review): ``getAuthor`` reads a link that is a child of this
        same element, so the returned text presumably carries the author
        name along with the date -- confirm whether callers expect the
        date alone.
        """
        info_nodes = beautiful_soup_object.select('.innerNewsInfo')
        return info_nodes[0].text
5 changes: 3 additions & 2 deletions newsParser/strategies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# @Author: balicanta
# @Date: 2014-11-01 21:01:48
# @Last Modified by: DinoLai
# @Last Modified time: 2014-11-09 03:59:22
# @Last Modified time: 2014-11-09 04:07:23

# Strategy submodules of this package that ``from <package> import *``
# pulls in.
# NOTE(review): this is a rendered diff hunk, so the removed and the added
# "WorldYamParseStrategy" lines both appear below; only the
# comma-terminated one belongs in the resulting file.
__all__ = [
"AnntwNewsParseStrategy",
"CoolLoudParseStrategy",
"LtnNewsParseStrategy",
"PeopleNewsParseStrategy",
"UdnNewsParseStrategy",
"WorldYamParseStrategy"
"WorldYamParseStrategy",
"StormMediaGroupParseStrategy"
]
6 changes: 4 additions & 2 deletions newsParser/tests/NewsParser_Test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# @Author: balicanta
# @Date: 2014-10-25 09:57:26
# @Last Modified by: DinoLai
# @Last Modified time: 2014-11-09 03:58:04
# @Last Modified time: 2014-11-09 04:08:11


from ..NewsParser import NewsParser
Expand All @@ -21,7 +21,9 @@
{"url": "http://www.coolloud.org.tw/node/80590",
"title": "澳洲打工遭台商剝削", "author":"王顥中", "content": "青年勞動九五聯盟29日上午召開記者會"},
{"url": "http://www.peoplenews.tw/news/80a3de53-1c06-4d30-a330-b76e335132f7",
"title": "協助弱勢募款", "author":"朱蒲青", "content": "沒有什麼假想敵啦"}
"title": "協助弱勢募款", "author":"朱蒲青", "content": "沒有什麼假想敵啦"},
{"url": "http://www.stormmediagroup.com/opencms/investigate/detail/1e517551-6735-11e4-a007-ef2804cba5a1/?uuid=1e517551-6735-11e4-a007-ef2804cba5a1",
"title": "藍營高層看連勝文", "author":"王彥喬", "content": "當年郝龍斌選前因為貓纜"}
]


Expand Down

0 comments on commit 4e7df1b

Please sign in to comment.