-
Notifications
You must be signed in to change notification settings - Fork 2
/
blog_rss_parse_test.py
83 lines (66 loc) · 2.15 KB
/
blog_rss_parse_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from parsers.blog_rss_parser import BlogFeedParser
from utils.str_util import getDicStr, getListStr
from utils.http_client import downloadPage
from utils.io_util import readFile
# File that testAll() appends its parse results to.
# Written as a raw string so every Windows backslash is taken literally
# instead of depending on which "\x" pairs happen not to be escape sequences
# (e.g. "\r" would be a carriage return, "\U" is an error in unicode literals).
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
SAVE_PATH = r"C:\Users\jsilva\Desktop\Temp\[SC] blog\rss\parse_result.txt"
def testAll():
    """Download and parse every sample feed, appending the results to SAVE_PATH.

    The feed list below mixes section labels (blog platform names) with feed
    URLs, one entry per line. Only lines that start with "http://" after
    stripping whitespace are visited; everything else is skipped.

    For each URL a header line is written to the output file, the feed is
    processed through testURL() with one shared BlogFeedParser instance, and
    the file is flushed so partial results survive a later failure.
    """
    rss_links = """
major
http://blog.rss.naver.com/anjigagu.xml
http://enjoiyourlife.com/rss
http://blog.daum.net/xml/rss/ktg0205
http://rss.egloos.com/blog/plasmid
Blogger
http://www.windycitymom.org/feeds/posts/default?alt=rss
http://www.unodetantosblogs.com/feeds/posts/default?alt=rss
http://www.coles-corner-and-creations.com/feeds/posts/default?alt=rss
워드프레스
http://kimcya.com/feed/
http://null.perl-hackers.net/?feed=rss2
http://information-plus.net/feed
XE
http://www.onlifezone.com/rss
http://underkg.co.kr/rss
http://my.blogkor.com/textyle/rss
두루팔
http://www.lifeformula.net/rss.xml
"""
    # NOTE(review): the same parser instance is reused for every feed; confirm
    # that BlogFeedParser.feed() does not carry state over between documents.
    xml_parser = BlogFeedParser()

    # The context manager closes the output file even if a download or a
    # parse raises part-way through the list.
    with open(SAVE_PATH, mode="a") as out_file:
        for line in rss_links.split("\n"):
            url = line.strip()
            if not url.startswith("http://"):
                # Blank line or section label, not a feed URL.
                continue

            print("Visit : %s" % url)
            out_file.write("==================== PARSE [%s] ====================\n" % url)
            testURL(url, xml_parser, out_file)
            # Flush per feed so earlier results are on disk if a later feed fails.
            out_file.flush()
            print(" Parsed : %s" % url)
def testURL(url, xml_parser=None, output_file=None):
    """Download *url* and run the downloaded content through testXML().

    url         -- address handed to downloadPage().
    xml_parser  -- optional parser, passed through to testXML() unchanged.
    output_file -- optional writable object, passed through to testXML().

    Returns whatever testXML() returns: a (channel_data, link_data) pair.
    """
    # downloadPage() is expected to return a mapping with a "content" entry.
    response = downloadPage(url)
    feed_body = response["content"]
    return testXML(feed_body, xml_parser, output_file)
def testFile(path, xml_parser=None, output_file=None):
    """Load the file at *path* with readFile() and run it through testXML().

    path        -- location handed to readFile().
    xml_parser  -- optional parser, passed through to testXML() unchanged.
    output_file -- optional writable object, passed through to testXML().

    Returns whatever testXML() returns: a (channel_data, link_data) pair.
    """
    return testXML(readFile(path), xml_parser, output_file)
def testXML(xml_str, xml_parser=None, output_file=None):
    """Parse *xml_str* as a blog feed, print the result and optionally log it.

    xml_str     -- feed document passed to the parser's feed() method.
    xml_parser  -- parser to use; a new BlogFeedParser is created when None.
    output_file -- object with a write() method that also receives the
                   formatted channel and link text; nothing is written
                   when None.

    Returns the tuple (channel_data, link_data) obtained from the parser's
    getChannelData() and getLinks().
    """
    # Compare against None explicitly: a caller-supplied object that happens
    # to be falsy (e.g. defines __len__ returning 0) must still be used.
    if xml_parser is None:
        xml_parser = BlogFeedParser()

    xml_parser.feed(xml_str)
    channel_data = xml_parser.getChannelData()
    link_data = xml_parser.getLinks()

    # Format once, then reuse the text for both the console and the file.
    channel_data_str = getDicStr(channel_data)
    link_data_str = getListStr(link_data)

    print(channel_data_str)
    print(link_data_str)

    if output_file is not None:
        output_file.write(channel_data_str)
        output_file.write(link_data_str)

    return channel_data, link_data
# Run the sample only when executed as a script. The previous guard,
# `if __name__:`, was always true, so merely importing this module ran it.
if __name__ == "__main__":
    # Alternative entry points, enable one as needed:
    # testAll()
    testFile("resources/rss_feed_burner_sample.xml")
    # testURL("http://feeds.feedburner.com/vs-rss")