/
combiner.py
executable file
·128 lines (107 loc) · 3.55 KB
/
combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
"""
Create an RSS feed comprised of several mercurial RSS changelogs.
Copyright (c) 2007 Dustin Sallings <dustin@spy.net>
"""
import hashlib
import md5
import os
import sys
import time
import urllib
import xml.dom.minidom

from xml.dom.ext import PrettyPrint

import fetch
import feedparser
# URL of a newline-separated list of mercurial repository names.
HGLIST='http://hg.west.spy.net/~dustin/hglist.txt'
# Pattern expanded with each public repo name to get its RSS changelog URL.
HGPATTERN='http://hg.west.spy.net/hg/%s/rss-log'
# Github activity feed, appended as-is to the list of feeds to combine.
GITHUB='http://github.com/dustin.atom'
# Scratch directory where fetched feeds are cached between runs.
TMPDIR='/tmp/combiner'
def getHgList(url=HGLIST, pattern=HGPATTERN):
    """Fetch the repository list from ``url`` and expand each name
    through ``pattern`` to produce one changelog feed URL per repo.

    Lines beginning with "private/" are excluded from the result."""
    feed_urls = []
    listing = urllib.urlopen(url)
    try:
        for line in listing:
            if line.startswith("private/"):
                continue
            feed_urls.append(pattern % line.strip())
    finally:
        listing.close()
    return feed_urls
def getList():
    """Return every feed URL to combine: all mercurial changelog
    feeds plus the github activity feed."""
    urls = list(getHgList())
    urls.append(GITHUB)
    return urls
def __makeFilename(url):
    """Map a feed URL to its cache file path under TMPDIR.

    The file name is the hex md5 digest of the URL, so it is stable
    across runs and safe regardless of characters in the URL.  Uses
    hashlib rather than the md5 module, which has been deprecated
    since Python 2.5 (and removed in Python 3); the digest is
    identical.  The encode is a no-op for the ASCII URLs this script
    builds, but keeps the call valid on Python 3 as well.
    """
    digest = hashlib.md5(url.encode('utf-8')).hexdigest()
    return os.path.join(TMPDIR, digest + ".xml")
def getFeeds(urls):
    """Download each URL into its cache file and parse it.

    Returns a list of feedparser results, one per input URL, in the
    same order as ``urls``."""
    parsed = []
    for url in urls:
        # Ensure the cache directory exists before fetching into it.
        if not os.path.exists(TMPDIR):
            os.makedirs(TMPDIR)
        cachefile = __makeFilename(url)
        fetch.doUpdate(url, cachefile)
        parsed.append(feedparser.parse(cachefile))
    return parsed
def combineFeeds(feeds, maxRv=20):
    """Merge parsed feeds into one entry list, newest first.

    Each entry's title is prefixed with its feed's title (minus any
    " Changelog" suffix).  Feeds lacking a title are reported and
    skipped.  Returns at most maxRv entries, sorted descending by
    their 'updated_parsed' value.
    """
    rv = []
    for f in feeds:
        if not hasattr(f.feed, 'title'):
            # Bug fix: the old code used a comma after the format string
            # ("%s", repr(f)), so the %s placeholder was printed
            # literally; apply % formatting instead.  The parenthesized
            # print form is valid on both python 2 and 3.
            print("No title in %s" % repr(f))
            continue
        t = f.feed.title.replace(' Changelog', '')
        for e in f.entries:
            d = dict(e)
            d['title'] = t + ': ' + e.title
            rv.append(d)
    return sorted(rv, key=lambda x: x['updated_parsed'], reverse=True)[:maxRv]
def formatTimestamp(ts):
    """Render a unix timestamp ``ts`` as an RFC-822 date in GMT,
    the format RSS 2.0 expects for pubDate/lastBuildDate."""
    broken_down = time.gmtime(ts)
    return time.strftime("%a, %d %b %Y %H:%M:%S GMT", broken_down)
def generateRss(title, url, descr, items):
    """Build an RSS 2.0 DOM document for the given channel metadata.

    ``items`` are dicts carrying 'link', 'title', 'updated' and either
    'summary' or 'subtitle' keys.  An entry missing a required key is
    reported and skipped rather than aborting the whole feed.
    Returns the xml.dom.minidom Document.
    """
    dom = xml.dom.minidom.getDOMImplementation()
    doc = dom.createDocument(None, "rss", None)
    doc.documentElement.setAttribute("version", "2.0")
    channel = doc.createElement("channel")
    doc.documentElement.appendChild(channel)
    def appendText(el, name, val):
        # Append <name>val</name> beneath el and return the new element.
        newel = doc.createElement(name)
        newel.appendChild(doc.createTextNode(val))
        el.appendChild(newel)
        return newel
    appendText(channel, "title", title)
    appendText(channel, "link", url)
    appendText(channel, "language", "en-us")
    appendText(channel, "generator", "feedcombiner")
    appendText(channel, "webMaster", "dustin@spy.net")
    appendText(channel, "lastBuildDate", formatTimestamp(time.time()))
    appendText(channel, "ttl", "86400")
    appendText(channel, "description", descr)
    def itemCopy(it, i, key):
        # Copy a whitespace-stripped value from the entry dict into the item.
        appendText(it, key, i[key].strip())
    for i in items:
        try:
            it = doc.createElement("item")
            itemCopy(it, i, 'link')
            itemCopy(it, i, 'title')
            # dict.has_key was removed in python 3; the "in" operator is
            # the exact equivalent on python 2 as well.
            if 'summary' in i:
                summary = i['summary']
            else:
                summary = i['subtitle']
            appendText(it, 'description', summary)
            appendText(it, 'pubDate', i['updated'])
            channel.appendChild(it)
        except KeyError as e:
            # "except KeyError, e" is python-2-only syntax; "as" works on
            # 2.6+ and 3.x.  Log and skip the incomplete entry; output is
            # identical to the old space-separated print statement.
            print("Error %s at %s" % (e, i))
    return doc
def __loadFeeds(picklefile='/tmp/feeds_pickled'):
    """Debug helper: restore a previously pickled feed list so the
    combiner can be exercised without hitting the network (see the
    commented-out call in __main__)."""
    import pickle
    with open(picklefile) as f:
        return pickle.load(f)
if __name__ == '__main__':
    # Pull every configured feed, merge the newest entries, and render
    # them as a single RSS document at the path given in argv[1].
    feeds=combineFeeds(getFeeds(getList()))
    # feeds=combineFeeds(__loadFeeds())
    doc=generateRss('Project feeds', 'http://hg.west.spy.net/hg/',
        'Recent changes for spy.net projects', feeds)
    # Write to a temp file and rename into place so readers never see a
    # partially-written feed.
    tmpfile=sys.argv[1] + '.tmp'
    o=open(tmpfile, 'w')
    try:
        try:
            PrettyPrint(doc, stream=o, encoding='UTF-8', indent=' ',
                preserveElements=None)
        finally:
            # Bug fix: the output handle was never closed before.
            o.close()
        os.rename(tmpfile, sys.argv[1])
    except:
        # Bug fix: the old bare except silently swallowed every error.
        # Still clean up the partial temp file, but re-raise so failures
        # are visible and set the exit status.
        os.unlink(tmpfile)
        raise