Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'blogger'

  • Loading branch information...
commit 7139341cd3c8d71d5a9983f8ee6c3c14d39bcc3c 2 parents c0ebc48 + aca701c
@EnigmaCurry authored
View
2  blogofile/site_init/blog_controller/_controllers/blog/permapage.py
@@ -14,7 +14,7 @@ def write_permapages():
for i, post in enumerate(blog.posts):
if post.permalink:
- path = site_re.sub("", post.permalink)
+ path = site_re.sub("", post.permalink).rstrip('.html')
blog.logger.info(u"Writing permapage for post: {0}".format(path))
else:
#Permalinks MUST be specified. No permalink, no page.
View
233 converters/blogger2blogofile.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+
+__author__ = "Seth de l'Isle"
+
+#### Usage:
+## You can generate a Blogger export file by logging into blogger,
+## then going to Settings -> Basic -> Export Blog. You will get a
+## file to download with the current date in the name and a .xml
+## extension. Running blogger2blogofile.py in that directory, with
+## the filename of the export file as the only argument will generate
+## a _posts directory ready for use with Blogofile.
+
+
+import sys
+try:
+ import feedparser
+except ImportError:
+ print >> sys.stderr, """This tool requires the universal feedparser module.
+
+Depending on your tools, try:
+ apt-get install python-feedparser
+or:
+ easy_install feedparser
+
+or check out the download files at http://code.google.com/p/feedparser/downloads/list
+ """
+ sys.exit()
+
+import yaml
+import time
+import os
+import codecs
+import unittest
+import pickle
+import shutil
+import base64
+import tarfile
+import io
+import urlparse
+
+class Blogger:
+ def __init__(self, dumpFile):
+ self.feed = feedparser.parse(dumpFile)
+ self.entries = [Entry(entry) for entry in self.feed.entries if self.is_post(entry)]
+
+ @staticmethod
+ def is_post(entry):
+ # tag.term looks like 'http://schemas.google.com/blogger/2008/kind#post'
+ return any([tag for tag in entry.tags if 'kind#post' in tag.term])
+
+ def write_posts(self, targetPath):
+ for entry in self.entries:
+ entry.write_post(targetPath)
+
+class Entry:
+ def __init__(self, feedEntry):
+ self.feedEntry = feedEntry
+ fileNameDate = self.blogofile_date('published').replace('/', '-')
+ self.build_header()
+ dateNameFile = fileNameDate + self.feedEntry.title.replace('/', '-') + '.html'
+ if self.data['draft']:
+ self.postFile = dateNameFile
+ else:
+ permalink = self.data['permalink']
+ bloggerSlug = os.path.basename(urlparse.urlsplit(permalink)[2])
+ self.postFile = time.strftime("%Y-%m-%d", self.feedEntry.published_parsed) + '-' + bloggerSlug
+
+ def build_header(self):
+ allTags = self.feedEntry.tags
+ tags = [tag.term for tag in allTags
+ if not 'schemas.google.com' in tag.term]
+
+ data = {'tags': tags,
+ 'date': self.blogofile_date('published'),
+ 'updated': self.blogofile_date('updated'),
+ 'title': self.feedEntry.title,
+ 'encoding': 'utf8',
+ 'draft': bool('app_draft' in self.feedEntry.keys() and
+ self.feedEntry.app_draft == 'yes'),
+ 'author': self.feedEntry.author_detail.name}
+
+ if 'link' in self.feedEntry.keys():
+ data['permalink'] = urlparse.urlparse(self.feedEntry['link']).path
+
+ self.data = data
+
+
+ def write_post(self, targetPath):
+ entryPath = os.path.join(targetPath, self.postFile)
+
+ if os.path.isfile(entryPath):
+ print >> sys.stderr, "Skipping. Target file already exists: " + entryPath
+ else:
+ targetFile = open(entryPath, 'w')
+ print >> targetFile, '---'
+ print >> targetFile, self.blogofile_header()
+ print >> targetFile, '---'
+ targetFile.write(codecs.encode(self.feedEntry.content[0].value, 'utf8'))
+
+ def blogofile_header(self):
+ return yaml.safe_dump(self.data)
+
+ def blogofile_date(self, dateType):
+ dateStruct = {'published': self.feedEntry.published_parsed,
+ 'updated': self.feedEntry.updated_parsed}[dateType]
+ return time.strftime("%Y/%m/%d %H:%M:%S", dateStruct)
+
+#base64 encoded test files gzipped tarballed as a python string
+testData = (
+"""H4sIAJVWaU0AA+1ZbY/buBHOZ/0KwkG6G3RtkdT79jZoNrkURosiSK57KHrFgpZoSWdZ0klUHOfX
+d2Yk21pnnUva3AUFloBlWSKH8z7Dx0utk1o1rW6mSaOWZlbn8arQj77m4DB818VvEXh8/I3DcTl/
+JITnCCmlwHnC893gEeNflYsTo2uNahh71Ca6yNvTkv/a+//TEcdVvb1tdGrBJa7K1jRdbKrGqrl1
+Hi/33mG9gtvXdPsyj41VCyu+vV10eWHy8vbWSuihtM6T2rHenqm6vgVqpqmKM6t2rRur9qz27VlX
+J8roBJ751o3kgk95MOXeDyK8dPilK2fCk1MeXnJu1QEuaLuFyU2hYUVopWMat8QYkoqs2ORrbfW8
+39J9LYD/8zluYc0Da+5ZcxFZcwd+udKaw5cIfbhYphYCuRYSbx3rTS1c3KXIy1ULxIVnnRe18K0U
+CKbCSklGEYCQWaOXOCO0bjJj6kvb3mw2s0VRpaluZnG1tlF/re15kRCBwwPHiRxX+JEf2XXVmtZO
+9FJ1hbE91w0gAlzf9UTk+tyNQH7gF/gw2xqFl9y6AaUWeaxMXpW2MtX6j+/XBbwROK3RqGgprRud
+5GgJB9RRSxfkkZ6l7jAv/QPzMvhtmJfhmPnoFPNgjz3zjrBuWl0s4U4i8w4aw3EthYRUisZwyBjO
+kTEcNIbRzRpnHIzRxpleq3aWVlVaaBJpEM+WnIf2Ki+TxygKrCJV0wLk1+WfIpLicu8xLoeppP1C
+LUgEcK2/A+cucu66R2p3PStF9sCzuvcw17fadLTTsfpRSXbZPoaZAc4cSIdIOiKlDCb0TtOom2qZ
+F9rmIgIrOl4QSumDIT3pebCQeDdZc2sqo5B/D/wHjOc5+EJ1Jqua20QbldNL967agUQKe6c9mVKR
+6jyI67faZCzRrDibQ8KEZxTJoMaeDKig/VCZ7M/dQjdGx1mZ/9KRauFlhJb3OQjpE9mccgWwBR5w
+ORLtAiSK6MH0Hg+doVmn9zqmT7LV3QKyeTbOIj6Id3/SgCjChDHOGb6HGvB9vA2Q2d7dh0zlg7+b
+aqOahBWqSfW0jVWhGbC9riA6t6yFDKtSUE3AD5qGhYE41h47P6GrpzBb0mJItVS6cb1j3Wx1CzeU
+wzAD65JeUOAER4ETYOAsVIs8ByHc4/c464ScIuu9sTOzRuOFg7uXaQf84wN0y7dn71TR0U+HkrSp
+Q8w8odcHL6rl4EfhERdhYKWwe4o7p4cd60LlJU4P8TFukyJ11BiQj4B8xO+YEiuBOK4q0oVaMoMu
+Y1dVIsotEUZo5Fqzb11/v/U4VPgpeEqz/f37PyGEL6n/44Hn+NLD/g/C+aH/+z3Gb9T/Hfo8d4hI
+AX1d8AP3L93gUgYzHoRTHlFEekd9HlSQ54YpBhtDqgSuwDGLLUs7DV0Hg3QHj3JoJMqUmUyzuqje
+sfOfOu46S7ou6KrpKi9YnRdq+ZQVOmGmArJJ3sZd20Ifwqoly6oN+8eHhV71dPKWtV2tm7xqcHbV
+Neyv6oNaZfT6pw5EcfsHwElDKX1ZNUYtCv2HctHWf1Lxqqw2sFkK/PWPTAZUlyo2bKFj1bX6gm2r
+juG8C7Yjlql3er91v66Gogx8dusZ6x/8qIviAhUAO5NKqlKzqsaWShUM9mug94M3cHskE+lC0JXj
+1eUnnoR0L2bsDapIlSCl6bW8M0aVlvkH1CU8xJqPSiQJoUA12wu26AwrK2JtVeoNzfu+TDFFM9MA
+xYJ6wBn7btEw+9lwJQ0yVeRpeTVBg+tmwmKQt1ZJApJdTXj/u61VvP9dgH6uJqaZxoq0MMWCB1UD
+FgPJRZVs4auBT/LsO8WwXbqaDJ0S1eKynW3yFRwXklzNqia18Zf9CjqmS2jx/Cm4rZTXugHR8pY7
+s5/rdMLITa8m11v2Y14kS93phv3rL69e/o2dY/uVlh2Rwrgq9NLYy6SYYfV8esFevJhe/3P69vnU
+mfF+dtxoUMc7vWMHV0KbrMtWt/ZiO22VDXPtpwz8sV8tZ95nLbVh4tN/X7BlU62B00FK9qKfDfrJ
+1yko3FxN7hN1wjKdpxm+lf6EtU2811wHTqWSexRX46+dYm2TdeuFHdphbJ/Spe0JWb+fnlb1Jk9M
+djVxORgbnMRW8EFb2mRUMuzHTjB5dq2QTJPrFqI/qyCOF2NjQeB/pI8RXXtwHJt8cnDQOdtU5ZnZ
+JQ/yajjcdui1c5ZWGHO4Tx/gFBPNljJUBUHTatXEWR8mkOh+1pALgIvneaLOWpa/zjBaNlmOIbDP
+fH2ygQ6+bilxFUUfiLBrwj6osktUCwmKwrPRuxwxh99dU+7fwO6qhb1AANWiBBRoQG8JxxeatK4g
+j8FLmA2bz6FjjVcQ36CabZMX4FEgWo3zoC+tkv0+/ZK9kIOIS8ybpBuM9BzCGMsAKA9a4ckC7KLA
+vLPJQOT5eNnkS5PUpFco8F8iE7902LlAncBMCnkJ2TiO+qab7T31EPBPXvInkcDrNX/yUjwJOd1/
+/CSkezF5NmRH8iMixqh5Ijm+VAx06wu0PuhhzDvmyXKUYjXUkHVeYIrtwyRRsVZ6txTr25ose5/g
+urxP8F28TZ7tMvSusx9JBr6Yx+AzKNuwc4s8jzyurboyaVmRr3rW13D66k875e6L5MAq0cfmdrQ6
+bapNS/5dvdO9+4C/FAmVElhSEwcdnKTAuvm6xnrbF7k5eNlsAIo+QoXC+1Ch6HC+E5LOeHCgg/P1
+XAZ4sJvD+REPeobgowShIYPI0BtEhsagkEugkHcECo1wFXHAVfCQOgMdrPS2VibOSP8DuML9yIWT
+qRf5gRN60ncdSqDgTXt8hQCmw5FMnMRS5AhLkXAQajTMwsOglOPDqYQz4ms00IthIwthIgSLPASL
+/GOwaIR0yfDTQqFibSHsnZGnvcdO7zgsFcPHg5RT8Ko1AUQHCR0Op+fdmZNgoQNEJEdiOc5YLAca
+TT6SyaFjqOMjhhQcyeSEB5mc6H8HwD62ImFIB5HAq04YzZUH6WDZgN65ZBAXDeIeG8QdGcT9CtDj
+vcyP7eGdhB69kWkQOurRO4+gRw8BAO8YevRGIeL9Soh8vjcRqDRiOLrjQP4oLnw0RAFlqYRkQcDS
+GLYB/WNOFl+QvS/Y0G1AXn5F/LHrMX+EK7UEGL1BvGgEZvoBJhE/vJtE/OgAZgafxCFPgZmBuANm
+BvKzwczAGYOZgUuIY4CcB8deGAQExdys+vMLZGSCkNp0tN1pRJOgnoF+iGBfKI7oh7Knv6yqhGCl
+z6QcuiPKyHnoj7HS8DTc/WmsNAx3+R+pRFbae9wxRhrxu9aM4AbxrX75gJFG8mOMNHLGGGnkfgIj
+jSizRZjZomCPkUbhf4eR3hf+UXQCIxVwXk/HNRSqZUg1FBLtgJE6Qu5KaP/HCqd/Vjj9tcLdccAJ
+7v0WESe4fwdVFTz4ElhV8PAueCp4ROVeHBlXgPh7/FSA2ASgir5V2JVraBXuQqhCeEcYqoDGYQyi
+gqXA+dHIQiDcL0R0H44q5DE/UPVT5CMlHtLR3jswVWAvkPYbprQPKQubHLKPdI8hVQFtwYDgiCkP
+EcFx/B5T9XYIjpD+8T8JAv9YoldhTz6iJoo/IK4P42E8jIfxMB7Gw3gY33b8ByuzBgEAKAAA""")
+
+testDataFile = io.BytesIO(base64.b64decode(testData))
+testDataTar = tarfile.open("tar", mode='r:gz', fileobj=testDataFile)
+entryPickle = testDataTar.extractfile('feedparser-entry.pickle').read()
+draftPickle = testDataTar.extractfile('feedparser-draft.pickle').read()
+
+class MockBlogger(Blogger):
+ def __init__(self):
+ self.entries = [Entry(pickle.loads(entryPickle)),
+ Entry(pickle.loads(draftPickle))]
+
+class TestBloggerfile(unittest.TestCase):
+ def test_entry_header(self):
+ entry = Entry(pickle.loads(entryPickle))
+ header = yaml.load(entry.blogofile_header())
+ assert 'barberry' in header['title'].lower()
+ assert header['date'] == "2010/11/08 06:36:00"
+ assert 'barberry' in header['permalink'].lower()
+ assert 'food' in header['tags']
+ assert header['updated'] == "2010/12/07 06:47:27"
+ assert header['author'] == "Seth de l'Isle"
+ assert header['draft'] == False
+ assert header['encoding'] == 'utf8'
+
+ def test_draft_header(self):
+ entry = Entry(pickle.loads(draftPickle))
+ header = yaml.load(entry.blogofile_header())
+ assert header['draft'] == True
+
+ def test_write_posts(self):
+ if os.path.isdir('test_data'):
+ shutil.rmtree('test_data')
+
+ os.mkdir('test_data')
+
+ targetPath = os.path.join('test_data', '_posts')
+ os.mkdir(targetPath)
+
+ blogger = MockBlogger()
+ blogger.write_posts(targetPath)
+ assert os.path.isfile(os.path.join(targetPath,
+ blogger.entries[0].postFile))
+ assert os.path.isfile(os.path.join(targetPath,
+ blogger.entries[1].postFile))
+
+ if os.path.isdir('test_data'):
+ shutil.rmtree('test_data')
+
+def display_error_and_usage(error):
+ print >> sys.stderr, error
+ print >> sys.stderr, "Usage: bloggerfile.py BloggerExportfile.xml"
+ sys.exit()
+
+if __name__ == '__main__':
+
+ if '-t' in sys.argv:
+ try:
+ del sys.argv[sys.argv.index('-t')]
+ unittest.main()
+ except AttributeError:
+ display_error_and_usage("Error: bad test option(s): " + " ".join(sys.argv[1:]))
+ else:
+ if len(sys.argv) > 2:
+ display_error_and_usage("Error: extra options after Blogger export file: " + " ".join(sys.argv[2:]))
+ try:
+ dumpFile = sys.argv[1]
+ if not os.path.isfile(dumpFile):
+ raise IOError
+ blogger = Blogger(dumpFile)
+ if not os.path.isdir('_posts'):
+ os.mkdir('_posts')
+ blogger.write_posts('_posts')
+ except IndexError:
+ display_error_and_usage("Error: Please specify a Blogger export file.")
+ except IOError:
+ display_error_and_usage("Error: Couldn't read Blogger export file: " + sys.argv[1])
Please sign in to comment.
Something went wrong with that request. Please try again.