from elementtree.ElementTree import parse, tostring
import sys
from datetime import datetime
from shiftingbits.blog.models import Post
from django.contrib.sites.models import Site
# Import posts and their comments from a WordPress export (WXR) file into the
# blog's Post model.  The path of the export file is read from sys.argv[1].

# XML namespaces used by the export; ElementTree expects them in "{uri}tag" form.
WP_NS = '{http://wordpress.org/export/1.0/}'
CONTENT_NS = '{http://purl.org/rss/1.0/modules/content/}'


def _unique_slug(base):
    """Return *base*, or *base* plus the first free numeric suffix.

    The first collision yields ``base-2``; if that is taken as well the
    counter keeps going (``base-3``, ``base-4``, ...) instead of stopping
    after a single attempt.
    """
    candidate = base
    suffix = 2
    while Post.objects.filter(slug=candidate).count():
        candidate = base + '-' + str(suffix)
        suffix += 1
    return candidate


site = Site.objects.get(id=1)
data = parse(sys.argv[1]).getroot()
posts = data.findall('.//item')

post_count = 0
comment_count = 0

for post in posts:
    # Only <item> elements whose post_type is 'post' are imported.
    if post.findtext(WP_NS + 'post_type') != 'post':
        continue
    post_count += 1

    title = post.findtext('title')
    slug = _unique_slug(post.findtext(WP_NS + 'post_name'))
    body = post.findtext(CONTENT_NS + 'encoded')
    # NOTE(review): the format hard-codes a '+0000' offset, so a pubDate
    # carrying any other offset (or an unparsable date) raises ValueError.
    date_published = datetime.strptime(
        post.findtext('pubDate'), '%a, %d %b %Y %H:%M:%S +0000')
    active = post.findtext(WP_NS + 'status') == 'publish'

    entry = Post(
        title=title,
        slug=slug,
        body=body,
        active=active,
        create_date=date_published,
        pub_date=date_published,
        # Keep the serialized <item> element alongside the imported fields.
        extra_fields=tostring(post),
    )
    # NOTE(review): the entry is saved once before the tags are assigned and
    # again afterwards, exactly as before -- presumably the tag field needs a
    # saved instance; confirm before collapsing this into a single save.
    entry.save()

    # <category domain="category"> elements become space-separated tags.
    tags = [
        category.get('nicename')
        for category in post.findall('category')
        if category.get('domain') == 'category'
    ]
    entry.tags = ' '.join(tags)
    entry.save()

    # Attach every exported comment to the freshly created entry.
    for comment in post.findall(WP_NS + 'comment'):
        comment_count += 1
        # Anything not explicitly approved ('1') is imported as removed.
        removed = comment.findtext(WP_NS + 'comment_approved') != '1'
        # findtext() returns None when the element is missing; fall back to
        # an empty string so the 50-character truncation cannot fail.
        author = comment.findtext(WP_NS + 'comment_author') or ''
        entry.comments.create(
            user_name=author[:50],
            user_email=comment.findtext(WP_NS + 'comment_author_email'),
            user_url=comment.findtext(WP_NS + 'comment_author_url'),
            comment=comment.findtext(WP_NS + 'comment_content'),
            submit_date=datetime.strptime(
                comment.findtext(WP_NS + 'comment_date'),
                '%Y-%m-%d %H:%M:%S'),
            ip_address=comment.findtext(WP_NS + 'comment_author_IP'),
            is_public=True,
            is_removed=removed,
            site=site
        )

# Single-argument parenthesised form prints the same text as the old
# ``print i, '...'`` statements and is also valid Python 3 syntax.
print('%d posts inserted...' % post_count)
print('%d comments inserted...' % comment_count)