public
Description: A Django Based Blog
Homepage: http://altmansoftware.lighthouseapp.com/projects/19962-shiftingbits
Clone URL: git://github.com/paltman/shiftingbits.git
shiftingbits / import_wp.py
100644 62 lines (53 sloc) 2.808 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from elementtree.ElementTree import parse, tostring
import sys
from datetime import datetime
from shiftingbits.blog.models import Post
from django.contrib.sites.models import Site
 
# Import posts, their category tags and their comments from a WordPress
# export file into the blog.  The path of the export file is read from
# sys.argv[1].

# Clark-notation namespace prefixes used for element lookups in the export.
WP_NS = '{http://wordpress.org/export/1.0/}'
CONTENT_NS = '{http://purl.org/rss/1.0/modules/content/}'

site = Site.objects.get(id=1)
data = parse(sys.argv[1]).getroot()

post_count = 0
comment_count = 0
posts = data.findall('.//item')
for post in posts:
    # Only <item> elements whose post_type is 'post' are imported.
    if post.findtext(WP_NS + 'post_type') != 'post':
        continue
    post_count += 1

    title = post.findtext('title')
    body = post.findtext(CONTENT_NS + 'encoded')
    date_published = datetime.strptime(post.findtext('pubDate'), '%a, %d %b %Y %H:%M:%S +0000')
    active = post.findtext(WP_NS + 'status') == 'publish'

    # Keep trying numbered suffixes (-2, -3, ...) until the slug is unused,
    # so that three or more posts sharing a slug do not collide.
    base_slug = post.findtext(WP_NS + 'post_name')
    slug = base_slug
    suffix = 2
    while Post.objects.filter(slug=slug).count():
        slug = '%s-%d' % (base_slug, suffix)
        suffix += 1

    # The raw <item> XML is stored alongside the post in extra_fields.
    entry = Post(title=title, slug=slug, body=body, active=active, create_date=date_published, pub_date=date_published, extra_fields=tostring(post))
    # NOTE(review): saved before tags are assigned, presumably so the entry
    # has a primary key at that point -- confirm before merging the saves.
    entry.save()

    # Categories in the 'category' domain become space-separated tags;
    # categories without a nicename are skipped rather than breaking join().
    tags = []
    for category in post.findall('category'):
        nicename = category.get('nicename')
        if category.get('domain') == 'category' and nicename:
            tags.append(nicename)
    entry.tags = ' '.join(tags)
    entry.save()

    for comment in post.findall(WP_NS + 'comment'):
        comment_count += 1
        # findtext() returns None when the element is absent; fall back to
        # an empty string so the 50-character truncation cannot fail.
        author = comment.findtext(WP_NS + 'comment_author') or ''
        entry.comments.create(
            user_name=author[0:50],
            user_email=comment.findtext(WP_NS + 'comment_author_email'),
            user_url=comment.findtext(WP_NS + 'comment_author_url'),
            comment=comment.findtext(WP_NS + 'comment_content'),
            submit_date=datetime.strptime(comment.findtext(WP_NS + 'comment_date'), '%Y-%m-%d %H:%M:%S'),
            ip_address=comment.findtext(WP_NS + 'comment_author_IP'),
            is_public=True,
            # Any comment not explicitly approved ('1') is flagged as removed.
            is_removed=comment.findtext(WP_NS + 'comment_approved') != '1',
            site=site
        )

# Parenthesised %-formatting prints the same text on Python 2 and Python 3.
print('%d posts inserted...' % post_count)
print('%d comments inserted...' % comment_count)