forked from imathis/octopress
/
wordpress_xml_import.rb
45 lines (32 loc) · 1.18 KB
/
wordpress_xml_import.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
require 'fileutils'
require 'date'
require 'yaml'
require 'rexml/document'
include REXML
doc = Document.new File.new(ARGV[0])
FileUtils.mkdir_p "_posts"
doc.elements.each("rss/channel/item[wp:status = 'publish' and wp:post_type = 'post']") do |e|
post = e.elements
slug = post['wp:post_name'].text
date = DateTime.parse(post['wp:post_date'].text)
name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]
content = post['content:encoded'].text
content = content.gsub(/<code>(.*?)<\/code>/, '`\1`')
content = content.gsub(/<pre lang="([^"]*)">(.*?)<\/pre>/m, '{% highlight \1 %}\2{% endhighlight %}')
(1..3).each do |i|
content = content.gsub(/<h#{i}>([^<]*)<\/h#{i}>/, ('#'*i) + ' \1')
end
puts "Converting: #{name}"
data = {
'layout' => 'blog_post',
'title' => post['title'].text,
'excerpt' => post['excerpt:encoded'].text,
'wordpress_id' => post['wp:post_id'].text,
'wordpress_url' => post['guid'].text
}.delete_if { |k,v| v.nil? || v == ''}.to_yaml
File.open("_posts/#{name}", "w") do |f|
f.puts data
f.puts "---"
f.puts content
end
end