Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Markdown is integrated more now, parses text where it is necessary to…

… create new HTML structure (such as for conversations). Markdown form quotes are used for quote-posts now, where Markdown is preserved. Title-extraction now works fully, removing the repeated line when safe. Script now generates warnings about Tumblr hosted audio files, and includes them as XML comments in the output.
  • Loading branch information...
commit 955a887db5f04fd728b639aa76146611549907d3 1 parent faf1c21
@BenWard authored
Showing with 153 additions and 80 deletions.
  1. +12 −1 README
  2. +101 −79 index.php
  3. +40 −0 t2w.css
View
13 README
@@ -45,4 +45,15 @@ This work is licensed under the GPL v3 <http://www.gnu.org/licenses/gpl.html>
* BUGFIX: Captions were not being included for video files.
* BUGFIX: Audio links now have text 'Audio'. Caption follows below. This
prevents nesting problems if the caption itself contains links elsewhere,
- or block elements.
+ or block elements.
+ * Including Markdown processor to handle mark-up of conversations and title-
+ extraction.
+ * Links are prefixed with "Link:", to differentiate the content from post
+ titles. WordPress will (by default) always link to the new post's
+ permalink from the title.
+
+## Known Issues
+
+ * Doesn't support Tumblr's new multiple-photos-per-post feature.
+ * WordPress.com does not import the HTML5 <audio> element.
+ * WordPress.com does not import any <embed> or <object> mark-up for videos.
View
180 index.php
@@ -2,6 +2,10 @@
define(T2W_VERSION, '0.3-benward');
+# Some pieces of content will have to be parsed into HTML where we have to add
+# HTML structure (e.g. around conversations)
+require_once("markdown.php");
+
# Check for valid input
$username = isset($_REQUEST['username']) && !empty($_REQUEST['username']) ? $_REQUEST['username'] : '';
@@ -11,48 +15,7 @@
<head>
<meta charset="utf-8">
<title>Tumblr2WordPress: Export Your Tumblr To WordPress</title>
- <style>
- body {
- max-width: 40em;
- font: 100%/150% Helvetica, Arial, sans-serif;
- }
- h1 {
- font-size: 130%;
- line-height: 150%;
- }
- dl {
- font-size: 80%;
- }
- dt {
- font-weight: bold;
- }
- form {
- margin: 5px 0;
- padding: 0;
- }
- fieldset {
- border: 0;
- -webkit-border-radius: 10px;
- -moz-border-radius: 10px;
- background-color: #eee;
- background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#fff));
- border: 3px #2D4261 solid;
- padding: 5px 15px;
- }
- legend {
- font-weight: bold;
- padding-top: 2.5em;
- margin-left: -2px;
- }
- fieldset ul {
- list-style-type: none;
- margin: 0;
- padding: 0;
- }
- p.donate {
- font-size: 80%;
- }
- </style>
+ <link rel="stylesheet" href="t2w.css" type="text/css">
</head>
<body>
<h1>Tumblr2WordPress: Export Your Tumblr to WordPress</h1>
@@ -165,6 +128,13 @@
<fieldset>
<legend>Ready?</legend>
<input type="submit" value="Export">
+ <p>After a short pause, your browser will download an XML file
+ containing your Tumblr posts, converted for use in Wordpress.
+ You should open this file in a text editor to see the
+ instructions on how to import your posts, and also check for
+ any warnings the exporter has generated for your posts. For
+ example, you may need to re-upload audio and image files from
+ your posts.</p>
</fieldset>
</form>
<h2>Notes and Help</h2>
@@ -209,6 +179,8 @@
$posts = array();
$feed = '';
$allTags = array();
+$warnings = array();
+$markdown = false;
# Tumblr Query Options:
switch($_REQUEST["filter"]) {
@@ -219,6 +191,7 @@
case "none":
# Do not post-process posts (leaves Markdown intact)
$filter = "&filter=none";
+ $markdown = true;
break;
default:
$filter = "";
@@ -230,16 +203,16 @@
# Permalink Format
switch($_REQUEST["permaform"]) {
case "combo":
- # Do not post-process posts (leaves Markdown intact)
+ # ID and Hyphenated Title
$permalink_format = "combined";
break;
case "text":
- # Plaintext Content
+ # Title
$permalink_format = "text";
break;
case "id":
default:
- # Plaintext Content
+ # ID only
$permalink_format = "id";
break;
}
@@ -272,14 +245,21 @@
}
# OK. Query the Tumblr API for the posts and get them all in 50-post batches:
-do {
- $url = 'http://'.$username.'.tumblr.com/api/read?start='. $i . '&num=50' . $filter;
- $file = file_get_contents($url);
- $feed = new SimpleXMLElement($file);
- $posts = array_merge($posts, $feed->xpath('posts//post'));
- $i = (int)$feed->posts->attributes()->start + 50;
-} while($i <= (int)$feed->posts["total"]);
-
+try {
+ do {
+ $url = 'http://'.$username.'.tumblr.com/api/read?start='. $i . '&num=50' . $filter;
+ $file = file_get_contents($url);
+ $feed = new SimpleXMLElement($file);
+ $posts = array_merge($posts, $feed->xpath('posts//post'));
+ $i = (int)$feed->posts->attributes()->start + 50;
+ } while($i <= (int)$feed->posts["total"]);
+}
+catch(Exception $e) {
+ echo "<h1>Error fetching Tumblr posts</h1>";
+ echo "<p>" . $e->getMessage() . "</p>";
+ echo "<p>$i posts fetched</p>";
+ die();
+}
function formatForWP($str)
{
global $type;
@@ -391,15 +371,9 @@ function formatEntryTitle(&$text, $strip=true) {
# we want to strip out the link mark-up…
# If raw input:
- if('none' == $_REQUEST["filter"]) {
+ if($markdown) {
# Run markdown:
- if(file_exists("markdown.php")) {
- require_once("markdown.php");
- $l = Markdown($l);
- }
- else {
- error_log("Couldn't import Markdown parser");
- }
+ $l = Markdown($l);
}
# Crudely check for <a>
$contains_link = !(false === stripos('<a', $l));
@@ -409,17 +383,15 @@ function formatEntryTitle(&$text, $strip=true) {
# If there has been no other content so far (allowing one block
# for quote attribution), and we're stripping titles out of the
# text to avoid duplication, do it:
- $lines = array_splice($lines, $i, 1);
- $text = implode('\n', $lines);
+ array_splice($lines, $i, 1);
+ $text = implode("\n", $lines);
}
# In the final return, strip not-inline HTML tags.
- return str_replace('\n', '', strip_tags(
- $l,
- '<abbr><acronym><i><b><strong><em><code><kbd><samp><span><q>
- <cite><dfn><ins><del><mark><meter><rp><rt><ruby><sub><sup>
- <time><var>'
- ));
+ return str_replace('\n', '', strip_tags($l));
+ #'<abbr><acronym><i><b><strong><em><code><kbd><samp><span><q>
+ # <cite><dfn><ins><del><mark><meter><rp><rt><ruby><sub><sup>
+ # <time><var>'
}
else {
$block_count++;
@@ -433,6 +405,43 @@ function formatEntryTitle(&$text, $strip=true) {
return '';
}
+# Record a warning when a media URL contains 'tumblr.com' (case-insensitive match), i.e. the file appears to be hosted by Tumblr.
+# $media_url: URL to test; $post: stored under the warning's 'url' key; $type: media kind named in the message (the visible caller passes "audio"). Returns nothing.
+function checkMediaForWarnings($media_url, $post, $type) {
+ global $warnings; # shared list of warnings; getWarnings() prints it
+ if(false !== stripos($media_url, 'tumblr.com')) {
+ # The $type file (not only audio) looks Tumblr-hosted; queue a message telling the user to re-host it.
+ $warnings[] = array(
+ 'url' => $post,
+ 'error' => "Post references a $type file hosted on
+ Tumblr.com. This file will not be accessible
+ from a post hosted on your own site. You need to host
+ `{$media_url}`
+ elsewhere and update the post."
+ );
+ }
+}
+
+function getWarnings() { # Despite the name, returns nothing: echoes every recorded warning as XML comments
+ global $warnings; # entries are arrays with 'url' and 'error' keys, as appended by checkMediaForWarnings()
+ if(!empty($warnings)): # print nothing at all, not even the header, when no warnings were recorded
+ ?>
+<!-- Warnings: -->
+<!-- The following warnings were recorded when exporting your Tumblr posts, -->
+<!-- and may require attention and manual intervention to full restore -->
+<!-- your posts. -->
+
+ <?php
+ foreach($warnings as $id=>$data) { # $id is unused. NOTE(review): values are interpolated unescaped; a '--' inside them would make the XML comment ill-formed
+ echo <<<WARNING
+<!-- Warning for: {$data['url']} -->
+<!-- {$data['error']} -->
+
+WARNING;
+ }
+ endif;
+}
+
header('content-type: text/xml');
header("content-disposition: attachment; filename=tumblr_$username.xml");
?>
@@ -476,8 +485,9 @@ function formatEntryTitle(&$text, $strip=true) {
<wp:category_parent></wp:category_parent>
<wp:cat_name><![CDATA[Uncategorized]]></wp:cat_name>
</wp:category>
+
<?php
-ob_start();
+ ob_start();
foreach($posts as $post)
{
?>
@@ -514,11 +524,10 @@ function formatEntryTitle(&$text, $strip=true) {
case "photo":
$post_content = $post->{'photo-caption'};
-
?>
<title><?php echo htmlspecialchars(formatEntryTitle(&$post_content)) ?></title>
<description></description>
- <content:encoded><![CDATA[<img src="<?php echo $post->{'photo-url'} ?>" alt=""/>
+ <content:encoded><![CDATA[<img src="<?php echo $post->{'photo-url'} ?>" alt="">
<?php echo formatForWP($post_content) ?>]]></content:encoded>
<wp:post_name><?php echo formatPermalinkSlug($post->attributes()->id, $post->{'photo-caption'}) ?></wp:post_name>
@@ -527,10 +536,19 @@ function formatEntryTitle(&$text, $strip=true) {
case "quote":
$post_content = $post->{'quote-source'};
+
+ # Mark-up the quote:
+ if($markdown) {
+ # Add Markdown quotes, and hard line-breaks
+ $quote_text = "> " . str_replace("\n", " \n> ", $post->{'quote-text'}) . "\n";
+ }
+ else {
+ $quote_text = "<blockquote>" . $post->{'quote-text'} . "</blockquote>";
+ }
?>
<title><?php echo htmlspecialchars(formatEntryTitle(&$post_content)) ?></title>
<description></description>
- <content:encoded><![CDATA[<blockquote><?php echo $post->{'quote-text'} ?></blockquote>
+ <content:encoded><![CDATA[<?php echo $quote_text ?>
<?php echo formatForWP($post_content) ?>]]></content:encoded>
<wp:post_name><?php echo formatPermalinkSlug($post->attributes()->id, str_replace('&#8220;','',str_replace('&#8221;','',$post->{'quote-text'}))) ?></wp:post_name>
@@ -540,26 +558,28 @@ function formatEntryTitle(&$text, $strip=true) {
case "link": ?>
<title><?php echo htmlspecialchars(strip_tags($post->{'link-text'})) ?></title>
<description><?php echo htmlspecialchars(strip_tags($post->{'link-description'})) ?></description>
- <content:encoded><![CDATA[<a href="<?php echo $post->{'link-url'} ?>"><?php echo $post->{'link-text'} ?></a>
+ <content:encoded><![CDATA[Link: <a href="<?php echo $post->{'link-url'} ?>"><?php echo $post->{'link-text'} ?></a>
<?php echo formatForWP($post->{'link-description'}) ?>]]></content:encoded>
<wp:post_name><?php echo formatPermalinkSlug($post->attributes()->id, $post->{'link-text'}) ?></wp:post_name>
<?php
break;
-
case "conversation": ?>
<title><?php echo htmlspecialchars(strip_tags($post->{'conversation-title'})) ?></title>
<description></description>
<content:encoded><![CDATA[<?php
- foreach($post->{'conversation-line'} as $line) { ?>
- <cite><?php echo $line->attributes()->label ?></cite>
- <q><?php echo $line ?></q><br/><?php } ?>]]></content:encoded>
+ foreach($post->{'conversation-line'} as $line) {
+ ?><cite><?php
+ echo ($markdown) ? preg_replace('/(<\/?p>|\n)/', '', Markdown($line->attributes()->label)) : $line->attributes()->label
+ ?></cite> <q><?php
+ echo ($markdown) ? preg_replace('/(<\/?p>|\n)/', '', Markdown($line)) : $line;
+ ?></q><br>
+ <?php } ?>]]></content:encoded>
<wp:post_name><?php echo formatPermalinkSlug($post->attributes()->id, $post->{'conservation-title'}) ?></wp:post_name>
<?php
break;
-
case "video":
$post_content = $post->{'video-caption'};
?>
@@ -583,6 +603,7 @@ function formatEntryTitle(&$text, $strip=true) {
case "audio":
$post_content = $post->{'audio-caption'};
$audio_file = preg_match('/audio_file=([\S\s]*?)(&|")/', $post->{'audio-player'}, $matches);
+ checkMediaForWarnings($matches[1], $post->attributes()->url, "audio");
?>
<title><?php echo htmlspecialchars(formatEntryTitle(&$post_content)) ?></title>
<description></description>
@@ -599,6 +620,7 @@ function formatEntryTitle(&$text, $strip=true) {
}
$out = ob_get_contents();
ob_end_clean();
+ getWarnings();
getAllTags();
echo $out;
?>
View
40 t2w.css
@@ -0,0 +1,40 @@
+body {
+ max-width: 40em;
+ font: 100%/150% Helvetica, Arial, sans-serif;
+}
+h1 {
+ font-size: 130%;
+ line-height: 150%;
+}
+dl {
+ font-size: 80%;
+}
+dt {
+ font-weight: bold;
+}
+form {
+ margin: 5px 0;
+ padding: 0;
+}
+fieldset {
+ border: 0;
+ -webkit-border-radius: 10px;
+ -moz-border-radius: 10px;
+ background-color: #eee;
+ background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#fff));
+ border: 3px #2D4261 solid;
+ padding: 5px 15px;
+}
+legend {
+ font-weight: bold;
+ padding-top: 2.5em;
+ margin-left: -2px;
+}
+fieldset ul {
+ list-style-type: none;
+ margin: 0;
+ padding: 0;
+}
+p.donate {
+ font-size: 80%;
+}
Please sign in to comment.
Something went wrong with that request. Please try again.