<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -1,3 +1,4 @@
+.DS_Store
 config/database.yml
 config/tumblr.yml
 config/deploy.rb</diff>
      <filename>.gitignore</filename>
    </modified>
    <modified>
      <diff>@@ -2,5 +2,5 @@
   :authenticate: true
   :email: you@example.com
   :password: secret
-  :group: radarchive.tumblr.com
-  :group_name: Radarchive
+  :group:
+  :group_name:</diff>
      <filename>config/tumblr.yml.sample</filename>
    </modified>
    <modified>
      <diff>@@ -9,20 +9,21 @@ require 'yaml'
 require 'active_record'
 require 'models'
 
-# Tumblr config
+# Load Tumblr config
 ($config ||= {})[:tumblr] = YAML.load_file(File.dirname(__FILE__)+&quot;/config/tumblr.yml&quot;)[:tumblr]
 
-# Connect to database
+# Load DB config &amp; connect
 $config[:database] = YAML.load_file( File.dirname(__FILE__)+'/config/database.yml')
 ActiveRecord::Base.logger = Logger.new('../database.log')
 ActiveRecord::Base.colorize_logging = true
 ActiveRecord::Base.establish_connection($config[:database][ (ENV['MERB_ENV'] || :development).to_sym])
 
 # Should we log in first? Can get reblog data that way
-def authenticate?; $config[:tumblr][:authenticate] || false; end
+def authenticate?; !$config[:tumblr][:authenticate].nil? || false; end
+def authenticated?; return authenticate?; end # TODO, detect if we are authenticated
 
 
-# FIXME put or get from a lib or something geeze
+# FIXME get from a lib or something! Geeze
 class String
   def strip_html(allowed = ['a','img','p','br','i','b','u','ul','li'])
   	str = self.strip || ''
@@ -33,7 +34,7 @@ end
 
 ##########
 # save a grip of radar posts to the database
-# FIXME this is one gigantic function wowsa o_O
+# TODO this one gigantic function should be broken up O_o
 def save(posts)
 
   added, skipped = 0, 0
@@ -41,20 +42,21 @@ def save(posts)
 
     # deduce the post type via class
     author, data = {}, {}    
-    data[:type] = /\s(.*)_post\s?/.match( post.attributes['class'] )[1].to_s rescue 'photo' # should default to :default
+    data[:type] = /\s(.*)_post\s?/.match( post.attributes['class'] )[1].to_s rescue 'photo' # TODO should default to :default
 
+    # all posts except 'photo' contain all the info we need
+    # 'photo' posts need to visit their permalink and get the full image path and reblog link
     case data[:type] 
     when 'photo'
       author[:name] = post['href'].split('/')[2].gsub('.tumblr.com','')
       author[:url] = post.attributes['href'].gsub(/post.*$/,'')
-      data[:url] = post.attributes['href'] #FIXME? or parse later. hard to tell which user they are...
-
-      # data[:content] = post.search('img')[0].to_s
+      data[:url] = post.attributes['href']
 
       unless Post.exists?(:type =&gt; 'photo', :url =&gt; data[:url])
+
         # fetch the post's page to get the full details
         puts &quot;Paying a visit to: #{data[:url]}&quot;
-        page = $agent.get(data[:url]) rescue (puts &quot;Failed to get second visit: #{$!}&quot;; next)
+        page = $agent.get(data[:url]) rescue (puts &quot;Failed to get permalink page: #{$!}&quot;; next)
 
         # try a few variations on what their content div might be called... sheesh
         photo_divs = ['.post_container','.photo', '.post']
@@ -62,7 +64,6 @@ def save(posts)
         photo_divs.each { |div|
          photo_div = div if photo_div.empty? &amp;&amp; !page.search(&quot;#{div} img&quot;).empty? #fingers crossed
         }
-        # puts &quot;Settled on photo_div = #{photo_div}&quot;
 
         # find image and &quot;source&quot; (description)
         data[:content] = page.search(photo_div+' img')[0].to_s rescue nil # first image? TODO find the biggest image
@@ -80,12 +81,11 @@ def save(posts)
         end
       end      
 
-    #when 'quote'
-    #when 'video' # TODO make request for YouTube stats
+    # all other kinds of posts: regular, quote, video, link, conversation
     else
       
       # capture reblog link if we're authenticating
-      # FIXME this is very loosely targetted and prone to breaking
+      # TODO this is very loosely targetted and prone to break
       if authenticate?
         first_link = post.search('a:first').remove
         data[:reblog_link] = first_link[0]['href']
@@ -93,16 +93,18 @@ def save(posts)
       end
       
       # capture the rest of the content &amp; metadata
-      link = post.search('.attribution').remove.search('a')[0]
+      link = post.search('.attribution a').remove[0]
       author[:name] = link.innerHTML
       author[:url] = link['href'].gsub(/\/post.*$/,'')
 
-      # there's only sometimes a permalink, lame. TODO
+      # there's only sometimes a permalink O_o -- TESTME
       link = post.search('a.more')[0]
       data[:url] = link['href'] unless (link.nil? || (link['href'] == author[:url]))
       data[:url] ||= post.search('a')[0]['href'] rescue nil
 
-      #TODO parse into a proper hash a la tumblr? now is the time if we want to at all
+      # TODO parse into a proper hash a la tumblr, with content, source, etc broken up? 
+      # now is the time if we want to at all
+      
       data[:content] = post.innerHTML.strip
       #puts &quot;content = #{data[:content].inspect}&quot;
     end
@@ -112,7 +114,6 @@ def save(posts)
     user.save! if user.new_record?
     data[:user_id] = user.id
   
-    #puts &quot;#{data[:user_id]} #{data[:type]} #{data[:url]} userURL = #{author[:url]}&quot;
     obj = Post.find_or_initialize_by_type_and_user_id_and_url(data)
     if obj.new_record?
       # next if obj.reblog_link.nil? or obj.reblog_link.empty?
@@ -125,7 +126,7 @@ def save(posts)
       skipped += 1
     end
   }
-  puts &quot;#{added} new posts, #{skipped} skipped.&quot;
+  puts &quot;#{added} new posts. #{skipped} skipped.&quot;
 
 end
 
@@ -135,7 +136,7 @@ end
 #########
 # post to tumblr (by reblogging it to your specified group)
 def post_to_tumblr(post)
-  puts &quot;post to tumblr: #{post.id}, #{post.attributes['type']}, #{post.url}, reblog_link =&gt; #{post.reblog_link}&quot;
+  puts &quot;Post to Tumblr: #{post.id}, #{post.attributes['type']}, #{post.url}, reblog_link =&gt; #{post.reblog_link}&quot;
   
   raise RuntimeError, &quot;Can't post w/o a reblog link&quot; if post.reblog_link.nil? or post.reblog_link.empty? or post.reblog_link == '/'
   
@@ -174,7 +175,8 @@ puts &quot;----------&quot;
 puts Time.now
 
 $agent = WWW::Mechanize.new
-$agent.user_agent = 'Radarchive &lt;http://radarchive.tumblr.com&gt;'
+$agent.user_agent = &quot;Tumblr Radar Scraper 1.0 (#{$config[:tumblr][:email] || 'anonymous'})&quot;
+puts $agent.user_agent
 
 # log in (so we can get reblog links)
 if authenticate? 
@@ -189,9 +191,9 @@ if authenticate?
     # test if we're logged in OK by number of links on an unlogged-in-page
     # FIXME. use some kind of auth header perhaps, or the Tumblr API
     links = $agent.get('http://www.tumblr.com/dashboard').links
-    puts &quot;Num of links on dashboard: #{links.length}&quot;
+    puts &quot;Dashboard link count: #{links.length}&quot;
     if links.length &lt;= 13
-      raise RuntimeError, &quot;Could not reach the Dashboard, cookies are probably no longer valid!&quot; 
+      raise RuntimeError, &quot;Could not reach the Dashboard; cookies are probably no longer valid&quot; 
     end
   rescue  
     puts &quot;#{$!}... logging in...&quot;
@@ -199,11 +201,14 @@ if authenticate?
     form = page.forms[0]
     form.email = $config[:tumblr][:email]
     form.password = $config[:tumblr][:password]
-    $agent.submit(form)
-    puts &quot;done, saving cookies&quot;
+    page = $agent.submit(form)
+    
+    # did we login OK? TODO check for redirect code, not just title
+    raise RuntimeError, &quot;Login failed! Check your credentials.&quot; unless (page/:title).innerHTML == &quot;Logging in...&quot;
+    
+    puts &quot;Authenticated! Saving cookies...&quot;
     $agent.cookie_jar.save_as('cookies.yml') # Save the cookies
-  ensure
-    puts &quot;Authenticated!&quot;
+
   end
 end
 </diff>
      <filename>runner.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>f2d66fff498a494118237901a1abe21bc10c5620</id>
    </parent>
  </parents>
  <author>
    <name>Jamie Wilkinson</name>
    <email>jamie@tramchase.com</email>
  </author>
  <url>http://github.com/jamiew/tumblr-radarchive/commit/da7fbb28a646f48205b0147235c44034a7a5c177</url>
  <id>da7fbb28a646f48205b0147235c44034a7a5c177</id>
  <committed-date>2008-08-15T15:12:01-07:00</committed-date>
  <authored-date>2008-08-15T15:12:01-07:00</authored-date>
  <message>add logged-in checks to runner, clean up some comments, and remove a 'default' group from the tumblr conf sample</message>
  <tree>0cd6ab0b303c6c63bf4b0190033b0ebcd9d74c25</tree>
  <committer>
    <name>Jamie Wilkinson</name>
    <email>jamie@tramchase.com</email>
  </committer>
</commit>
