<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -5,19 +5,27 @@ require 'rubygems'
 require 'uri'
 require 'mechanize'
 
+# go easy
+def snooze; sleep 2; end
 
 def page_history(page, offset = '')
   STDERR.print &quot;#{offset}.. &quot;; STDERR.flush
   
   rvlimit = 500 # revisions per page
-  url = &quot;http://en.wikipedia.org/w/api.php?action=query&amp;prop=revisions&amp;titles=#{URI.escape(page)}&amp;rvprop=timestamp|user&amp;rvlimit=#{rvlimit}&amp;format=xml&quot;
+  url = &quot;http://en.wikipedia.org/w/api.php?action=query&amp;prop=revisions&amp;titles=#{URI.escape(page)}&amp;rvprop=timestamp|user|size&amp;rvlimit=#{rvlimit}&amp;format=xml&quot;
   url += &quot;&amp;rvstartid=#{offset}&quot; unless offset.empty?
+  snooze
 
+  sleep 0.5 # easy
   agent = WWW::Mechanize.new # FIXME, don't always need to reinitialize 
   doc = Hpricot.XML(agent.get(url).body)
   revisions = (doc/'rev').map { |rev| 
     # STDERR.puts rev['timestamp']
-    {:filename =&gt; page, :date =&gt; Time::parse(rev['timestamp']).to_i*1000, :author =&gt; rev['user'] } 
+    # STDERR.puts Time::parse(rev['timestamp']).to_s    
+    weight = (rev['size'].to_f/100.to_f).ceil rescue 1
+    weight = 1 if weight == 0 #FIXME
+    STDERR.puts &quot;#{rev['timestamp']}: #{rev['size']} =&gt; #{weight}&quot;
+    {:filename =&gt; page, :date =&gt; Time::parse(rev['timestamp']).to_i*1000, :author =&gt; rev['user'], :weight =&gt; weight } 
   } || []
 
   rvstartid = (doc/'query-continue'/'revisions')[0]['rvstartid'] rescue nil
@@ -35,6 +43,7 @@ def user_history(username, offset = '')
   url += &quot;&amp;offset=#{offset}&quot; unless offset.empty?
   agent = WWW::Mechanize.new
   agent.user_agent = &quot;WikiSwarm &lt;http://github.com/jamiew/wikiswarm/&gt;&quot;
+  snooze
   doc = agent.get(url)
   revisions = (doc/'#bodyContent li').map { |li| 
     
@@ -45,18 +54,18 @@ def user_history(username, offset = '')
     comment = (li/'span').remove
     date = li.innerHTML.split('(')[0][0..-2]
     username = username.gsub('User:','')
-    
+    # weight = rev['size'] || 1
+    weight = 1
     # puts (li/'span').delete 
     # {:filename =&gt; (li
     
-    
-    { :filename =&gt; filename, :date =&gt; Time::parse(date).to_i*1000, :author =&gt; username }    
+    { :filename =&gt; filename, :date =&gt; Time::parse(date).to_i*1000, :author =&gt; username, :weight =&gt; weight }    
   }.sort_by { |f| f[:date] }
 
   # puts (doc/'a.mw-nextlink').inspect  
 
   link = (doc/'.mw-nextlink')[0]['href'] rescue nil
-  STDERR.puts link.inspect
+  # STDERR.puts link.inspect
   
   rvstartid = link.match('.*offset=(.*)\&amp;.*')[1] rescue nil
   revisions += user_history(username, rvstartid) || [] if rvstartid
@@ -84,7 +93,7 @@ pages.each { |page|
 
 revisions.sort_by { |r| r[:date] }.each { |rev|
   # code_swarm wants unixtime in milliseconds
-  puts %{&lt;event date=&quot;#{rev[:date]}&quot; filename=&quot;#{rev[:filename]}&quot; author=&quot;#{rev[:author]}&quot; /&gt;}
+  puts %{&lt;event date=&quot;#{rev[:date]}&quot; filename=&quot;#{rev[:filename]}&quot; author=&quot;#{rev[:author]}&quot; weight=&quot;#{rev[:weight]}&quot; /&gt;}
 }
 puts '&lt;/file_events&gt;'
 exit 0</diff>
      <filename>wikipedia.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>72a7b2b80ff7676caae6b9415888e35d879460f2</id>
    </parent>
  </parents>
  <author>
    <name>Jamie Wilkinson</name>
    <email>jamie@tramchase.com</email>
  </author>
  <url>http://github.com/jamiew/wikiswarm/commit/21bc3788fc3c4323a8ee7407d30a50b6f2c79432</url>
  <id>21bc3788fc3c4323a8ee7407d30a50b6f2c79432</id>
  <committed-date>2008-11-18T20:49:54-08:00</committed-date>
  <authored-date>2008-11-18T20:49:54-08:00</authored-date>
  <message>weights</message>
  <tree>9e533f48f26e3da45414e459ff44133c013674f2</tree>
  <committer>
    <name>Jamie Wilkinson</name>
    <email>jamie@tramchase.com</email>
  </committer>
</commit>
