<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -1,15 +1,20 @@
 Feed2Imap 0.8 (XX/XX/2006)
 ============================
-* Fixed a small bug in the duplicate items handling which could have caused
-  some items to be ignored if they had the same url but different content.
-* New always-new flag in the config file to consider all items as new (for
-  feeds where items are wrongly marked as updated, e.g mediawiki feeds).
-  See example configuration file for more information.
 * Uses the http_proxy environment variable to determine the proxy server
   if available. (fixes gna bug #5820, all credits go to Boyd Adamson
   &lt;boyd-adamson@usa.net&gt;)
 * Fixes flocking on Solaris (fixes gna bug #5819). Again, all credits go to
   Boyd Adamson &lt;boyd-adamson@usa.net&gt;.
+* Rewrite of the &quot;find updated and new items&quot; code. It should work much better
+  now. Also, a debug-updated configuration variable was added to make it
+  easier to debug those issues.
+* New always-new flag in the config file to consider all items as new (for
+  feeds where items are wrongly marked as updated, e.g mediawiki feeds).
+  See example configuration file for more information (fixes Debian bug
+  #366878).
+* When disconnecting from the IMAP server, don't display an exception in
+  non-verbose mode if the &quot;connection is reset by peer&quot; (fixes Debian bug
+  #367282).
 
 Feed2Imap 0.7 (17/02/2006)
 ============================</diff>
      <filename>ChangeLog</filename>
    </modified>
    <modified>
      <diff>@@ -1,13 +1,21 @@
-# name is the name of the feed (must be unique)
-# url is the HTTP[S] address where the feed has to be fetched
-# target is the IMAP URI where to put emails
-# min-frequency (in HOURS) is the minimum frequency with which this particular
-#      feed will be fetched
+# Global options:
+# dumpdir: (for debugging purposes) directory where all fetched feeds will be
+#   dumped.
+# debug-updated: (for debugging purposes) if true, display a lot of information
+#   about the &quot;updated-items&quot; algorithm.
+# 
+# Per-feed options:
+# name: name of the feed (must be unique)
+# url: HTTP[S] address where the feed has to be fetched
+# target: the IMAP URI where to put emails. Should start with imap:// for IMAP
+#   and imaps:// for IMAPS.
+# min-frequency: (in HOURS) is the minimum frequency with which this particular
+#   feed will be fetched
 # disable: if set to something, the feed will be ignored
-# always-new: feed2imap tries to use a clever algorithm to determine whether an item
-#      is new or has been updated. It doesn't work well with some web apps like
-#      mediawiki. When this flag is enabled, all items which don't match exactly
-#      a previously downloaded item are considered as new items.
+# always-new: feed2imap tries to use a clever algorithm to determine whether
+#   an item is new or has been updated. It doesn't work well with some web apps
+#   like mediawiki. When this flag is enabled, all items which don't match
+#   exactly a previously downloaded item are considered as new items.
 #
 # If your login contains an @ character, replace it with %40. Other reserved
 # characters can be escaped in the same way (see man ascii to get their code)</diff>
      <filename>data/doc/feed2imap/examples/feed2imaprc</filename>
    </modified>
    <modified>
      <diff>@@ -17,20 +17,28 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 =end
 
+# debug mode
+$updateddebug = false
+
 # This class manages a cache of items
 # (items which have already been seen)
 
 require 'digest/md5'
 
 class ItemCache
-  def initialize
+  def initialize(debug = false)
     @channels = {}
     @@cacheidx = 0
+    $updateddebug = debug
     self
   end
 
   # Returns the really new items amongst items
   def get_new_items(id, items, always_new = false)
+    if $updateddebug
+      puts &quot;=======================================================&quot;
+      puts &quot;GET_NEW_ITEMS FOR #{id}... (#{Time::now})&quot;
+    end
     @channels[id] ||= CachedChannel::new
     return @channels[id].get_new_items(items, always_new)
   end
@@ -92,7 +100,8 @@ end
 
 class CachedChannel
   # Size of the cache for each feed
-  CACHESIZE = 50
+  # 100 items should be enough for everybody, even quite busy feeds
+  CACHESIZE = 100
 
   attr_accessor :lastcheck, :items
 
@@ -113,8 +122,6 @@ class CachedChannel
   # @nbnewitems is set by get_new_items, and is used to limit the number
   # of (old) items serialized.
 
-  UPDATEDDEBUG = false
-
   # Returns the really new items amongst items
   def get_new_items(items, always_new = false)
     # save number of new items
@@ -124,6 +131,10 @@ class CachedChannel
     updateditems = []
     @itemstemp = @items
     items.each { |i| i.cacheditem ||= CachedItem::new(i) }
+    if $updateddebug
+      puts &quot;-------Items downloaded before dups removal (#{items.length}) :----------&quot;
+      items.each { |i| puts &quot;#{i.cacheditem.to_s}&quot; }
+    end
     # remove dups
     dups = true
     while dups
@@ -131,7 +142,7 @@ class CachedChannel
       for i in 0...items.length do
         for j in i+1...items.length do
           if items[i].cacheditem == items[j].cacheditem
-            if UPDATEDDEBUG
+            if $updateddebug
               puts &quot;## Removed duplicate #{items[j].cacheditem.to_s}&quot;
             end
             items.delete_at(j)
@@ -143,10 +154,10 @@ class CachedChannel
       end
     end
     # debug : dump interesting info to stdout.
-    if UPDATEDDEBUG
-      puts &quot;-------Items downloaded :----------&quot;
+    if $updateddebug
+      puts &quot;-------Items downloaded after dups removal (#{items.length}) :----------&quot;
       items.each { |i| puts &quot;#{i.cacheditem.to_s}&quot; }
-      puts &quot;-------Items already there :----------&quot;
+      puts &quot;-------Items already there (#{@items.length}) :----------&quot;
       @items.each { |i| puts &quot;#{i.to_s}&quot; }
       puts &quot;Items always considered as new: #{always_new.to_s}&quot;
     end
@@ -168,7 +179,7 @@ class CachedChannel
         # Try to find an updated item
         @items.each do |j|
           # Do we need a better heuristic ?
-          if i.link and i.link == j.link
+          if j.is_ancestor_of(i)
             i.cacheditem.index = j.index
             i.cacheditem.updated = true
             updateditems.push(i)
@@ -187,7 +198,7 @@ class CachedChannel
       # add i.cacheditem to @itemstemp
       @itemstemp.unshift(i.cacheditem)
     end
-    if UPDATEDDEBUG
+    if $updateddebug
       puts &quot;-------New items :----------&quot;
       newitems.each { |i| puts &quot;#{i.cacheditem.to_s}&quot; }
       puts &quot;-------Updated items :----------&quot;
@@ -200,6 +211,9 @@ class CachedChannel
     # too old items must be dropped
     n = @nbnewitems &gt; CACHESIZE ? @nbnewitems : CACHESIZE
     @items = @itemstemp[0..n]
+    if $updateddebug
+      puts &quot;Committing: new items: #{@nbnewitems} / items kept: #{@items.length}&quot;
+    end
     @itemstemp = []
     self
   end
@@ -212,13 +226,15 @@ end
 
 # This class is the only thing kept in the cache
 class CachedItem
-  attr_reader :title, :link, :hash
+  attr_reader :title, :link, :creator, :date, :hash
   attr_accessor :index
   attr_accessor :updated
 
   def initialize(item)
     @title = item.title
     @link = item.link
+    @date = item.date
+    @creator = item.creator
     if item.content.nil?
       @hash = nil
     else
@@ -227,14 +243,29 @@ class CachedItem
   end
 
   def ==(other)
-    @title == other.title and @link == other.link and @hash == other.hash
+    if $updateddebug and @title =~ /e325/ and other.title =~ /e325/
+      puts &quot;Comparing #{self.to_s} and #{other.to_s}:&quot;
+      puts &quot;Title: #{@title == other.title}&quot;
+      puts &quot;Link: #{@link == other.link}&quot;
+      puts &quot;Creator: #{@creator == other.creator}&quot;
+      puts &quot;Date: #{@date == other.date}&quot;
+      puts &quot;Hash: #{@hash == other.hash}&quot;
+    end
+    @title == other.title and @link == other.link and
+        (@creator.nil? or other.creator.nil? or @creator == other.creator) and
+	(@date.nil? or other.date.nil? or @date == other.date) and @hash == other.hash
   end
 
   def create_index
     @index = ItemCache.getindex
   end
 
+  def is_ancestor_of(other)
+    (@link and other.link and @link == other.link) and
+      ((@creator and other.creator and @creator == other.creator) or (@creator.nil?))
+  end
+
   def to_s
-    &quot;\&quot;#{@title}\&quot; #{@link} #{@hash}&quot;
+    &quot;\&quot;#{@title}\&quot; #{@creator}/#{@date} #{@link} #{@hash}&quot;
   end
 end</diff>
      <filename>lib/feed2imap/cache.rb</filename>
    </modified>
    <modified>
      <diff>@@ -26,7 +26,7 @@ DEFCACHE = ENV['HOME'] + '/.feed2imap.cache'
 
 # Feed2imap configuration
 class F2IConfig
-  attr_reader :imap_accounts, :cache, :feeds, :dumpdir
+  attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug
 
   # Load the configuration from the IO stream
   # TODO should do some sanity check on the data read.
@@ -36,6 +36,7 @@ class F2IConfig
     @dumpdir = @conf['dumpdir'] || nil
     @conf['feeds'] ||= []
     @feeds = []
+    @updateddebug =  (@conf['debug-updated'] and @conf['debug-updated'] != 'false')
     @imap_accounts = ImapAccounts::new
     @conf['feeds'].each do |f|
       if f['disable'].nil?</diff>
      <filename>lib/feed2imap/config.rb</filename>
    </modified>
    <modified>
      <diff>@@ -61,7 +61,7 @@ class Feed2Imap
     end
     # init cache
     @logger.info('Initializing cache')
-    @cache = ItemCache::new
+    @cache = ItemCache::new(@config.updateddebug)
     if not File::exist?(@config.cache + '.lock')
       f = File::new(@config.cache + '.lock', 'w')
       f.close
@@ -187,7 +187,7 @@ class Feed2Imap
       begin
         ac.disconnect
       rescue
-        @logger.fatal(&quot;Exception caught while closing connection to #{ac.to_s}: #{$!}&quot;)
+        @logger.info(&quot;Exception caught while closing connection to #{ac.to_s}: #{$!}&quot;)
       end
     end
   end</diff>
      <filename>lib/feed2imap/feed2imap.rb</filename>
    </modified>
    <modified>
      <diff>@@ -43,7 +43,8 @@
 &lt;/refsect1&gt;
 &lt;refsect1&gt;
 	&lt;title&gt;BUGS&lt;/title&gt;
-	&lt;para&gt;This manpage should probably give more details.&lt;/para&gt;
+	&lt;para&gt;This manpage should probably give more details. However, the example configuration file is
+very well documented.&lt;/para&gt;
 &lt;/refsect1&gt;
 &lt;refsect1&gt;
 	&lt;title&gt;SEE ALSO&lt;/title&gt;</diff>
      <filename>manpages/feed2imaprc.xml</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>3aba0ce3e74718a8d89d03f69f061158139ad9fa</id>
    </parent>
  </parents>
  <author>
    <name>lnu</name>
    <email>lnu@f70e237a-67f3-0310-a06c-d2b8a7116972</email>
  </author>
  <url>http://github.com/ln/feed2imap/commit/8ed0ec9fd97511e847315d44cf0070ced31a9557</url>
  <id>8ed0ec9fd97511e847315d44cf0070ced31a9557</id>
  <committed-date>2006-06-18T01:59:16-07:00</committed-date>
  <authored-date>2006-06-18T01:59:16-07:00</authored-date>
  <message>pre-release cleanup

git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@97 f70e237a-67f3-0310-a06c-d2b8a7116972</message>
  <tree>96fea79a9dab1f477bc28e6e86799cea516cd86b</tree>
  <committer>
    <name>lnu</name>
    <email>lnu@f70e237a-67f3-0310-a06c-d2b8a7116972</email>
  </committer>
</commit>
