<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
-from catalog.importer.db_read import withKey, get_things, get_mc
-from catalog.read_rc import read_rc
-from catalog.utils import key_int, match_with_bad_chars, pick_best_author, remove_trailing_number_dot
+from openlibrary.catalog.importer.db_read import withKey, get_things, get_mc
+from openlibrary.catalog.read_rc import read_rc
+from openlibrary.catalog.utils import key_int, match_with_bad_chars, pick_best_author, remove_trailing_number_dot
 from unicodedata import normalize
 import web, re, sys, codecs, urllib
 sys.path.append('/home/edward/src/olapi')
 from olapi import OpenLibrary, unmarshal
-from catalog.utils.edit import fix_edition
-from catalog.utils.query import query_iter
+from openlibrary.catalog.utils.edit import fix_edition
+from openlibrary.catalog.utils.query import query_iter
 
 sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
 
@@ -54,6 +54,7 @@ def update_edition(key, old, new):
     authors = []
     print 'current authors:', e['authors']
     for cur in e['authors']:
+        cur = cur['key']
         print old, cur in old
         a = new if cur in old else cur
         print cur, '-&gt;', a</diff>
      <filename>openlibrary/catalog/author/merge.py</filename>
    </modified>
    <modified>
      <diff>@@ -15,6 +15,9 @@ rc = read_rc()
 
 re_loc = re.compile('^(ia\d+\.us\.archive\.org):(/\d/items/(.*))$')
 
+class NoMARCXML:
+    pass
+
 def urlopen_keep_trying(url):
     for i in range(3):
         try:
@@ -49,26 +52,35 @@ def find_item(ia):
     return (ia_host, ia_path)
 
 def get_ia(ia):
+    ia = ia.strip() # 'cyclopdiaofedu00kidd '
     # read MARC record of scanned book from archive.org
     # try the XML first because it has better character encoding
     # if there is a problem with the XML switch to the binary MARC
     xml_file = ia + &quot;_marc.xml&quot;
     loc = ia + &quot;/&quot; + xml_file
-    for attempt in range(3):
-        if os.path.exists(xml_path + xml_file):
-            f = open(xml_path + xml_file)
-        else:
+    if os.path.exists(xml_path + xml_file):
+        f = open(xml_path + xml_file)
+    else:
+        try:
             f = urlopen_keep_trying(base + loc)
-        if f:
-            try:
-                return loc, read_xml.read_edition(f)
-            except read_xml.BadXML:
-                pass
-            except xml.parsers.expat.ExpatError:
-                print 'XML parse error:', base + loc
-                pass
-        sleep(2)
+        except urllib2.HTTPError, error:
+            if error.code == 404:
+                raise NoMARCXML
+            else:
+                raise
+    if f:
+        try:
+            return loc, read_xml.read_edition(f)
+        except read_xml.BadXML:
+            pass
+        except xml.parsers.expat.ExpatError:
+            print 'IA:', `ia`
+            print 'XML parse error:', base + loc
+            pass
+    if '&lt;title&gt;Internet Archive: Page Not Found&lt;/title&gt;' in urllib2.urlopen(base + loc).read(200):
+        raise NoMARCXML
     url = base + ia + &quot;/&quot; + ia + &quot;_meta.mrc&quot;
+    print url
     try:
         f = urlopen_keep_trying(url)
     except urllib2.URLError:
@@ -140,7 +152,9 @@ def get_from_archive(locator):
     assert 0 &lt; length &lt; 100000
 
     ureq = urllib2.Request(url, None, {'Range':'bytes=%d-%d'% (r0, r1)},)
-    return urlopen_keep_trying(ureq).read(100000)
+    f = urlopen_keep_trying(ureq)
+    if f:
+        return f.read(100000)
 
 def get_from_local(locator):
     try:</diff>
      <filename>openlibrary/catalog/get_ia.py</filename>
    </modified>
    <modified>
      <diff>@@ -341,6 +341,8 @@ for part, size in files(archive_id):
         except AssertionError:
             print loc
             raise
+        if not edition:
+            continue
         if edition['title'] == 'See.':
             print 'See.', edition
             continue</diff>
      <filename>openlibrary/catalog/importer/import_marc.py</filename>
    </modified>
    <modified>
      <diff>@@ -1,9 +1,9 @@
 #!/usr/local/bin/python2.5
 import web, dbhash, sys
 import simplejson as json
-from catalog.load import add_keys
+from openlibrary.catalog.load import add_keys
 from copy import deepcopy
-from catalog.merge.index import *
+from openlibrary.catalog.merge.index import *
 from urllib import urlopen, urlencode
 
 path = '/1/edward/marc_index/'</diff>
      <filename>openlibrary/catalog/importer/import_server.py</filename>
    </modified>
    <modified>
      <diff>@@ -27,7 +27,7 @@ db = web.database(dbn='mysql', host=rc['ia_db_host'], user=rc['ia_db_user'], \
         passwd=rc['ia_db_pass'], db='archive')
 db.printing = False
 
-start = '2009-09-24 05:56:56'
+start = '2009-10-07 11:01:43'
 fh_log = open('/1/edward/logs/load_scribe', 'a')
 
 t0 = time()
@@ -88,13 +88,13 @@ def write_edition(loc, edition):
     if authors:
         q['authors'] = authors
 
-    for attempt in range(5):
+    for attempt in range(50):
         if attempt &gt; 0:
             print 'retrying'
         try:
             ret = ol.new(q, comment='initial import')
         except httplib.BadStatusLine:
-            sleep(10)
+            sleep(30)
             continue
         except: # httplib.BadStatusLine
             print q</diff>
      <filename>openlibrary/catalog/importer/load_scribe.py</filename>
    </modified>
    <modified>
      <diff>@@ -1,6 +1,6 @@
 import re, web, sys
 import simplejson as json
-from urllib2 import urlopen
+from urllib2 import urlopen, URLError
 from openlibrary.catalog.read_rc import read_rc
 from openlibrary.catalog.importer.db_read import get_mc
 from time import sleep
@@ -100,12 +100,20 @@ def add_source_records(key, ia):
         undelete_authors(authors)
     print 'saving', key
     print marshal(e)
-    sleep(5)
-    try:
-        print ol.save(key, e, 'found a matching MARC record')
-    except:
-        print e
-        raise
+    for attempt in range(50):
+        try:
+            print ol.save(key, e, 'found a matching MARC record')
+            break
+        except KeyboardInterrupt:
+            raise
+        except URLError:
+            if attempt == 49:
+                raise
+        except:
+            print e
+            raise
+        print 'attempt %d failed' % attempt
+        sleep(30)
     if new_toc:
         new_edition = ol.get(key)
         # [{u'type': &lt;ref: u'/type/toc_item'&gt;}, ...]</diff>
      <filename>openlibrary/catalog/importer/update.py</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>4eb1a9ccfef1d849f60c311100bba0b3bd2077c9</id>
    </parent>
  </parents>
  <author>
    <name>Edward Betts</name>
    <email>edwardbetts@gmail.com</email>
  </author>
  <url>http://github.com/openlibrary/openlibrary/commit/7bdb615154c377c41456fd4efdf29dfb11684072</url>
  <id>7bdb615154c377c41456fd4efdf29dfb11684072</id>
  <committed-date>2009-10-08T03:25:57-07:00</committed-date>
  <authored-date>2009-10-08T03:25:57-07:00</authored-date>
  <message>fix import lines for files that have moved</message>
  <tree>9f4bfc7a10e9337840fecaf87d50b56c449f2c9e</tree>
  <committer>
    <name>Edward Betts</name>
    <email>edwardbetts@gmail.com</email>
  </committer>
</commit>
