<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -26,4 +26,5 @@ sgf/titles/*.sgf
 svn/*
 titles/titles.html
 titles/titles.db
-
+titles/news.html
+titles/lg.html
\ No newline at end of file</diff>
      <filename>.gitignore</filename>
    </modified>
    <modified>
      <diff>@@ -26,50 +26,71 @@ cur.executescript(&quot;&quot;&quot;
 &quot;&quot;&quot;)
 
 base_url = &quot;http://igo-kisen.hp.infoseek.co.jp/&quot;
-# base_url = &quot;http://localhost/eidogo/titles/&quot;
-page = urllib2.urlopen(base_url + &quot;topics.html&quot;)
-soup = BeautifulSoup(page)
-
-trs = soup.find(&quot;table&quot;, {&quot;width&quot;: &quot;1050&quot;}).findAll(&quot;tr&quot;)
-dt = &quot;&quot;
-
+# base_url = &quot;http://eidogo_dev/titles/&quot;
 re_tags = re.compile(&quot;&lt;[^&gt;]+&gt;&quot;)
 
-scraped_subpages = []
-
-for tr in trs:
-    tds = tr.findAll(&quot;td&quot;)
-    if (len(tds) == 0):
-        continue
+def scrape_news():
+    page = urllib2.urlopen(base_url + &quot;news.html&quot;)
+    soup = BeautifulSoup(page)
+    scraped_subpages = []
+    trs = soup.findAll(&quot;tr&quot;, {&quot;align&quot;: &quot;center&quot;})
+    for tr in trs:
+        tds = tr.findAll(&quot;td&quot;)
+        if (len(tds) == 0):
+            continue
+        sig = []
+        for td in tds:
+            sig.append(re_tags.sub(&quot;&quot;, str(td.contents[0])))
+        sig = ' '.join(sig)
+        
+        cur.execute(&quot;select * from notices where sig=?&quot;, (sig,))
+        if (cur.fetchone()):
+            continue
+        cur.execute(&quot;insert into notices (sig) values (?)&quot;, (sig,))
+        
+        subpage_fn = tr.a['href']
+        if (scraped_subpages.count(subpage_fn) &gt; 0):
+            continue
+        print subpage_fn
+        scraped_subpages.append(subpage_fn)
+        scrape_subpage(subpage_fn)
 
-    td0 = tds.pop(0).contents[0]
-    if (td0 != &quot;&amp;nbsp;&quot;):
-        dt = td0
+def scrape_topics():
+    page = urllib2.urlopen(base_url + &quot;topics.html&quot;)
+    soup = BeautifulSoup(page)
+    scraped_subpages = []
+    trs = soup.find(&quot;table&quot;, {&quot;width&quot;: &quot;1050&quot;}).findAll(&quot;tr&quot;)
+    dt = &quot;&quot;
+    for tr in trs:
+        tds = tr.findAll(&quot;td&quot;)
+        if (len(tds) == 0):
+            continue
+        td0 = tds.pop(0).contents[0]
+        if (td0 != &quot;&amp;nbsp;&quot;):
+            dt = td0
+        sig = [dt]
+        for td in tds:
+            sig.append(re_tags.sub(&quot;&quot;, str(td.contents[0])))
+        sig = ' '.join(sig)
         
-    sig = [dt]
-    for td in tds:
-        sig.append(re_tags.sub(&quot;&quot;, str(td.contents[0])))
-    
-    sig = ' '.join(sig)
-    
-    cur.execute(&quot;select * from notices where sig=?&quot;, (sig,))
-    if (cur.fetchone()):
-        continue
-    
-    cur.execute(&quot;insert into notices (sig) values (?)&quot;, (sig,))
-    
-    subpage_fn = tr.a['href']
-    
-    if (scraped_subpages.count(subpage_fn) &gt; 0):
-        continue
+        cur.execute(&quot;select * from notices where sig=?&quot;, (sig,))
+        if (cur.fetchone()):
+            continue
+        cur.execute(&quot;insert into notices (sig) values (?)&quot;, (sig,))
     
-    print subpage_fn
-    scraped_subpages.append(subpage_fn)
+        subpage_fn = tr.a['href']
+        if (scraped_subpages.count(subpage_fn) &gt; 0):
+            continue
+        print subpage_fn
+        scraped_subpages.append(subpage_fn)
+        scrape_subpage(subpage_fn)
+
+def scrape_subpage(subpage_fn):
     try:
         subpage = urllib2.urlopen(base_url + subpage_fn)
     except:
         print &quot;! not found&quot;
-        continue
+        return
     subsoup = BeautifulSoup(subpage)
     
     sgf_path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../sgf/titles'))
@@ -90,27 +111,39 @@ for tr in trs:
             print &quot;  &quot; + fn
             sgf = sgfparser.Cursor(raw_sgf)
             info = sgf.getRootNode(0)
-            cur.execute(&quot;insert into games (fn, dt, ev, pw, pb, re) values (?,?,?,?,?,?)&quot;,
-                (fn, info['DT'][0], info['EV'][0], info['PW'][0] + ' ' + info['WR'][0], info['PB'][0] + ' ' + info['BR'][0], info['RE'][0]))
+            cur.execute(
+                &quot;insert into games (fn, dt, ev, pw, pb, re) values (?,?,?,?,?,?)&quot;,
+                (fn,
+                 info['DT'][0],
+                 info['EV'][0],
+                 info['PW'][0] + ' ' + info['WR'][0],
+                 info['PB'][0] + ' ' + info['BR'][0],
+                 info['RE'][0]))
             f = open(os.path.join(sgf_path, fn), &quot;w&quot;)
             f.write(raw_sgf)
             f.close()
 
-cur.execute(&quot;select * from games order by dt desc limit 250&quot;)
-rows = cur.fetchall()
+def output_games():
+    cur.execute(&quot;select * from games order by dt desc limit 250&quot;)
+    rows = cur.fetchall()
+
+    f = open(os.path.join(curdir, 'titles.html'), &quot;w&quot;)
+    f.write(&quot;&lt;table id='tourney-games'&gt;&lt;tr&gt;&quot; +
+            &quot;&lt;th&gt;Date&lt;/th&gt;&lt;th&gt;Event&lt;/th&gt;&lt;th&gt;White&lt;/th&gt;&lt;th&gt;Black&lt;/th&gt;&lt;th&gt;Result&lt;/th&gt;&quot; +
+            &quot;&lt;/tr&gt;&quot;)
+    cl = &quot;&quot;
+    for row in rows:
+        if (cl == &quot; class='odd'&quot;):
+            cl = &quot; class='even'&quot;
+        else:
+            cl = &quot; class='odd'&quot;
+        fn = row[0].replace(&quot;.sgf&quot;, &quot;&quot;)
+        f.write(&quot;&lt;tr&quot; + cl + &quot;&gt;&quot;)
+        for col in row[1:]:
+            f.write(&quot;&lt;td&gt;&lt;a href='./#titles/&quot; + fn + &quot;'&gt;&quot; + col + &quot;&lt;/a&gt;&lt;/td&gt;&quot;)
+        f.write(&quot;&lt;/tr&gt;&quot;)
+    f.write(&quot;&lt;/table&gt;&quot;)
+    f.close()
 
-f = open(os.path.join(curdir, 'titles.html'), &quot;w&quot;)
-f.write(&quot;&lt;table id='tourney-games'&gt;&lt;tr&gt;&lt;th&gt;Date&lt;/th&gt;&lt;th&gt;Event&lt;/th&gt;&lt;th&gt;White&lt;/th&gt;&lt;th&gt;Black&lt;/th&gt;&lt;th&gt;Result&lt;/th&gt;&lt;/tr&gt;&quot;)
-cl = &quot;&quot;
-for row in rows:
-    if (cl == &quot; class='odd'&quot;):
-        cl = &quot; class='even'&quot;
-    else:
-        cl = &quot; class='odd'&quot;
-    fn = row[0].replace(&quot;.sgf&quot;, &quot;&quot;)
-    f.write(&quot;&lt;tr&quot; + cl + &quot;&gt;&quot;)
-    for col in row[1:]:
-        f.write(&quot;&lt;td&gt;&lt;a href='./#titles/&quot; + fn + &quot;'&gt;&quot; + col + &quot;&lt;/a&gt;&lt;/td&gt;&quot;)
-    f.write(&quot;&lt;/tr&gt;&quot;)
-f.write(&quot;&lt;/table&gt;&quot;)
-f.close()
\ No newline at end of file
+scrape_news()
+output_games()
\ No newline at end of file</diff>
      <filename>titles/scrape.py</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>f3486f765f327bd39818dafa0520072d605ea377</id>
    </parent>
  </parents>
  <author>
    <name>Justin Kramer</name>
    <email>jkkramer@gmail.com</email>
  </author>
  <url>http://github.com/jkk/eidogo/commit/7748bb95fdaa161430967d8640a026e93995ef22</url>
  <id>7748bb95fdaa161430967d8640a026e93995ef22</id>
  <committed-date>2008-04-30T21:12:39-07:00</committed-date>
  <authored-date>2008-04-30T21:12:39-07:00</authored-date>
  <message>updated titles scraper</message>
  <tree>2693803c0df2f81653e5d8a7cfc1ea5baba0aad6</tree>
  <committer>
    <name>Justin Kramer</name>
    <email>jkkramer@gmail.com</email>
  </committer>
</commit>
