<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -16,7 +16,7 @@ test:
 sync: rsync gitsync
 
 rsync:
-	# rsync -avzu rsync://watchdog.net/data data ||
+	# rsync -avzu watchdog.net:~watchdog/web/data .
 	rsync -avzu rsync://watchdog.net/data data
 
 gitsync:</diff>
      <filename>Makefile</filename>
    </modified>
    <modified>
      <diff>@@ -10,6 +10,8 @@ export PYTHONPATH := ../:.:$(PWD)/../vendor:$(PYTHONPATH)
 
 PYTHON=python
 
+CRAWL=../data/crawl
+PARSE=../data/parse
 LOAD=../data/load
 JSONS=$(LOAD)/states/index.json \
       $(LOAD)/districts/index.json \
@@ -89,3 +91,37 @@ $(LOAD)/politicians/earmarks.json: load/earmarks.py $(LOAD)/politicians/govtrack
 .interests: .schema load/maplight.py .bills
 	$(PYTHON) load/maplight.py
 	touch $@
+
+
+# new way of doing things: parse to .njs files
+
+parse: $(PARSE)/almanac.njs \
+	   $(PARSE)/earmarks.njs \
+       $(PARSE)/fec.njs \
+       $(PARSE)/govtrack.njs \
+       $(PARSE)/soi.njs \
+       $(PARSE)/voteview.njs
+
+$(PARSE)/almanac.njs: parse/almanac.py 
+	$(PYTHON) parse/almanac.py --dump &gt; $@.tmp
+	mv $@.tmp $@
+
+$(PARSE)/earmarks.njs: parse/earmarks.py $(CRAWL)/taxpayer/bigkahuna.xls
+	$(PYTHON) parse/earmarks.py &gt; $@.tmp
+	mv $@.tmp $@
+
+$(PARSE)/fec.njs: parse/fec.py $(CRAWL)/fec
+	$(PYTHON) parse/fec.py &gt; $@.tmp
+	mv $@.tmp $@
+
+$(PARSE)/govtrack.njs: parse/govtrack.py
+	$(PYTHON) parse/govtrack.py &gt; $@.tmp
+	mv $@.tmp $@
+
+$(PARSE)/soi.njs: parse/soi.py $(CRAWL)/irs/soi/
+	$(PYTHON) parse/soi.py &gt; $@.tmp
+	mv $@.tmp $@
+
+$(PARSE)/voteview.njs: parse/voteview.py
+	$(PYTHON) parse/voteview.py &gt; $@.tmp
+	mv $@.tmp $@</diff>
      <filename>import/Makefile</filename>
    </modified>
    <modified>
      <diff>@@ -203,4 +203,20 @@ def main(files):
         print &quot;%s as state:&quot; % fname
         print pprint.pprint(scrape_state(fname))
 
-if __name__ == '__main__': main(sys.argv[1:])
+def dump():
+    import glob
+    import tools
+    
+    DATA_DIR = '../data'
+    ALMANAC_DIR = DATA_DIR + '/crawl/almanac/nationaljournal.com/pubs/almanac/2008/'
+
+    for fn in glob.glob(ALMANAC_DIR + 'people/*/rep*'):
+        tools.export(scrape_person(fn))
+    for fn in glob.glob(ALMANAC_DIR + 'states/*/index.html'):
+        tools.export(scrape_state(fn))
+
+if __name__ == '__main__':
+    if '--dump' in sys.argv[1:]:
+        dump()
+    else:
+        main(sys.argv[1:])</diff>
      <filename>import/parse/almanac.py</filename>
    </modified>
    <modified>
      <diff>@@ -133,6 +133,5 @@ def printEarmarks(rows):
 #printEarmarks(marks[&quot;Edwards&quot;])
 
 if __name__ == &quot;__main__&quot;:
-    import simplejson
-    earmarks = getEarmarks('../data/crawl/taxpayer/bigkahuna.xls')
-    print simplejson.dumps([x.__dict__ for x in earmarks], indent=2, sort_keys=True)
+    import tools
+    tools.export(x.__dict__ for x in getEarmarks('../data/crawl/taxpayer/bigkahuna.xls'))</diff>
      <filename>import/parse/earmarks.py</filename>
    </modified>
    <modified>
      <diff>@@ -6,7 +6,7 @@ parse data from govtrack.us
 STATS_XML = '../data/crawl/govtrack/us/110/repstats/%s.xml'
 FEC_XML = '../data/crawl/govtrack/us/fec/campaigns-2008.xml'
 METRICS = ['enacted', 'novote', 'verbosity', 'speeches', 
-  'spectrum', 'introduced', 'cosponsored']
+  'spectrum', 'introduced', 'cosponsor']
 
 from xml.dom import pulldom
 import web
@@ -53,5 +53,5 @@ def parse_fec():
 
 if __name__ == &quot;__main__&quot;:
     tools.export(parse_basics())
-    tools.export(parse_speeches())
+    tools.export(parse_stats())
     tools.export(parse_fec())</diff>
      <filename>import/parse/govtrack.py</filename>
    </modified>
    <modified>
      <diff>@@ -142,21 +142,24 @@ def parse_state(state):
         
         loc += 8
 
-def parse_soi():
+def parse_soi(verbose=False):
+    import sys
+
     states = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 
     'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 
     'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 
     'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 
     'WA', 'WI', 'WV', 'WY']
-    
+
+    if verbose: print&gt;&gt;sys.stderr
     for state in states:
-        import sys
-        print&gt;&gt;sys.stderr, state
+        if verbose: print&gt;&gt;sys.stderr, &quot;\rParsing&quot;, state + '...',
         for x in parse_state(state):
             if x.loc.strip() == 'Total':
                 x.loc = state
             yield x
+    if verbose: print &gt;&gt;sys.stderr, '\r                     '
 
 if __name__ == &quot;__main__&quot;:
     import tools
-    tools.export(parse_soi())
+    tools.export(parse_soi(verbose=True))</diff>
      <filename>import/parse/soi.py</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>0bd1845b93341bd692e3c5d8f64597bea105ec9b</id>
    </parent>
  </parents>
  <author>
    <name>Aaron Swartz</name>
    <email>me@aaronsw.com</email>
  </author>
  <url>http://github.com/aaronsw/watchdog/commit/9c5ec5385bcc6f9936af347271005eea00f519e0</url>
  <id>9c5ec5385bcc6f9936af347271005eea00f519e0</id>
  <committed-date>2008-07-23T13:56:49-07:00</committed-date>
  <authored-date>2008-07-23T13:54:50-07:00</authored-date>
  <message>generate njs files from parser</message>
  <tree>b12c63de113f4be83dabdac1f682dddeec28616a</tree>
  <committer>
    <name>Aaron Swartz</name>
    <email>me@aaronsw.com</email>
  </committer>
</commit>
