In [6]:
import affine, concurrent, cStringIO, glob, IPython, json, os, PIL, sys, thread, time, traceback
# add binomial column to areas

# sum areas by binomial column

class SimpleProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
    """Process pool that records every submitted future and gathers them on shutdown.

    Typical use: create the pool, call ``submit`` once per job, then call
    ``shutdown()``, which waits for all recorded jobs, reports any failures on
    stderr and returns the collected results.
    """

    def __init__(self, max_workers):
        super(SimpleProcessPoolExecutor, self).__init__(max_workers=max_workers)
        # Every future returned by submit(), in submission order.
        self.futures = []

    def submit(self, fn, *args, **kwargs):
        """Schedule ``fn(*args, **kwargs)`` on the pool and remember its future."""
        future = super(SimpleProcessPoolExecutor, self).submit(fn, *args, **kwargs)
        self.futures.append(future)
        return future

    def get_futures(self):
        """Return the list of futures recorded by ``submit``."""
        return self.futures

    def shutdown(self, wait=True, **kwargs):
        """Wait for every recorded future, then shut the underlying pool down.

        Args:
            wait: forwarded to the base-class ``shutdown``. Accepting it keeps
                this override compatible with the base signature, which is what
                ``Executor.__exit__`` relies on when the pool is used in a
                ``with`` statement. Results are always collected before the
                base ``shutdown`` is called, regardless of this flag.
            **kwargs: any further keyword arguments are forwarded unchanged to
                the base-class ``shutdown``.

        Returns:
            The results of all recorded futures, in completion order.

        Raises:
            Exception: if at least one job raised; the individual tracebacks
                are written to stderr before this is raised.
        """
        exception_count = 0
        results = []
        for completed in concurrent.futures.as_completed(self.futures):
            try:
                results.append(completed.result())
            except Exception:
                # Keep draining the remaining futures; failures are summarised below.
                exception_count += 1
                sys.stderr.write(
                    'Exception caught in SimpleProcessPoolExecutor.shutdown.  Continuing until all are finished.\n' +
                    'Exception follows:\n' +
                    traceback.format_exc())
        super(SimpleProcessPoolExecutor, self).shutdown(wait=wait, **kwargs)
        if exception_count:
            raise Exception('SimpleProcessPoolExecutor failed: %d of %d raised exception' % (exception_count, len(self.futures)))
        # Single parenthesised argument: prints the same text under Python 2 and 3.
        print('SimpleProcessPoolExecutor succeeded: all %d jobs completed' % (len(self.futures)))
        return results

class Stopwatch(object):
    """Context manager that prints how long its ``with`` body took.

    On exit it writes ``'<name> took <seconds> seconds'`` (one decimal place)
    to stdout. Exceptions raised inside the body are not suppressed.
    """

    def __init__(self, name):
        # Label used as the prefix of the timing message.
        self.name = name

    def __enter__(self):
        self.start = time.time()
        # Return the instance so ``with Stopwatch(...) as sw`` binds something usable.
        return self

    def __exit__(self, type, value, traceback):
        sys.stdout.write('%s took %.1f seconds\n' % (self.name, time.time() - self.start))


In [19]:
def select_into(tname):
    """Create ``wdpa_<tname>_binomial_intersections`` from ``wdpa_<tname>_intersections``.

    The query keeps rows with ``presence <= 3`` and a ``code`` of CR, VU, EN,
    NT or LC, groups them by wdpa_gid, binomial, code, the taxonomy columns
    and year, and stores ``sum(intersection_area)`` per group in the new table.

    The statement is passed to ``psql`` as a single argv element (no shell
    involved). psql's output is echoed to stdout, and a failing statement
    raises instead of being silently ignored.

    Args:
        tname: table-name fragment. It is interpolated verbatim into SQL
            identifiers, so it must consist of plain identifier characters.

    Raises:
        RuntimeError: if psql exits with a non-zero status (for example when
            the target table already exists).
    """
    import subprocess
    import sys

    sql = """
    SELECT
        wdpa_gid,
        sum(intersection_area),
        code,
        kingdom,
        phylum,
        class,
        order_,
        family,
        genus,
        binomial,
        year
    INTO TABLE wdpa_%s_binomial_intersections
    FROM wdpa_%s_intersections
    WHERE presence <= 3 AND (code = 'CR' OR code = 'VU' or code = 'EN' or CODE = 'NT' or code = 'LC')
    GROUP BY wdpa_gid, binomial, code, kingdom, phylum, class, order_, family, genus, year
    ORDER BY wdpa_gid, binomial
    """ % (tname, tname)

    # ON_ERROR_STOP makes psql report SQL errors through its exit status.
    argv = ['psql', '-d', 'biodiversity', '-v', 'ON_ERROR_STOP=1', '-c', sql]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Echo through sys.stdout so the text still shows up in the notebook cell.
    sys.stdout.write(output)
    if proc.returncode != 0:
        # Plain RuntimeError pickles cleanly when raised inside a pool worker.
        raise RuntimeError(
            'psql exited with status %d while creating wdpa_%s_binomial_intersections'
            % (proc.returncode, tname))

def add_column(tname):
    """Add a ``binomial_area double precision`` column to ``wdpa_<tname>_binomial_intersections``.

    The statement is passed to ``psql`` as a single argv element (no shell
    involved). psql's output is echoed to stdout, and a failing statement
    raises instead of being silently ignored.

    Args:
        tname: table-name fragment. It is interpolated verbatim into the SQL
            identifier, so it must consist of plain identifier characters.

    Raises:
        RuntimeError: if psql exits with a non-zero status.
    """
    import subprocess
    import sys

    sql = """
    ALTER TABLE wdpa_%s_binomial_intersections
    ADD COLUMN binomial_area double precision
    ;
    """ % (tname)

    # ON_ERROR_STOP makes psql report SQL errors through its exit status.
    argv = ['psql', '-d', 'biodiversity', '-v', 'ON_ERROR_STOP=1', '-c', sql]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Echo through sys.stdout so the text still shows up in the notebook cell.
    sys.stdout.write(output)
    if proc.returncode != 0:
        # Plain RuntimeError pickles cleanly when raised inside a pool worker.
        raise RuntimeError(
            'psql exited with status %d while altering wdpa_%s_binomial_intersections'
            % (proc.returncode, tname))

def update_table(tname):
    """Fill ``binomial_area`` in ``wdpa_<tname>_binomial_intersections``.

    Each row receives ``area`` from the ``<tname>_binomial_areas`` row whose
    ``binomial`` matches its own.

    The statement is passed to ``psql`` as a single argv element (no shell
    involved). psql's output is echoed to stdout, and a failing statement
    raises instead of being silently ignored.

    Args:
        tname: table-name fragment. It is interpolated verbatim into SQL
            identifiers, so it must consist of plain identifier characters.

    Raises:
        RuntimeError: if psql exits with a non-zero status.
    """
    import subprocess
    import sys

    sql = """
    UPDATE wdpa_%s_binomial_intersections
    SET binomial_area = m.area
    FROM %s_binomial_areas as m
    where
    wdpa_%s_binomial_intersections.binomial = m.binomial
    ;
    """ % (tname, tname, tname)

    # ON_ERROR_STOP makes psql report SQL errors through its exit status.
    argv = ['psql', '-d', 'biodiversity', '-v', 'ON_ERROR_STOP=1', '-c', sql]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Echo through sys.stdout so the text still shows up in the notebook cell.
    sys.stdout.write(output)
    if proc.returncode != 0:
        # Plain RuntimeError pickles cleanly when raised inside a pool worker.
        raise RuntimeError(
            'psql exited with status %d while updating wdpa_%s_binomial_intersections'
            % (proc.returncode, tname))

def create_index(tname):
    """Create the ``(wdpa_gid, code)`` index on ``wdpa_<tname>_binomial_intersections``.

    The index is named ``<tname>_wdpa_gid_code_idx`` and is created with
    ``IF NOT EXISTS``.

    The statement is passed to ``psql`` as a single argv element (no shell
    involved). psql's output is echoed to stdout, and a failing statement
    raises instead of being silently ignored.

    Args:
        tname: table-name fragment. It is interpolated verbatim into SQL
            identifiers, so it must consist of plain identifier characters.

    Raises:
        RuntimeError: if psql exits with a non-zero status.
    """
    import subprocess
    import sys

    sql = "CREATE INDEX IF NOT EXISTS %s_wdpa_gid_code_idx ON wdpa_%s_binomial_intersections (wdpa_gid, code);" % (tname, tname)

    # ON_ERROR_STOP makes psql report SQL errors through its exit status.
    argv = ['psql', '-d', 'biodiversity', '-v', 'ON_ERROR_STOP=1', '-c', sql]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Echo through sys.stdout so the text still shows up in the notebook cell.
    sys.stdout.write(output)
    if proc.returncode != 0:
        # Plain RuntimeError pickles cleanly when raised inside a pool worker.
        raise RuntimeError(
            'psql exited with status %d while indexing wdpa_%s_binomial_intersections'
            % (proc.returncode, tname))

def vacuum_table(tname):
    """Run ``VACUUM ANALYZE`` on ``wdpa_<tname>_binomial_intersections``.

    The statement is passed to ``psql`` as a single argv element (no shell
    involved). psql's output is echoed to stdout, and a failing statement
    raises instead of being silently ignored.

    Args:
        tname: table-name fragment. It is interpolated verbatim into the SQL
            identifier, so it must consist of plain identifier characters.

    Raises:
        RuntimeError: if psql exits with a non-zero status.
    """
    import subprocess
    import sys

    sql = "VACUUM ANALYZE wdpa_%s_binomial_intersections;" % tname

    # ON_ERROR_STOP makes psql report SQL errors through its exit status.
    argv = ['psql', '-d', 'biodiversity', '-v', 'ON_ERROR_STOP=1', '-c', sql]
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Echo through sys.stdout so the text still shows up in the notebook cell.
    sys.stdout.write(output)
    if proc.returncode != 0:
        # Plain RuntimeError pickles cleanly when raised inside a pool worker.
        raise RuntimeError(
            'psql exited with status %d while vacuuming wdpa_%s_binomial_intersections'
            % (proc.returncode, tname))
    

In [3]:
# NOTE(review): select_binomial is not defined in any cell visible here, and this
# cell's execution count (3) is lower than that of the definition cells above --
# presumably a leftover from an earlier revision of the notebook. Confirm where
# it is defined (or remove the cell) before relying on Restart & Run All.
select_binomial('mammals')

                                                                                              QUERY PLAN                                                                                               
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=1718861.27..1723770.70 rows=1963773 width=93) (actual time=49776.801..52886.758 rows=10622456 loops=1)
   Sort Key: wdpa_gid, binomial
   Sort Method: quicksort  Memory: 1865588kB
   ->  HashAggregate  (cost=1493958.24..1513595.97 rows=1963773 width=93) (actual time=20314.209..26895.930 rows=10622456 loops=1)
         Group Key: wdpa_gid, binomial, code, kingdom, phylum, class, order_, family, genus, year
         ->  Seq Scan on wdpa_mammals_intersections  (cost=0.00..953920.80 rows=19637725 width=93) (actual time=0.073..7342.473 rows=10636983 loops=1)
               Filt

In [7]:
# One lower-cased table name per shapefile under ../../data/redlist, taken from
# the file name up to its first dot, in sorted order.
shapefile_paths = glob.glob("../../data/redlist/*/*.shp")
tnames = sorted(
    os.path.basename(path).split(".")[0].lower()
    for path in shapefile_paths
)

In [11]:
# Build every wdpa_<tname>_binomial_intersections table, one pool job per table.
exe = SimpleProcessPoolExecutor(max_workers=8)
for table_name in tnames:
    exe.submit(select_into, table_name)
# Trailing ';' keeps the returned result list out of the cell output.
exe.shutdown();

ERROR:  relation "wdpa_combtoothblennies_binomial_intersections" already exists
ERROR:  relation "wdpa_corals_part_1_binomial_intersections" already exists
ERROR:  relation "wdpa_chondrichthyes_binomial_intersections" already exists
ERROR:  relation "wdpa_conus_binomial_intersections" already exists
ERROR:  relation "wdpa_bonefish_tarpons_binomial_intersections" already exists
ERROR:  relation "wdpa_amphibians_binomial_intersections" already exists
ERROR:  relation "wdpa_butterflyfish_binomial_intersections" already exists
ERROR:  relation "wdpa_angelfish_binomial_intersections" already exists
ERROR:  relation "wdpa_corals_part_3_binomial_intersections" already exists
ERROR:  relation "wdpa_corals_part_2_binomial_intersections" already exists
ERROR:  relation "wdpa_corals_part_2_1_binomial_intersections" already exists
ERROR:  relation "wdpa_damselfish_binomial_intersections" already exists
ERROR:  relation "wdpa_fw_crabs_binomial_intersections" already exists
ERROR:  relation "wdpa_fw

In [13]:
# Add the binomial_area column to every table, one pool job per table.
exe = SimpleProcessPoolExecutor(max_workers=8)
for table_name in tnames:
    exe.submit(add_column, table_name)
# Trailing ';' keeps the returned result list out of the cell output.
exe.shutdown();

ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
ALTER TABLE
SimpleProcessPoolExecutor succeeded: all 32 jobs completed


In [16]:
# Populate binomial_area in every table, one pool job per table.
exe = SimpleProcessPoolExecutor(max_workers=8)
for table_name in tnames:
    exe.submit(update_table, table_name)
# Trailing ';' keeps the returned result list out of the cell output.
exe.shutdown();

UPDATE 11310
UPDATE 47599
UPDATE 96329
UPDATE 16137
UPDATE 69045
UPDATE 309083
UPDATE 92537
UPDATE 347743
UPDATE 335746
UPDATE 604239
UPDATE 688060
UPDATE 604239
UPDATE 114123
UPDATE 5426
UPDATE 277180
UPDATE 137347
UPDATE 80823
UPDATE 866064
UPDATE 119981
UPDATE 130146
UPDATE 137917
UPDATE 91616
UPDATE 79074
UPDATE 178454
UPDATE 1567382
UPDATE 410417
UPDATE 2403018
UPDATE 1763537
UPDATE 4075226
UPDATE 5572122
UPDATE 10622456
UPDATE 20392764
SimpleProcessPoolExecutor succeeded: all 32 jobs completed


In [18]:
# Create the (wdpa_gid, code) index on every table, one pool job per table.
exe = SimpleProcessPoolExecutor(max_workers=8)
for table_name in tnames:
    exe.submit(create_index, table_name)
# Trailing ';' keeps the returned result list out of the cell output.
exe.shutdown();

CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
SimpleProcessPoolExecutor succeeded: all 32 jobs completed


In [20]:
# VACUUM ANALYZE every table, one pool job per table.
exe = SimpleProcessPoolExecutor(max_workers=8)
for table_name in tnames:
    exe.submit(vacuum_table, table_name)
# Trailing ';' keeps the returned result list out of the cell output.
exe.shutdown();

VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
VACUUM
SimpleProcessPoolExecutor succeeded: all 32 jobs completed
