In [1]:
import cStringIO
import concurrent
import concurrent.futures
import glob
import json
import os
import sys
import thread
import time
import traceback

import affine
import IPython
import PIL


In [2]:
class SimpleProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
    """Process pool that tracks every submitted future and summarizes them at shutdown.

    Each future returned by ``submit`` is recorded in ``self.futures``.
    ``shutdown`` drains all recorded futures, logs any exception to stderr,
    shuts the underlying pool down, and then either returns the collected
    results or raises if at least one job failed.
    """

    def __init__(self, max_workers):
        """Create the pool with ``max_workers`` worker processes."""
        super(SimpleProcessPoolExecutor, self).__init__(max_workers=max_workers)
        # Futures in submission order; shutdown() iterates over this list.
        self.futures = []

    def submit(self, fn, *args, **kwargs):
        """Schedule ``fn(*args, **kwargs)``, remember its future, and return it."""
        future = super(SimpleProcessPoolExecutor, self).submit(fn, *args, **kwargs)
        self.futures.append(future)
        return future

    def get_futures(self):
        """Return the list of all futures submitted so far (the live list, not a copy)."""
        return self.futures

    def shutdown(self, wait=True):
        """Wait for every submitted job, shut the pool down, and return the results.

        Args:
            wait: Forwarded to the base-class ``shutdown``.  It is accepted so
                that the inherited context-manager protocol, which calls
                ``shutdown(wait=True)``, works.  Regardless of its value this
                method blocks until every recorded future has finished, because
                it has to collect their results.

        Returns:
            List of job results in completion order (not submission order).

        Raises:
            Exception: if one or more jobs raised.  The individual tracebacks
                are written to stderr as they are encountered.
        """
        exception_count = 0
        results = []
        for completed in concurrent.futures.as_completed(self.futures):
            try:
                results.append(completed.result())
            except Exception:
                # Keep draining so one bad job does not hide the others.
                exception_count += 1
                sys.stderr.write(
                    'Exception caught in SimpleProcessPoolExecutor.shutdown.  Continuing until all are finished.\n' +
                    'Exception follows:\n' +
                    traceback.format_exc())
        super(SimpleProcessPoolExecutor, self).shutdown(wait=wait)
        if exception_count:
            raise Exception('SimpleProcessPoolExecutor failed: %d of %d raised exception' % (exception_count, len(self.futures)))
        sys.stdout.write('SimpleProcessPoolExecutor succeeded: all %d jobs completed\n' % (len(self.futures)))
        return results

class Stopwatch(object):
    """Context manager that reports the wall-clock duration of its ``with`` body.

    On exit it writes ``"<name> took <seconds> seconds"`` to stdout, whether or
    not the body raised; exceptions from the body are not suppressed.
    """

    def __init__(self, name):
        """Remember ``name``, the label used in the timing message."""
        self.name = name

    def __enter__(self):
        """Record the start time and return this stopwatch for ``with ... as``."""
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        """Write the elapsed time since ``__enter__`` to stdout."""
        sys.stdout.write('%s took %.1f seconds\n' % (self.name, time.time() - self.start))


def subdivide(tname):
    c = "CREATE TABLE IF NOT EXISTS %s_subdivided AS SELECT ST_SubDivide(wkb_geometry) AS geom, objectid as %s_gid FROM %s;" % (tname, tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd
    
def create_index(tname):
    c = "CREATE INDEX IF NOT EXISTS %s_geom_idx ON %s USING GIST (geom);" % (tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd    
    
def vacuum_table(tname):
    c = "VACUUM ANALYZE %s;" % tname
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd   
    
def make_valid_gid(tname,gid):
    c = "UPDATE %s SET geom=ST_MakeValid(geom) WHERE botw_gid = %s and ST_IsValid(geom) = false;" % (tname, gid)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd    

def select_into(tname):
    c = """
    SELECT 
      wdpa.gid as wdpa_gid, 
      m.%s_gid as %s_gid,  
      st_area(st_intersection(wdpa.geom, m.geom)::geography) as intersection_area
    INTO TABLE wdpa_%s_subdivided
    FROM wdpa_june2018_shapefile_polygons AS wdpa
    INNER JOIN %s_subdivided AS m 
      ON (ST_Intersects(wdpa.geom, m.geom))
    WHERE
    wdpa.gid = 1
    """
    c = c % (tname, tname, tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd

def insert_into(tname, wdpa_idx):
    c = """
    INSERT INTO wdpa_%s_subdivided
    SELECT 
      wdpa.gid as wdpa_gid, m.%s_gid as %s_gid, 
      st_area(st_intersection(wdpa.geom, m.geom)::geography) as intersection_area
    FROM wdpa_june2018_shapefile_polygons AS wdpa
      INNER JOIN %s_subdivided AS m 
      ON (ST_Intersects(wdpa.geom, m.geom))
    WHERE
      wdpa.gid = %s
    """
    c = c % (tname, tname, tname, tname, wdpa_idx)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd        

In [3]:
# One table name per redlist shapefile: the file's base name up to its first
# ".", lower-cased, collected in sorted order.
tnames = sorted(
    os.path.basename(f).split(".")[0].lower()
    for f in glob.glob("../../data/redlist/*/*.shp")
)

In [None]:
# Create one wdpa_<tname>_subdivided table per table name.  select_into() only
# stores the intersections for wdpa.gid = 1; rows for the other gids are added
# afterwards with insert_into().
for tname in tnames:
    select_into(tname)

In [None]:
# Fan the remaining inserts out over a pool of 24 worker processes: one
# insert_into() job (one psql invocation) per (wdpa gid, table name) pair.
exe = SimpleProcessPoolExecutor(max_workers=24)
# Starts at 2 because select_into() already handled wdpa.gid = 1.
# NOTE(review): 217796 is presumably one past the largest wdpa gid -- confirm
# against wdpa_june2018_shapefile_polygons.
for wdpa_idx in range(2,217796):
    for tname in tnames:
        exe.submit(insert_into, tname, wdpa_idx)
# Blocks until every job has finished; raises if any job raised an exception.
exe.shutdown()
# Makes None the cell's final expression, so no value is displayed as output.
None