In [None]:
import affine, concurrent, cStringIO, glob, IPython, json, os, PIL, sys, thread, time, traceback


In [None]:
class SimpleProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
    """Process pool that records every submitted future and reports failures at shutdown.

    Typical use: create the pool, call submit() once per job, then call
    shutdown() to wait for all jobs and collect their results.
    """

    def __init__(self, max_workers):
        super(SimpleProcessPoolExecutor, self).__init__(max_workers=max_workers)
        # Every future returned by submit(), in submission order.
        self.futures = []

    def submit(self, fn, *args, **kwargs):
        """Schedule fn(*args, **kwargs) on the pool and remember its future."""
        future = super(SimpleProcessPoolExecutor, self).submit(fn, *args, **kwargs)
        self.futures.append(future)
        return future

    def get_futures(self):
        """Return the list of all futures submitted so far."""
        return self.futures

    def shutdown(self, wait=True):
        """Wait for every submitted job, then shut the pool down.

        Args:
            wait: forwarded to the base-class shutdown().  It is accepted so
                the signature stays compatible with the base class, whose
                __exit__ calls shutdown(wait=True); without it the executor
                could not be used in a ``with`` statement.  All recorded
                futures are waited for regardless of this flag.

        Returns:
            The job results, in completion order (not submission order).

        Raises:
            Exception: if at least one job raised.  Each job's traceback is
                written to stderr as it is encountered, and the pool is still
                shut down before this is raised.
        """
        exception_count = 0
        results = []
        for completed in concurrent.futures.as_completed(self.futures):
            try:
                results.append(completed.result())
            except Exception:
                # Keep draining the remaining futures so one failure does not
                # hide the outcome of the other jobs.
                exception_count += 1
                sys.stderr.write(
                    'Exception caught in SimpleProcessPoolExecutor.shutdown.  Continuing until all are finished.\n' +
                    'Exception follows:\n' +
                    traceback.format_exc())
        super(SimpleProcessPoolExecutor, self).shutdown(wait=wait)
        job_count = len(self.futures)
        if exception_count:
            raise Exception('SimpleProcessPoolExecutor failed: %d of %d raised exception' % (exception_count, job_count))
        sys.stdout.write('SimpleProcessPoolExecutor succeeded: all %d jobs completed\n' % job_count)
        return results

class Stopwatch(object):
    """Context manager that prints how long its ``with`` body took.

    On exit it writes ``'<name> took <seconds> seconds'`` to stdout, whether
    or not the body raised; exceptions from the body are not suppressed.
    """

    def __init__(self, name):
        # Label used in the timing message.
        self.name = name

    def __enter__(self):
        self.start = time.time()
        # Return the instance so ``with Stopwatch(..) as sw`` binds something useful.
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Parameter names avoid shadowing the builtin ``type`` and the
        # ``traceback`` module imported by this file.
        sys.stdout.write('%s took %.1f seconds\n' % (self.name, time.time() - self.start))


def subdivide(tname):
    c = "CREATE TABLE IF NOT EXISTS %s_subdivided AS SELECT ST_SubDivide(geom) AS geom, gid as %s_gid FROM %s;" % (tname, tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd
    
def create_index(tname):
    c = "CREATE INDEX IF NOT EXISTS %s_geom_idx ON %s USING GIST (geom);" % (tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd    
    
def vacuum_table(tname):
    c = "VACUUM ANALYZE %s;" % tname
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd   
    
def make_valid(tname):
    c = "UPDATE %s SET geom=ST_MakeValid(geom) WHERE ST_IsValid(geom) = false;" % tname
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd    

In [None]:
# One lower-cased table name per shapefile: the file's base name up to its
# first dot, sorted alphabetically.
tnames = sorted(
    os.path.basename(shp_path).split(".")[0].lower()
    for shp_path in glob.glob("../../data/redlist/*/*.shp")
)
tnames

In [None]:
# Run make_valid on every `<tname>_subdivided` table, up to 16 at a time.
exe = SimpleProcessPoolExecutor(max_workers=16)
for tname in tnames:
    target_table = tname.lower() + '_subdivided'
    exe.submit(make_valid, target_table)
exe.shutdown()
# Trailing None keeps the notebook cell from displaying a value.
None

In [None]:
# Run create_index on every `<tname>_subdivided` table, up to 12 at a time.
exe = SimpleProcessPoolExecutor(max_workers=12)
for tname in tnames:
    target_table = tname.lower() + '_subdivided'
    exe.submit(create_index, target_table)
exe.shutdown()
# Trailing None keeps the notebook cell from displaying a value.
None

In [None]:
# Run vacuum_table on every `<tname>_subdivided` table, up to 12 at a time.
exe = SimpleProcessPoolExecutor(max_workers=12)
for tname in tnames:
    target_table = tname.lower() + '_subdivided'
    exe.submit(vacuum_table, target_table)
exe.shutdown()
# Trailing None keeps the notebook cell from displaying a value.
None

In [None]:
# Check validity 
def is_valid(tname):
    print "START " + tname
    c = """
    SELECT %s_gid, ST_IsValidReason(geom)
    FROM %s_subdivided
    WHERE ST_IsValid(geom) = false
    """
    c = c % (tname, tname)
    cmd = 'psql -d biodiversity -c "%s"' % c 
    !$cmd
    print "END " + tname

In [None]:
# Print the invalid-geometry report for each table in turn (serially, so the
# START/END markers of different tables do not interleave).
for tname in tnames:
    is_valid(tname)
