In [1]:
# This is based on http://localhost:8820/notebooks/projects/demographics/Voting-2018-AW8.ipynb
# and is intended to expose the block_points functionality to other notebooks.

In [90]:
import numpy as np
import json
import pickle
import math
from shapely.geometry import Point

# Generate random points in census blocks

In [91]:
# From http://localhost:8820/notebooks/projects/demographics/Get%20random%20census%20block%20points%20from%20prototiles.ipynb
# These are filled in by calling init_block_points
# Keep already-loaded tables when this cell is re-run in a live kernel; only
# create the empty placeholders when any of the names is not defined yet.
try:
    all_pts
    block_geoids_2010
    blk_geoid2idx
except NameError:
    # Only "name not defined" means first run; any other exception
    # (e.g. KeyboardInterrupt) should propagate instead of wiping state.
    all_pts = None              # replaced by a numpy memmap in init_block_points
    block_geoids_2010 = None    # replaced by the parsed JSON in init_block_points
    blk_geoid2idx = {}          # block GEOID -> 1-based block index

# This needs to be called once before using block_points functionality
def init_block_points():
    """Populate the module-level lookup tables used by the block_points helpers.

    Fills in, only where still missing:
      * all_pts           - memmap over 'prototiles/master-sorted-by-block.bin',
                            records of (x, y, blockIdx, subIdx)
      * block_geoids_2010 - contents of 'block_geoids_2010.json'
                            (presumably a list of block GEOID strings - TODO confirm)
      * blk_geoid2idx     - maps each entry of block_geoids_2010 to its
                            1-based position in that list

    Safe to call repeatedly; tables that are already loaded are left alone.
    """
    global block_geoids_2010
    global blk_geoid2idx
    global all_pts

    # Compare against None explicitly: truth-testing a numpy array with more
    # than one record raises ValueError, which broke every call after the first.
    if all_pts is None:
        print("Loading prototiles")
        numpy_record_type = [('x','<f4'), ('y','<f4'), ('blockIdx', '<i4'), ('subIdx', '<i4')]
        all_pts = np.memmap('prototiles/master-sorted-by-block.bin', dtype=numpy_record_type)

    if not block_geoids_2010:
        print("Loading block_geoids_2010")
        # Context manager so the JSON file handle is closed deterministically.
        with open('block_geoids_2010.json') as geoid_file:
            block_geoids_2010 = json.load(geoid_file)

    if len(blk_geoid2idx)==0:
        print("Initializing blk_geoid2idx")
        # Block indexes are 1-based: list position + 1.
        for list_pos, geoid in enumerate(block_geoids_2010):
            blk_geoid2idx[geoid] = list_pos + 1

    print("Done initializing block_points")
    
def find_point_idx(block_idx):
    """Binary-search all_pts for the record with blockIdx == block_idx and subIdx == 0.

    Assumes all_pts is ordered by blockIdx and, within a block, by subIdx
    starting at 0 (suggested by the 'master-sorted-by-block.bin' file name -
    TODO confirm).

    Returns the integer position of that record when found.  When no such
    record exists, returns the two-element list [min_idx, max_idx] holding
    the final search bounds; callers must check for that case.
    """
    global all_pts

    # all_pts is pre-bound to None at module level, so the old
    # "try: all_pts / except:" probe could never trigger the lazy load.
    if all_pts is None:
        init_block_points()

    min_idx = 0
    max_idx = len(all_pts) - 1
    while min_idx <= max_idx:
        test_idx = (min_idx + max_idx) // 2
        record = all_pts[test_idx]
        if block_idx > record[2]:
            min_idx = test_idx + 1
        elif block_idx < record[2] or record[3] != 0:
            # Either past the block, or inside it but not on its first
            # point (subIdx 0): keep looking to the left.
            max_idx = test_idx - 1
        else:
            return test_idx
    return [min_idx, max_idx]

        
def block_points(block_idx):
    """Return every point stored for one census block.

    block_idx is the 1-based block index (the values held in blk_geoid2idx).

    Returns a list of {'x': ..., 'y': ...} dicts, in file order, taken from
    the consecutive all_pts records whose blockIdx equals block_idx.

    Raises IndexError when all_pts holds no record for block_idx.
    """
    global all_pts

    # all_pts is pre-bound to None at module level, so test for None rather
    # than probing for a NameError that can never happen.
    if all_pts is None:
        init_block_points()

    idx = find_point_idx(block_idx)
    if isinstance(idx, list):
        # find_point_idx reports "not found" as a [min_idx, max_idx] list.
        # Using that list as an index produced an obscure IndexError; raise
        # the same exception type with a message that names the block.
        raise IndexError("No points found for block index %s" % (block_idx,))

    ret = []
    total_points = len(all_pts)
    while idx < total_points:
        record = all_pts[idx]
        if record[2] != block_idx:
            break
        ret.append({'x': record[0], 'y': record[1]})
        idx += 1
    return ret

# For each census block, keep track of what voter each index has been assigned to
# Top level key is census block number, secondary key is 'next' for keeping track of the next index to be assigned,
# 'points' for the value returned by block_points, and voter ID: index for other entries
try:
    block_id_use_map
except NameError:
    # Not defined yet in this kernel: start with an empty assignment map.
    # Catch only NameError so unrelated errors are not silently swallowed.
    block_id_use_map={}

# Save/load support for block_id_use_map.
#   block_id_use_map_05_09_13_17_18_18_18_a.pickle has the dot mapping used for the _h series of dots.
# Path template for the pickle files; '{}' is filled with a suffix via
# str.format in save_block_id_use_map / load_block_id_use_map.
block_id_use_map_file_path_t = 'voters/block_id_use_map_{}.pickle'
# Suffix that load_block_id_use_map falls back to when given a falsy suffix.
default_bium_suffix = '05_09_13_17_18_18_18_a'

def save_block_id_use_map(suffix):
    """Pickle block_id_use_map to the path built from suffix.

    The destination is block_id_use_map_file_path_t with '{}' replaced by
    suffix.  A falsy suffix is rejected: an error is printed and nothing is
    written.  Returns None in all cases.
    """
    global block_id_use_map
    global block_id_use_map_file_path_t

    if not suffix:
        print("ERROR: need to specify suffix to save block_id_use_map.  Default load suffix is %s"%(default_bium_suffix))
        # Stop here: previously execution fell through and wrote a file named
        # after the empty/None suffix despite having reported an error.
        return

    block_id_use_map_file_path = block_id_use_map_file_path_t.format(suffix)

    print('Saving block_id_use_map to %s'%(block_id_use_map_file_path))

    # Save out block_id_use_map
    with open(block_id_use_map_file_path, 'wb') as handle:
        pickle.dump(block_id_use_map, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_block_id_use_map(suffix=None):
    """Replace the global block_id_use_map with one unpickled from disk.

    suffix selects the file: block_id_use_map_file_path_t with '{}' replaced
    by suffix.  A falsy suffix (including the default None) means
    default_bium_suffix.

    Any exception from open() or pickle.load propagates, in which case the
    existing block_id_use_map is left untouched.
    """
    global block_id_use_map
    global block_id_use_map_file_path_t
    global default_bium_suffix

    if not suffix:
        suffix = default_bium_suffix
    block_id_use_map_file_path = block_id_use_map_file_path_t.format(suffix)

    print('Loading block_id_use_map from %s'%(block_id_use_map_file_path))

    # Load in block_id_use_map.
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only load files written by save_block_id_use_map or another
    # trusted source.
    with open(block_id_use_map_file_path, 'rb') as handle:
        block_id_use_map = pickle.load(handle)

# Hold onto the value returned by block_points.  
# key is census block number.  Value is an array returned by block_points.
# This doesn't need to be saved out because it can be regenerated at will from 
# block_points
try:
    block_points_map
except NameError:
    # Not defined yet in this kernel: start with an empty cache.
    # Catch only NameError so unrelated errors are not silently swallowed.
    block_points_map={}

# Keep track of which census blocks overflowed the number of points
try:
    block_id_overflow_set
except NameError:
    # Not defined yet in this kernel: no overflows recorded so far.
    # Catch only NameError so unrelated errors are not silently swallowed.
    block_id_overflow_set=set()

def LonLatToWebMercator(lon, lat):
    """Project longitude/latitude (degrees) to Web Mercator coordinates.

    The output space is 256 units wide: lon -180..180 maps to x 0..256, and
    the equator (lat 0) maps to y 128, with y decreasing as lat increases.

    Returns the two-element list [x, y].
    """
    # Half of the co-latitude-shifted angle, in radians, fed to tan().
    half_angle = (lat + 90.0) * math.pi / 360.0
    merc_x = (lon + 180.0) * 256.0 / 360.0
    merc_y = 128.0 - math.log(math.tan(half_angle)) * 128.0 / math.pi
    return [merc_x, merc_y]

def WebMercatorToLonLat(x,y):
    """Convert Web Mercator coordinates back to longitude/latitude (degrees).

    Inverse of the 256-unit-wide projection: x 0..256 maps to lon -180..180
    and y 128 maps to lat 0.

    Returns the two-element list [lon, lat] (longitude first).
    """
    # Exponent is the vertical offset from the equator row, scaled to radians.
    exponent = (128.0 - y) * math.pi / 128.0
    latitude = math.atan(math.exp(exponent)) * 360.0 / math.pi - 90.0
    longitude = x * 360.0 / 256.0 - 180.0
    return [longitude, latitude]

In [92]:
def block_get_coords(census_block, item_id):
    """Return a stable lon/lat Point inside census_block for item_id.

    census_block is a key of blk_geoid2idx (a block GEOID); item_id is any
    hashable identifier.  The first time an item_id is seen for a block it is
    given the block's next unused point; later calls with the same item_id
    return the same point.  When a block runs out of points a warning is
    printed once for that block and assignment wraps around to point 0, so
    items may share a point.

    Bookkeeping lives in block_id_use_map[census_block], which holds 'next',
    'numpoints' and one item_id -> point index entry per assigned item (an
    item_id equal to 'next' or 'numpoints' would collide with those keys).

    Returns Point(lon, lat).  Raises KeyError if census_block is not in
    blk_geoid2idx.
    """
    global block_id_use_map
    global blk_geoid2idx
    global block_points_map

    is_new_block = census_block not in block_id_use_map
    if is_new_block or census_block not in block_points_map:
        # The point list is needed either for a brand-new block or because
        # block_id_use_map was restored from a saved file, which does not
        # include block_points_map.
        if not blk_geoid2idx:
            # Lookup tables not loaded yet: without this every GEOID lookup
            # below would fail with KeyError on an empty dict.
            init_block_points()
        # Convert from GEOID10 to the block index, then fetch its points.
        point_arr = block_points(blk_geoid2idx[census_block])
        block_points_map[census_block] = point_arr
        if is_new_block:
            block_id_use_map[census_block] = {'next': 0, 'numpoints': len(point_arr)}

    use_entry = block_id_use_map[census_block]
    if item_id in use_entry:
        # Already assigned, just use the same index as before
        point_idx = use_entry[item_id]
    else:
        # Not yet assigned, take the next available and advance 'next'
        point_idx = use_entry['next']
        if point_idx > use_entry['numpoints'] - 1:
            # Overflow: warn once per block, then start reusing points from 0
            if census_block not in block_id_overflow_set:
                block_id_overflow_set.add(census_block)
                print("WARNING: overflow in census block %s" % (census_block))
            point_idx = 0
        use_entry['next'] = point_idx + 1
        use_entry[item_id] = point_idx

    # Retrieve the webmercator point from the points array and convert it.
    # WebMercatorToLonLat(x, y) returns [lon, lat], and Point is likewise
    # constructed as (lon, lat).
    point_xy = block_points_map[census_block][point_idx]
    ll_arr = WebMercatorToLonLat(point_xy['x'], point_xy['y'])
    return Point(ll_arr[0], ll_arr[1])


In [93]:
#block_get_coords('420034886004011', '33-0941669_X_82_41.0_1.93_42003488600_2017')

In [94]:
# From http://localhost:8820/notebooks/projects/demographics/Import%20Opportunity%20Atlas.ipynb
# Do tract->block mappings
try:
    tract_block_indexes
except NameError:
    # Not defined yet in this kernel: start empty; filled lazily by
    # init_tract_block_indexes.  Catch only NameError so unrelated errors
    # are not silently swallowed.
    tract_block_indexes = {}
    
def init_tract_block_indexes():
    """Build tract_block_indexes: tract id -> list of 1-based block indexes.

    The tract id is the first 11 characters of each block GEOID
    (SSCCCTTTTTT).  Loads block_geoids_2010 via init_block_points() when it
    is not available yet.  Does nothing to a map that is already populated.
    Always prints the number of tracts in the map.
    """
    global tract_block_indexes
    global block_geoids_2010

    if not block_geoids_2010:
        init_block_points()

    if not tract_block_indexes:
        for list_pos, block_geoid in enumerate(block_geoids_2010):
            tract_name = block_geoid[0:11] # SSCCCTTTTTT
            # Block indexes are 1-based: list position + 1.
            tract_block_indexes.setdefault(tract_name, []).append(list_pos + 1)

    # Single formatted argument prints the same text as the former
    # comma-separated print statement, and also parses under Python 3.
    print('There are %s tracts in tract_block_indexes' % (len(tract_block_indexes),))
    
def get_tract_block_indexes(tract_id):
    """Look up the block indexes recorded for a tract.

    Builds tract_block_indexes on first use.  Returns the stored list for
    tract_id (the same list object held in the map), or a new empty list
    when the tract is unknown.
    """
    global tract_block_indexes

    # An empty (or unset) map means the index has not been built yet.
    if not tract_block_indexes:
        init_tract_block_indexes()

    return tract_block_indexes.get(tract_id, [])

def get_tract_block_geoids(tract_id):
    """List the block GEOIDs that belong to a tract.

    Builds tract_block_indexes on first use.  Each stored 1-based block index
    is translated back to its entry in block_geoids_2010.  Returns an empty
    list when the tract is unknown.
    """
    global tract_block_indexes

    # An empty (or unset) map means the index has not been built yet.
    if not tract_block_indexes:
        init_tract_block_indexes()

    if tract_id not in tract_block_indexes:
        return []
    # Stored indexes are 1-based, the geoid list is 0-based.
    return [block_geoids_2010[block_index - 1]
            for block_index in tract_block_indexes[tract_id]]

In [95]:
#tract_block_indexes['42003160300']

In [96]:
#filter(lambda x: x[0:8] == '42003160',block_geoids_2010)

In [97]:
#get_tract_block_geoids('42003202300')

In [None]:
# Set up paths to initialize tract_populations for a given type of population
# tract_populations can reflect many different kinds of populations, so don't 
# make this global.  The notebook using it needs to hold on to it
# Population type -> path of the per-block column file read with np.load
# in get_tract_populations.
pop_col_paths = {'all':'columncache/census2010_block2010/p001001.numpy',
                 'rental_pop':'columncache/census2010_block2010/H0110004.numpy',
                 'rentals':'columncache/census2010_block2010/H0040004.numpy',
                 'black_renters':'columncache/census2010_block2010/h014012.numpy',
                 'white_renters':'columncache/census2010_block2010/h014011.numpy'
                }

# Per-population caches, kept across re-runs of this cell in a live kernel:
#   tract_population_map[pop_type] -> {tract id: population}
#   blk_population_map[pop_type]   -> {block GEOID: population}
try:
    tract_population_map
    blk_population_map
except NameError:
    # Catch only NameError so unrelated errors are not silently swallowed.
    tract_population_map={}
    blk_population_map={}
    
def get_tract_populations(pop_type):
    """Return {tract id: population} for one population type, loading on demand.

    pop_type must be a key of pop_col_paths; otherwise a message is printed
    and None is returned.  Results are cached in tract_population_map, and the
    per-block values are recorded in blk_population_map[pop_type] keyed by
    block GEOID.  The tract id is the first 11 characters of the block GEOID.

    Exceptions from np.load (e.g. a missing column file) propagate, and in
    that case neither cache is modified.
    """
    if pop_type not in pop_col_paths:
        print("Unknown population type in get_tract_populations.  Add %s to pop_col_paths and try again" %(pop_type))
        return None

    # Check if we already have a map for this population
    if pop_type in tract_population_map:
        return tract_population_map[pop_type]

    # The geoid list is required below; load it if nobody has yet instead of
    # failing on enumerate(None).
    if not block_geoids_2010:
        init_block_points()

    # Don't have this population loaded yet.  Load before touching any cache
    # so a failed load cannot leave an empty blk_population_map entry behind.
    block_populations = np.load(pop_col_paths[pop_type])

    print('block_populations for %s has %s total people' % (pop_type, sum(block_populations)))

    known_blocks = blk_population_map.get(pop_type, {})
    new_block_entries = {}
    tract_populations = {}

    for list_pos, block_geoid in enumerate(block_geoids_2010):
        # Block indexes are 1-based (list position + 1).
        # NOTE(review): this assumes the column array carries an unused slot
        # at index 0 - confirm against the column cache format.
        block_pop = block_populations[list_pos + 1]
        # Entries already present for this population are never overwritten.
        if block_geoid not in known_blocks and block_geoid not in new_block_entries:
            new_block_entries[block_geoid] = block_pop
        tract_name = block_geoid[0:11] # SSCCCTTTTTT
        tract_populations[tract_name] = tract_populations.get(tract_name, 0) + block_pop

    print('tract_populations for %s has %s people' % (pop_type, sum(tract_populations.values())))

    # Commit both caches only after everything succeeded.
    blk_population_map.setdefault(pop_type, {}).update(new_block_entries)
    # Hold onto this in case someone asks again later
    tract_population_map[pop_type] = tract_populations

    return tract_populations

def get_block_populations(pop_type):
    """Return {block GEOID: population} for one population type, loading on demand.

    pop_type must be a key of pop_col_paths; otherwise a message is printed
    and None is returned.  The map is filled as a side effect of
    get_tract_populations and cached in blk_population_map.
    """
    if pop_type not in pop_col_paths:
        print("Unknown population type in get_block_populations.  Add %s to pop_col_paths and try again" %(pop_type))
        return None

    # Only a non-empty cached map is trusted.  An empty entry can be left
    # behind when a previous load created the entry and then failed reading
    # the column file; returning it would silently report "no blocks".
    cached = blk_population_map.get(pop_type)
    if cached:
        return cached

    # Don't have this population loaded yet
    get_tract_populations(pop_type)
    return blk_population_map[pop_type]

In [None]:
# get_block_populations('all')

In [None]:
#get_tract_populations('all')

In [32]:
#blk_population_map['all']['420035630001013']

406

In [14]:
#init_tract_block_indexes()

There are 73057 tracts in tract_block_indexes


In [16]:
#'01001020100' in tract_block_indexes

True