LODES 7.5 https://lehd.ces.census.gov/data/

Census Block Shapefile https://www2.census.gov/geo/tiger/TIGER2010BLKPOPHU

NYC Metropolitan Scope and Shapefile https://metroexplorer.planning.nyc.gov/about



In [None]:
# Mount Google Drive at /content/drive; every data path used below lives under
# /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Libraries

In [None]:
from IPython.core.display import clear_output

import pandas as pd
pd.options.display.max_columns = 100
import numpy as np
import re
import os
from glob import glob

from tqdm import tqdm
tqdm.pandas()

## GeoPandas
from IPython.core.display import clear_output
!pip install geopandas
!pip install -U rtree folium mapclassify matplotlib seaborn
clear_output()
import geopandas as gpd


def flatten_list(l):
  """Concatenate the items of every sub-iterable of ``l`` into one flat list.

  Only one level of nesting is removed; the order of items is preserved.
  """
  flattened = []
  for sublist in l:
    flattened.extend(sublist)
  return flattened

def standardize_column_names(df, custom_mapping = None):
  """Rename the columns of ``df`` in place and return the same frame.

  Without ``custom_mapping`` every column name is lower-cased and its spaces
  are replaced by underscores. With ``custom_mapping`` every column name is
  looked up in the mapping; a name missing from the mapping raises ``KeyError``
  and leaves ``df`` untouched.
  """
  if custom_mapping is not None:
    renamed_columns = [custom_mapping[name] for name in df.columns]
  else:
    renamed_columns = [name.lower().replace(' ','_') for name in df.columns]
  df.columns = renamed_columns
  return df

def safe_log(m):
  """Natural log of ``m`` element-wise, with 0 returned wherever ``m`` is 0.

  ``m`` is converted to a float array first, so the result is always a NumPy
  array of floats with the same shape as the input.
  """
  # Reference: https://stackoverflow.com/a/52209380
  values = np.array(m, dtype=float)
  result = np.zeros_like(values)
  is_nonzero = values != 0
  # Entries where the mask is False keep the 0 that `result` was initialised with.
  np.log(values, out=result, where=is_nonzero)
  return result

!pip install osmnx
clear_output()
import osmnx as ox

# !pip install folderstats -q
# import folderstats
# !pip install humanfriendly -q
# from humanfriendly import format_size
# def get_folderstats(root_directory_path):
#   folderstats_df = folderstats.folderstats(root_directory_path, ignore_hidden=True)
#   folderstats_df['readable_size'] = folderstats_df['size'].apply(lambda x: format_size(x))
#   return folderstats_df

#### PyDeck

In [None]:
## PyDeck
from google.colab import output
output.enable_custom_widget_manager()
# output.disable_custom_widget_manager()
!pip install pydeck
clear_output()
import pydeck as pdk

from pydeck.types import String as pdk_string
def create_pdk_text_layer(data, text, position = 'coordinates', size = 12, color = [0,0,0], angle = 0, text_anchor = 'middle', alignment_baseline = 'center', pickable = True):
  """Build a pydeck ``TextLayer`` over ``data``.

  ``text``, ``position``, ``size``, ``color`` and ``angle`` are passed straight
  to the layer's ``get_*`` properties. ``text_anchor`` and
  ``alignment_baseline`` are wrapped in ``pydeck.types.String`` first
  (presumably so they are treated as literal strings rather than accessors).
  """
  layer_properties = {
      'get_text': text,
      'get_position': position,
      # Text styling and positioning
      'get_size': size,
      'get_color': color,
      'get_angle': angle,
      'get_text_anchor': pdk_string(text_anchor),
      'get_alignment_baseline': pdk_string(alignment_baseline),
      # Interactivity
      'pickable': pickable,
  }
  return pdk.Layer('TextLayer', data, **layer_properties)

def create_pdk_polygon_layer(data, polygon, filled = True, fill_color = [255, 255, 255], opacity = 0.1, stroked = False, line_color = [255, 255, 255], line_width = 1, extruded = False, elevation = 0, wireframe = False, auto_highlight = True, pickable = True):
  """Build a pydeck ``PolygonLayer`` over ``data``.

  ``polygon`` is passed as ``get_polygon``. The remaining arguments are
  forwarded unchanged as the layer's fill, border, height, surface and
  interactivity properties.
  """
  layer_properties = {
      'get_polygon': polygon,
      # Fill
      'filled': filled,
      'get_fill_color': fill_color,
      'opacity': opacity,
      # Border
      'stroked': stroked,
      'get_line_color': line_color,
      'get_line_width': line_width,
      # Height
      'extruded': extruded,
      'get_elevation': elevation,
      # Surface
      'wireframe': wireframe,
      # Interactivity
      'auto_highlight': auto_highlight,
      'pickable': pickable,
  }
  return pdk.Layer('PolygonLayer', data, **layer_properties)

def create_pdk_column_layer(data, position, radius = 100, coverage = 1, filled = True, fill_color = [255, 255, 255], opacity = 0.1, stroked = False, line_color = [255, 255, 255], line_width = 1, extruded = True, elevation = 0, elevation_scale = 100, wireframe = False, auto_highlight = True, pickable = True):
  """Build a pydeck ``ColumnLayer`` over ``data``.

  ``position`` is passed as ``get_position``. The remaining arguments are
  forwarded unchanged as the layer's column, fill, border, height, surface and
  interactivity properties.
  """
  layer_properties = {
      'get_position': position,
      # Column property
      'radius': radius,
      'coverage': coverage,
      # Fill
      'filled': filled,
      'get_fill_color': fill_color,
      'opacity': opacity,
      # Border
      'stroked': stroked,
      'get_line_color': line_color,
      'get_line_width': line_width,
      # Height
      'extruded': extruded,
      'get_elevation': elevation,
      'elevation_scale': elevation_scale,
      # Surface
      'wireframe': wireframe,
      # Interactivity
      'auto_highlight': auto_highlight,
      'pickable': pickable,
  }
  return pdk.Layer('ColumnLayer', data, **layer_properties)

def create_pdk_arc_layer(data, source_position, target_position, width = 1, height = 1, tilt = 0, great_circle = False, source_color = [255, 0, 0, 40], target_color = [0, 255, 0, 40], auto_highlight = True, pickable = True):
  """Build a pydeck ``ArcLayer`` over ``data``.

  ``source_position`` and ``target_position`` are passed as the arc end-point
  accessors. The remaining arguments are forwarded unchanged as the layer's
  arc-shape, colour and interactivity properties.
  """
  layer_properties = {
      'get_source_position': source_position,
      'get_target_position': target_position,
      # Arc property
      'get_width': width,
      'get_height': height,
      'get_tilt': tilt,
      'great_circle': great_circle,
      # Fill
      'get_source_color': source_color,
      'get_target_color': target_color,
      # Interactivity
      'auto_highlight': auto_highlight,
      'pickable': pickable,
  }
  return pdk.Layer('ArcLayer', data, **layer_properties)

def show_deck(layers, view, tooltip = True, iframe_height = 500, html_filepath = '/content/temp.html'):
  """Assemble ``layers`` into a dark-styled pydeck ``Deck`` and export it to HTML.

  ``view`` is used as the initial view state and ``tooltip`` is handed to the
  deck as-is. A custom tooltip can be given as a dict, e.g.
  ``{'html': '<br>'.join(['<b>'+c+':</b> {'+c+'}' for c in tooltip_columns])}``.
  The HTML is written to ``html_filepath``; nothing is returned.
  """
  deck = pdk.Deck(
      layers,
      initial_view_state = view,
      map_style = pdk.map_styles.DARK,
      tooltip = tooltip,
  )
  deck.to_html(html_filepath, iframe_height = iframe_height)

In [None]:
def get_color_mapping(series, cmap='plasma', return_hex = False):
  """Map every distinct integer value in ``series`` to a colour taken from ``cmap``.

  Colours are assigned by *value*, not by rank: one colour is generated for
  every integer between the lowest covered value and ``max(series)``, and
  value ``v`` receives the colour at that offset. The covered range starts at
  0 for non-negative data, so ``max(series) + 1`` colours are requested, and is
  extended downwards when ``series`` contains negative values.

  Assigning by rank (zipping the sorted distinct values with the colours)
  gives the same value different colours whenever the data has gaps, and
  leaves values without a colour when there are more distinct values than
  ``max(series) + 1`` (e.g. negative values).

  Parameters:
    series: iterable of integers (e.g. a pandas Series of rounded values).
    cmap: name of the colormap passed to ``ox.plot.get_colors``.
    return_hex: forwarded to ``ox.plot.get_colors``; when falsy, each colour is
      converted to a ``[r, g, b]`` list of 0-255 integers.

  Returns:
    dict mapping each distinct value to its colour; empty for empty input.
  """
  distinct_values = sorted(set(series))
  if not distinct_values:
    return {}
  # Anchor the gradient at 0 unless the data goes below it.
  lowest_value = min(distinct_values[0], 0)
  highest_value = distinct_values[-1]
  gradient_colors = ox.plot.get_colors(n=highest_value-lowest_value+1, cmap=cmap, start=0, return_hex=return_hex)
  if not return_hex:
    # Keep the first three channels and rescale them from 0-1 floats to 0-255 integers.
    gradient_colors = [(np.array(color)[:3] * 255).round().astype(np.uint8).tolist() for color in gradient_colors]
  return {value: gradient_colors[value - lowest_value] for value in distinct_values}

# [(v, 'rgb('+','.join(map(str,color))+')') for v,color in speed_color_scale] # for plotly

def add_color_column(df, field, color_column_name):
  """Add ``color_column_name`` to ``df`` holding the colour of each ``df[field]`` value.

  The value-to-colour lookup comes from ``get_color_mapping`` with its default
  colormap. ``df`` is modified in place and also returned.
  """
  field_values = df[field]
  df[color_column_name] = field_values.map(get_color_mapping(field_values))
  return df

def log_round_clip_and_color(df, field):
  """Interactively bucket ``df[field]`` on a log scale and colour the buckets.

  Adds ``<field>_log``, ``<field>_log_rounded``, ``<field>_log_rounded_clipped``
  and ``<field>_color`` to ``df``. The distribution of the rounded log values
  is printed, then the user is prompted for the lower and upper clipping
  bounds, which must parse as integers. Returns the modified ``df``.
  """
  log_column = field+'_log'
  rounded_column = field+'_log_rounded'
  clipped_column = field+'_log_rounded_clipped'

  df[log_column] = safe_log(df[field])
  df[rounded_column] = df[log_column].round(0).astype(int)

  # Show how many rows fall in each rounded-log bucket so the user can pick bounds.
  print(df[rounded_column].value_counts().sort_index())
  lower_text = input('lower value? ')
  upper_text = input('upper value? ')
  clear_output()

  rounded_values = df[rounded_column]
  df[clipped_column] = rounded_values.clip(lower=int(lower_text),upper=int(upper_text))
  return add_color_column(df, clipped_column, field+'_color')

## Get LODES 7.5 Data

In [None]:
# !wget -e robots=off --recursive --no-parent https://lehd.ces.census.gov/data/lodes/LODES7/ny/
# !wget -e robots=off --recursive --no-parent https://lehd.ces.census.gov/data/lodes/LODES7/nj/
# !wget -e robots=off --recursive --no-parent https://lehd.ces.census.gov/data/lodes/LODES7/ct/
# !mv /content/lehd.ces.census.gov/data/lodes/LODES7 /content/drive/MyDrive/nyc_data/demand/lodes/raw
# clear_output()

# df = get_folderstats('/content/drive/MyDrive/nyc_data/demand/lodes/raw/LODES7')
# for p in df.loc[df['name'] == 'index','path'].tolist():
#   os.remove(p)
# df = get_folderstats('/content/drive/MyDrive/nyc_data/demand/lodes/raw/LODES7')
# df = df.sort_values('id').reset_index(drop=True)

## Get Census Block Shapefile

In [None]:
# !wget -e robots=off --recursive --no-parent https://www2.census.gov/geo/tiger/TIGER2010BLKPOPHU/tabblock2010_09_pophu.zip
# !wget -e robots=off --recursive --no-parent https://www2.census.gov/geo/tiger/TIGER2010BLKPOPHU/tabblock2010_34_pophu.zip
# !wget -e robots=off --recursive --no-parent https://www2.census.gov/geo/tiger/TIGER2010BLKPOPHU/tabblock2010_36_pophu.zip

# gdf = gpd.GeoDataFrame()
# for p in glob('/content/www2.census.gov/geo/tiger/TIGER2010BLKPOPHU/*'):
#   gdf = gdf.append( gpd.read_file(p) , ignore_index = True)
# gdf.to_file('/content/drive/MyDrive/nyc_data/demand/tristate_census_block_tiger_2010.geojson', driver='GeoJSON')
# gdf.to_feather('/content/drive/MyDrive/nyc_data/demand/tristate_census_block_tiger_2010.feather')

## Define NYC Metro Region Scope

In [None]:
# nyc_metro_region_scope = gpd.read_file('/content/drive/MyDrive/nyc_data/demand/nyc_metro_region_explorer_census_tract_2010.geojson')

# nyc_metro_region_scope['state_county_id'] = nyc_metro_region_scope['statefp'].apply(int)*1000 + nyc_metro_region_scope['countyfp'].apply(int)

# nyc_metro_region_state_county_id_list = nyc_metro_region_scope['state_county_id'].unique().tolist()

# print(len(nyc_metro_region_state_county_id_list))

In [None]:
# tristate_census_block = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/tristate_census_block_tiger_2010.feather')

# tristate_census_block['state_county_id'] = tristate_census_block['STATEFP10'].apply(int)*1000 + tristate_census_block['COUNTYFP10'].apply(int)

# nyc_metro_census_block = tristate_census_block[tristate_census_block['state_county_id'].isin(nyc_metro_region_state_county_id_list)].reset_index(drop=True)
# nyc_metro_census_block = nyc_metro_census_block.explode().reset_index(drop=True).drop_duplicates(subset=['BLOCKID10'], keep='first')
# nyc_metro_census_block.to_feather('/content/drive/MyDrive/nyc_data/demand/nyc_metro_census_block_tiger_2010.feather')

# nyc_proper_census_block = nyc_metro_census_block[nyc_metro_census_block['nyc_city_proper']]
# nyc_proper_census_block = nyc_proper_census_block.explode().reset_index(drop=True).drop_duplicates(subset=['BLOCKID10'], keep='first')

# nyc_proper_census_block.to_feather('/content/drive/MyDrive/nyc_data/demand/nyc_proper_census_block_tiger_2010.feather')

## Clean LODES for NYC Metro Region

### Clean OD files

In [None]:
# nyc_metro_census_block = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/nyc_metro_census_block_tiger_2010.feather')
# nyc_metro_census_block_set = set(nyc_metro_census_block['BLOCKID10'].apply(int).tolist())

# df = get_folderstats('/content/drive/MyDrive/nyc_data/demand/lodes/raw/LODES7')

# for year in [2010, 2015, 2019]:

#   od_main_df = pd.DataFrame()
#   for p in df.loc[df.name.str.contains('od_main_JT00_'+str(year)), 'path'].tolist():
#     od_main_df = od_main_df.append(pd.read_csv(p), ignore_index = True)

#   od_aux_df = pd.DataFrame()
#   for p in df.loc[df.name.str.contains('od_aux_JT00_'+str(year)), 'path'].tolist():
#     od_aux_df = od_aux_df.append(pd.read_csv(p), ignore_index = True)

#   od_df = od_main_df.append(od_aux_df, ignore_index=True).drop('createdate', axis=1)

#   od_df = od_df[(od_df['w_geocode'].isin(nyc_metro_census_block_set)) & (od_df['h_geocode'].isin(nyc_metro_census_block_set))].reset_index(drop=True)

#   od_df.to_csv('/content/drive/MyDrive/nyc_data/demand/nyc_metro_lodes_'+str(year)+'.csv', index=False)

#   del od_df

### Clean WAC and RAC files

In [None]:
# Census blocks of the NYC metro region, plus the set of their block IDs as ints
# for fast membership tests.
nyc_metro_census_block = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/nyc_metro_census_block_tiger_2010.feather')
nyc_metro_census_block_set = {int(block_id) for block_id in nyc_metro_census_block['BLOCKID10']}

# Same for the blocks of the city proper.
nyc_proper_census_block = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/nyc_proper_census_block_tiger_2010.feather')
nyc_proper_census_block_set = {int(block_id) for block_id in nyc_proper_census_block['BLOCKID10']}

In [None]:
# Codebook lookup: value of the 'variable' column -> value of the 'abbr_desc' column.
wac_and_rac_codebook = pd.read_csv('/content/drive/MyDrive/nyc_data/demand/lodes/WAC_and_RAC_codebook.csv')
code_to_descriptive_name_mapping = dict(zip(wac_and_rac_codebook['variable'], wac_and_rac_codebook['abbr_desc']))

In [None]:
# Root folder of the raw LODES files; aggregate_lodes_area_characteristics_files
# reads '<root>/<state>/<file_type>/...' below it.
# NOTE: 'foler' is a misspelling of 'folder'; the name is kept because other code
# refers to it.
raw_lodes_data_foler = '/content/drive/MyDrive/nyc_data/demand/lodes/raw/LODES7'

In [None]:
def aggregate_lodes_area_characteristics_files(
    file_type,
    job_type,
    file_year,
    state_codes,
    segment_codes,
    columns_to_drop,
    additional_columns_to_drop):
  """Read, filter and combine LODES area-characteristics files into one table.

  For every state and every segment the file
  ``<raw_lodes_data_foler>/<state>/<file_type>/<state>_<file_type>_<segment>_<job_type>_<file_year>.csv.gz``
  is read, the unwanted columns are dropped, rows are restricted to the blocks
  in ``nyc_metro_census_block_set`` and the columns are renamed through
  ``code_to_descriptive_name_mapping``. Columns of any segment other than
  ``'S000'`` are additionally prefixed with ``<segment name>__``, where the
  segment name is the mapping of the segment code with ``'S'`` replaced by
  ``'C'``. The segments of one state are outer-joined on the block ID; the
  states are then stacked on top of each other.

  Parameters:
    file_type: ``'wac'`` selects ``w_geocode`` as block ID column, anything
      else selects ``h_geocode``.
    job_type: job type code used in the file name (e.g. ``'JT00'``).
    file_year: year used in the file name.
    state_codes: iterable of state codes as used in the folder and file names.
    segment_codes: iterable of segment codes (e.g. ``'S000'``, ``'SE01'``).
    columns_to_drop: list of columns dropped from every file.
    additional_columns_to_drop: list of further columns dropped from every file.

  Returns:
    DataFrame with integer columns, one row per block and state; an empty
    DataFrame when there is nothing to read.
  """
  block_id_column = 'w_geocode' if file_type == 'wac' else 'h_geocode'
  dropped_columns = list(additional_columns_to_drop)+list(columns_to_drop)

  # Collect the per-state tables and concatenate once at the end:
  # DataFrame.append no longer exists in pandas 2.x and copied the whole
  # accumulated table on every call.
  state_frames = []
  for state_code in state_codes:
    state_df = None
    for segment_code in segment_codes:
      file_name = '_'.join([state_code, file_type, segment_code, job_type, str(file_year)])+'.csv.gz'
      file_path = '/'.join([raw_lodes_data_foler, state_code, file_type, file_name])
      segment_df = pd.read_csv(file_path).drop(dropped_columns, axis=1)
      segment_df = segment_df[segment_df[block_id_column].isin(nyc_metro_census_block_set)].reset_index(drop=True)
      segment_df = standardize_column_names(segment_df, code_to_descriptive_name_mapping)
      if segment_code != 'S000':
        # Prefix the value columns with the segment's descriptive name so that
        # segments can sit side by side after the merge; the first column is
        # reset to the block ID column name, which is the join key.
        segment_prefix = code_to_descriptive_name_mapping[segment_code.replace('S','C')]+'__'
        segment_df.columns = [block_id_column]+(segment_prefix+segment_df.columns[1:]).tolist()
      if state_df is None:
        state_df = segment_df
      else:
        state_df = pd.merge(state_df, segment_df, on=block_id_column, how='outer')
    if state_df is None:
      # No segment requested: nothing was read for this state.
      continue
    # Blocks missing from a segment get NaN from the outer join; treat them as 0.
    state_frames.append(state_df.fillna(0).astype(int))

  if not state_frames:
    return pd.DataFrame()
  return pd.concat(state_frames, ignore_index=True)

In [None]:
### nyc_metro_wac_all_jobs_2019 ###
# Builds the 'wac' table for job type JT00 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'wac'
job_type = 'JT00'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
columns_to_drop = ['CFA01', 'CFA02', 'CFA03', 'CFA04', 'CFA05', 'CFS01', 'CFS02', 'CFS03', 'CFS04', 'CFS05', 'createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
### nyc_metro_wac_all_private_jobs_2019 ###
# Builds the 'wac' table for job type JT02 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'wac'
job_type = 'JT02'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
# NOTE(review): unlike the JT00 'wac' cell, the CFA*/CFS* columns are not dropped here - confirm this is intended.
columns_to_drop = ['createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
### nyc_metro_wac_all_federal_jobs_2019 ###
# Builds the 'wac' table for job type JT04 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'wac'
job_type = 'JT04'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
# NOTE(review): unlike the JT00 'wac' cell, the CFA*/CFS* columns are not dropped here - confirm this is intended.
columns_to_drop = ['createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
### nyc_metro_rac_all_jobs_2019 ###
# Builds the 'rac' table for job type JT00 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'rac'
job_type = 'JT00'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
columns_to_drop = ['createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
### nyc_metro_rac_all_private_jobs_2019 ###
# Builds the 'rac' table for job type JT02 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'rac'
job_type = 'JT02'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
columns_to_drop = ['createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
### nyc_metro_rac_all_federal_jobs_2019 ###
# Builds the 'rac' table for job type JT04 and year 2019 over CT, NJ and NY and
# saves it twice: for the whole metro region and for the city-proper blocks only.

file_type = 'rac'
job_type = 'JT04'
file_year = 2019
state_codes = ['ct','nj','ny']
# Columns removed from every file read in this cell.
columns_to_drop = ['createdate']

#########################################################################################################

# Segment S000: the only segment whose columns keep their unprefixed names.
segment_codes = ['S000']
additional_columns_to_drop = []

all_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SE01-SE03; the total and CE* columns are dropped from these files
# (presumably because they are redundant inside a single-segment file).
segment_codes = ['SE01','SE02','SE03']
additional_columns_to_drop = ['C000', 'CE01', 'CE02', 'CE03']

by_earning_segment = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Segments SA01-SA03; the total and CA* columns are dropped from these files
# (presumably for the same reason as above).
segment_codes = ['SA01','SA02','SA03']
additional_columns_to_drop = ['C000', 'CA01', 'CA02', 'CA03']

by_age_segment  = aggregate_lodes_area_characteristics_files(file_type,  job_type, file_year, state_codes, segment_codes, columns_to_drop, additional_columns_to_drop)

#########################################################################################################

# Join key: the first column of the S000 table.
# NOTE(review): the merges assume the segment tables carry a column of the same name - confirm.
geocode_column = all_segment.columns[0]
all_segment = pd.merge(all_segment, by_earning_segment, on=geocode_column, how='outer')
all_segment = pd.merge(all_segment, by_age_segment    , on=geocode_column, how='outer')
# Metro-wide table.
all_segment.to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_metro_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)
# City-proper subset: rows whose block ID is in nyc_proper_census_block_set.
all_segment[all_segment[geocode_column].isin(nyc_proper_census_block_set)].to_parquet('/content/drive/MyDrive/nyc_data/demand/nyc_city_proper_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet', index=False)

In [None]:
# t = pd.merge( nyc_proper_wac_all_private_jobs.set_index('w_geocode')['num_of_jobs'].rename('private'), nyc_proper_wac_all_federal_jobs.set_index('w_geocode')['num_of_jobs'].rename('federal'), left_index=True, right_index=True, how='outer')
# t = pd.merge( t, nyc_proper_wac_all_jobs.set_index('w_geocode')['num_of_jobs'].rename('all'), left_index=True, right_index=True, how='outer')
# t = t.fillna(0)
# t['private_prop'] = t['private']/t['all']
# t['private_plus_federal'] = t['private'] + t['federal']
# t['private_plus_federal_prop'] = t['private_plus_federal']/t['all']
# t['private_prop'].hist(bins=50)
# t['plus_prop'].hist(bins=50)

### Map WAC and RAC files

In [None]:
# Scope, job type and year of the WAC/RAC tables to map.
geospatial_scope = 'proper'
job_type = 'JT02'
file_year = 2019

# The cleaning cells save the city-proper tables as
# 'nyc_city_proper_<type>_<job>_<year>.parquet', while the name built from
# geospatial_scope = 'proper' is 'nyc_proper_...'. The name built from the scope
# is tried first, with the name the cleaning cells write as fallback, so the
# tables load right after a fresh pipeline run.
_scope_file_prefix_fallbacks = {'proper': 'city_proper'}

def _find_area_characteristics_parquet(file_type):
  """Return the parquet path of the ``file_type`` ('wac' or 'rac') table for the current scope, job type and year."""
  folder = '/content/drive/MyDrive/nyc_data/demand/'
  suffix = '_'+file_type+'_'+job_type+'_'+str(file_year)+'.parquet'
  primary_path = folder+'nyc_'+geospatial_scope+suffix
  fallback_prefix = _scope_file_prefix_fallbacks.get(geospatial_scope)
  if fallback_prefix is not None and not os.path.exists(primary_path):
    return folder+'nyc_'+fallback_prefix+suffix
  return primary_path

wac_df = pd.read_parquet(_find_area_characteristics_parquet('wac'))
rac_df = pd.read_parquet(_find_area_characteristics_parquet('rac'))

# One-off preparation of the augmented block geometry file (kept for reference):
# block_gis = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/nyc_'+geospatial_scope+'_census_block_tiger_2010.feather')
# block_gis = block_gis[['BLOCKID10','geometry']]
# block_gis['BLOCKID10'] = block_gis['BLOCKID10'].astype(int)
# block_gis = add_centroid_column(block_gis, geom_column = 'geometry', proj2 = 'epsg:2263')
# block_gis['census_block_area_size'] = block_gis['geometry'].to_crs('epsg:2263').area/1e6   # area size in km^2
# block_gis['polygon'] = block_gis['geometry'].exterior.apply(lambda pt: (np.array(pt.xy).T).tolist())
# block_gis.to_feather('/content/drive/MyDrive/nyc_data/demand/nyc_'+geospatial_scope+'_census_block_tiger_2010_augmented.feather')

# Block geometries of the chosen scope, as written by the preparation code above.
block_gis = gpd.read_feather('/content/drive/MyDrive/nyc_data/demand/nyc_'+geospatial_scope+'_census_block_tiger_2010_augmented.feather')

In [None]:
# wac = block_gis.merge( wac_df, left_on='BLOCKID10', right_on='w_geocode')
# view = pdk.data_utils.compute_view(flatten_list(wac['polygon'].tolist()))

In [None]:
# wac['age_you_prop'] = (wac['age_you']/wac['num_of_jobs']).round(2)
# wac['age_you_prop_multiply_10_rounded'] = (wac['age_you_prop']*10).round(0).astype(int)
# wac = add_color_column(wac, 'age_you_prop_multiply_10_rounded', 'age_you_prop_color')

# youth_proportion_layer = create_pdk_polygon_layer(wac[['geometry','age_you_prop_color','age_you','age_you_prop','BLOCKID10']], 'geometry.coordinates', fill_color='age_you_prop_color', opacity = 0.8)

In [None]:
# wac['age_you_density'] = (wac['age_you']/wac['census_block_area_size']).round(0).astype(int)
# wac = log_round_clip_and_color(wac, 'age_you_density')

# youth_density_layer = create_pdk_polygon_layer(wac[['geometry','age_you_density_color','age_you','age_you_prop','BLOCKID10']], 'geometry.coordinates', fill_color='age_you_density_color', opacity = 0.8)

In [None]:
# wac['sec_finance_density'] = (wac['sec_finance']/wac['census_block_area_size']).round(0).astype(int)
# wac = log_round_clip_and_color(wac, 'sec_finance_density')
# wac['sec_finance_density_log_rounded_clipped_multiply_by_100'] = wac['sec_finance_density_log_rounded_clipped']*100
# finance_worker_density_layer = create_pdk_polygon_layer(wac[['geometry','sec_finance_density_color','sec_finance_density_log_rounded_clipped_multiply_by_100','sec_finance','BLOCKID10']], 'geometry.coordinates', fill_color='sec_finance_density_color', opacity = 0.8, extruded = True, elevation = 'sec_finance_density_log_rounded_clipped_multiply_by_100')

# show_deck([finance_worker_density_layer], view, tooltip={"html": "<b>Block ID:</b> {BLOCKID10}<br><b>People working in finance:</b> {sec_finance}"})
# !mv /content/temp.html /content/drive/MyDrive/nyc_data/demand/city_proper_finance_worker_density.html

In [None]:
# Attach the RAC attributes to the block geometries (blocks without a RAC row are dropped).
rac = block_gis.merge(rac_df, left_on = 'BLOCKID10', right_on = 'h_geocode')
# Fit the initial map view to the flattened list of all items in the 'polygon' column.
view = pdk.data_utils.compute_view([vertex for ring in rac['polygon'].tolist() for vertex in ring])

In [None]:
# Map, per census block, the share of jobs counted in `var` and export it as HTML.
import shutil

var = 'sec_finance'
# Share of `var` among all jobs of the block, then bucketed into integer tenths for colouring.
rac[var+'_prop'] = (rac[var]/rac['num_of_jobs']).round(2)
rac[var+'_prop_multiply_10_rounded'] = (rac[var+'_prop']*10).round(0).astype(int)
rac = add_color_column(rac, var+'_prop_multiply_10_rounded', var+'_prop_color')
var_resident_prop_layer = create_pdk_polygon_layer(rac[['geometry',var+'_prop_color',var,'BLOCKID10']], 'geometry.coordinates', fill_color=var+'_prop_color', opacity = 0.8, extruded = False)
# show_deck writes the map to its default path, /content/temp.html.
show_deck([var_resident_prop_layer], view, tooltip={"html": "<b>Block ID:</b> {BLOCKID10}<br><b>Residents with this feature:</b> {"+var+"}"})
# shutil.move raises if the file cannot be moved; the exit status of a shell `mv`
# run through os.system was ignored, so a failed export went unnoticed.
shutil.move('/content/temp.html', '/content/drive/MyDrive/nyc_data/demand/city_'+geospatial_scope+'_'+var+'_resident_block.html')

In [None]:
# Map, per census block, the share of jobs counted in `var` and export it as HTML.
import shutil

var = 'sec_professional'
# Share of `var` among all jobs of the block, then bucketed into integer tenths for colouring.
rac[var+'_prop'] = (rac[var]/rac['num_of_jobs']).round(2)
rac[var+'_prop_multiply_10_rounded'] = (rac[var+'_prop']*10).round(0).astype(int)
rac = add_color_column(rac, var+'_prop_multiply_10_rounded', var+'_prop_color')
var_resident_prop_layer = create_pdk_polygon_layer(rac[['geometry',var+'_prop_color',var,'BLOCKID10']], 'geometry.coordinates', fill_color=var+'_prop_color', opacity = 0.8, extruded = False)
# show_deck writes the map to its default path, /content/temp.html.
show_deck([var_resident_prop_layer], view, tooltip={"html": "<b>Block ID:</b> {BLOCKID10}<br><b>Residents with this feature:</b> {"+var+"}"})
# shutil.move raises if the file cannot be moved; the exit status of a shell `mv`
# run through os.system was ignored, so a failed export went unnoticed.
shutil.move('/content/temp.html', '/content/drive/MyDrive/nyc_data/demand/city_'+geospatial_scope+'_'+var+'_resident_block.html')