In [1]:
# Standard library
import os
import sys
import warnings

# Third-party
import geopandas as gpd
import numpy as np
import openpyxl  # not referenced directly below; presumably imported so a missing .xlsx engine fails early — TODO confirm
import pandas as pd

# The project helpers are imported from the parent directory, so it must be on
# the import path before `utils` is imported.
sys.path.append('../')
from utils import load_walksheds, overlay_wks

# NOTE(review): this silences every warning for the whole session, including
# library deprecation and CRS warnings raised by later cells.
warnings.filterwarnings('ignore')

# Directory that receives the exported CSV files; created once, no error if it exists.
os.makedirs('output', exist_ok=True)

* `wksp5` and `wksp75` (previously `walkshedbuffer`) are the Metro station walksheds, with overlapping boundaries
* `all_boundaries` contains the boundaries of all census blocks in MD, DC and VA
* `Alljobs_MDDCVA` contains information for all jobs in MD, DC and VA

In [2]:
# Input files, relative to the notebook's working directory.
blocks_shapefile = '../../Data/MD_DC_VA/Block MD_DC_VA.shp'
jobs_workbook = '../../Data/AlljobMDDCVA.xlsx'

# The project helper returns the two walkshed layers (the p5 / p75 pair).
wksp5, wksp75 = load_walksheds()

# Census block polygons and the per-block jobs table.
all_boundaries = gpd.read_file(blocks_shapefile)
Alljobs_MDDCVA = pd.read_excel(jobs_workbook)

* Rename the `w_geocode` column of the jobs table to `GEOID20` to match the ID column name in the block shapefile of DC, MD, VA
* Convert the `GEOID20` column of the block shapefile to 64-bit integers so that it can be joined to the jobs table

In [3]:
# Give the jobs table the same key column name as the block shapefile.
Alljobs_MDDCVA = Alljobs_MDDCVA.rename(columns={'w_geocode': 'GEOID20'})

# Cast the shapefile key to 64-bit integers so both sides of the join share a dtype.
block_ids = all_boundaries["GEOID20"]
all_boundaries["GEOID20"] = block_ids.astype("int64")

* Join the jobs table to `all_boundaries`, keeping the blocks that have no jobs
* Change the CRS of the boundaries to match the walksheds
* Create a column holding the full area of each block

In [4]:
# Attach the job counts to every census block. A left join keeps blocks without
# a jobs record (their job columns become NaN) and, unlike an outer join, does
# not add geometry-less rows for job records that match no block. Calling
# `merge` on the GeoDataFrame is the documented way to get a GeoDataFrame back.
Join_Jobs_Boundaries = all_boundaries.merge(Alljobs_MDDCVA, on="GEOID20", how="left")

# Reproject to EPSG:4326 — presumably the CRS of the walksheds; confirm against load_walksheds.
Join_Jobs_Boundaries = Join_Jobs_Boundaries.to_crs('EPSG:4326')

# Full area of each block, later used as the denominator of the area share.
# NOTE(review): EPSG:4326 is geographic, so this area is in squared degrees; it
# is only meaningful as a ratio against areas computed in the same CRS.
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.area

* Intersect the jobs with the walksheds
* Create a partial area column to get the block area that is within a walkshed
* Fix the names of the stations by removing extra numbers/characters at the end

In [5]:
# Project helper; its two return values are unpacked as the p5 and p75 overlay frames.
# NOTE(review): presumably an intersection overlay with each walkshed layer, like the
# earlier inline version kept below for reference — confirm in utils.overlay_wks.
intp5, intp75 = overlay_wks(Join_Jobs_Boundaries)
# intersect_stations_Jobs = walkshedbuffer.overlay(Join_Jobs_Boundaries, how="intersection")

In [6]:
# Lift the column limit so wide frames are displayed in full (stays in effect for the session).
pd.options.display.max_columns = None

# Preview the first two rows of the p5 overlay.
intp5.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,Shape_Leng_1,Shape_Area_1,C000,CA01,CA02,CA03,CE01,CE02,CE03,CNS01,CNS02,CNS03,CNS04,CNS05,CNS06,CNS07,CNS08,CNS09,CNS10,CNS11,CNS12,CNS13,CNS14,CNS15,CNS16,CNS17,CNS18,CNS19,CNS20,CR01,CR02,CR03,CR04,CR05,CR07,CT01,CT02,CD01,CD02,CD03,CD04,CS01,CS02,CFA01,CFA02,CFA03,CFA04,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate,fullarea,Name_1,Acres,Shape_Leng_2,Shape_Area_2,StnCode,geometry
0,24,31,704404,1004,240317044041004,Block 1004,224514.0,415.0,39.0066882,-77.09356,59.0,152.0,0.042248,2.3e-05,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0,2.3e-05,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09737 39.00454, -77.09740 39.004..."
1,24,31,705000,4005,240317050004005,Block 4005,74301.0,579.0,39.0020246,-77.0962632,0.0,0.0,0.011929,8e-06,8.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0,8e-06,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09535 39.00312, -77.09522 39.002..."


In [7]:
# Area of each overlay geometry, stored alongside the block's 'fullarea'.
for overlay_frame in (intp5, intp75):
    overlay_frame['partialarea'] = overlay_frame.area

* Sum all of the public admin jobs (`CNS20`) within the buffer of each station
* Rename the jobs column
* For each station, calculate the percent area of blocks that are within each station walkshed
* Multiply that percentage (`multiplier_*`) by the total number of jobs (`sum_public_admin_bufstation_*`) to get the proportional number of jobs
* Export the proportional number of jobs

In [8]:

# Total CNS20 jobs over all overlay rows of each station (one row per 'Name_1').
# Uses the native groupby reduction: passing the builtin `sum` to `apply` only
# worked because pandas aliased it to `np.sum`, and that aliasing is deprecated.
sum_public_admin_bufstation_p5 = intp5.groupby('Name_1')[['CNS20']].sum()
sum_public_admin_bufstation_p75 = intp75.groupby('Name_1')[['CNS20']].sum()
   

In [9]:
sum_public_admin_bufstation_p5.head(2)

Unnamed: 0_level_0,CNS20
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,888.0


In [10]:
# Per-station sum of the full block areas of every overlay row.
# Native `.sum()` replaces `.apply(sum)`, which depended on pandas' deprecated
# aliasing of the builtin `sum` to `np.sum`.
totalarea_station_p5 = intp5.groupby('Name_1')[['fullarea']].sum()
totalarea_station_p75 = intp75.groupby('Name_1')[['fullarea']].sum()

# Per-station sum of the overlay (partial) areas.
partialarea_station_p5 = intp5.groupby('Name_1')[['partialarea']].sum()
partialarea_station_p75 = intp75.groupby('Name_1')[['partialarea']].sum()

# Area share per station: summed partial area divided by summed full area.
# Both Series are indexed by 'Name_1', so the division aligns on station name.
multiplier_p5 = partialarea_station_p5['partialarea'] / totalarea_station_p5['fullarea']
multiplier_p75 = partialarea_station_p75['partialarea'] / totalarea_station_p75['fullarea']

multiplier_p5.head(3)

Name_1
ADDISON ROAD-SEAT PLEASANT             0.290269
ANACOSTIA                              0.396426
ARCHIVES-NAVY MEMORIAL-PENN QUARTER    0.714782
dtype: float64

In [16]:
# Scale each station's job total by its area share; axis=0 aligns the
# multiplier with the station index of the totals.
proportion_jobs_walkstation_p5 = sum_public_admin_bufstation_p5.mul(multiplier_p5, axis=0)
proportion_jobs_walkstation_p75 = sum_public_admin_bufstation_p75.mul(multiplier_p75, axis=0)

# Preview the first two stations of the p5 result.
proportion_jobs_walkstation_p5.head(2)

Unnamed: 0_level_0,CNS20
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,352.026486


In [17]:
# Write one CSV per walkshed layer into the output directory.
exports = (
    (proportion_jobs_walkstation_p5, "output/proportional_publicadmin_workers_stations_p5.csv"),
    (proportion_jobs_walkstation_p75, "output/proportional_publicadmin_workers_stations_p75.csv"),
)
for result_frame, csv_path in exports:
    result_frame.to_csv(csv_path)

