In [1]:
import os
os.makedirs('output', exist_ok=True)

import sys
sys.path.append('../')

from utils import load_walksheds, overlay_wks

import geopandas as gpd
from geopandas.tools import overlay
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl
import warnings
import os
warnings.filterwarnings("ignore")

#### Read in Datasets and Files

* walskhedbuffer is the Metro station walksheds with overlapping boundaries
* all_boundaries All boundaries for census blocks in MD, DC and VA
* Alljobs_MDDCVA contains information for all jobs in MD, DC and VA 

In [2]:
wksp5, wksp75 = load_walksheds()
all_boundaries= gpd.read_file("../../Data/Block MD_DC_VA.zip")
Alljobs_MDDCVA = pd.read_excel("../../Data/AlljobMDDCVA.xlsx")

#### Preprocessing


* Rename the w_geocode column from jobs table to GEOID20 to match the ID column name in the block shapefile of DC, MD, VA
* Convert the GEOID20 column in the jobs table to strings 

In [3]:
Alljobs_MDDCVA.rename(columns = {'w_geocode':'GEOID20'}, inplace = True)
Alljobs_MDDCVA["GEOID20"]= Alljobs_MDDCVA["GEOID20"].astype('str')

* Join jobs to all_boundaries. Keep boundaries where there are no jobs

In [4]:
Join_Jobs_Boundaries=pd.merge(all_boundaries, Alljobs_MDDCVA, on="GEOID20", how='left')
Join_Jobs_Boundaries.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,...,CFA02,CFA03,CFA04,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate
0,24,31,700204,1016,240317002041016,Block 1016,2882692.0,10982.0,39.3110468,-77.189444,...,,,,,,,,,,
1,24,31,705902,1008,240317059021008,Block 1008,112639.0,502.0,38.9700155,-77.1364633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0


* Change the CRS of the boundaries to match the walksheds
* Create a column of the full area of the jobs

In [5]:
Join_Jobs_Boundaries = Join_Jobs_Boundaries.to_crs('EPSG:4326')
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.area

* Intersect the jobs with the walksheds
* Create a partial area column to get the block area that is within a walkshed
* Fix the names of the stations by removing extra numbers/characters at the end

In [6]:
intp5, intp75 = overlay_wks(Join_Jobs_Boundaries)

In [7]:
intp5['partialarea'] = intp5.area  # each block is within which specific walkshedbuffer of 0.5
intp75['partialarea'] = intp75.area  # each block is within which specific walkshedbuffer of 0.75

In [8]:
intp5.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,...,CFS05,createdate,fullarea,Name_1,Acres,Shape_Leng_2,Shape_Area_2,StnCode,geometry,partialarea
0,24,31,704404,1004,240317044041004,Block 1004,224514.0,415.0,39.0066882,-77.09356,...,0.0,20211018.0,2.3e-05,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09737 39.00454, -77.09740 39.004...",2e-06
1,24,31,705000,4005,240317050004005,Block 4005,74301.0,579.0,39.0020246,-77.0962632,...,0.0,20211018.0,8e-06,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09535 39.00312, -77.09522 39.002...",8e-06


* Sum all of the jobs within the buffer of each station
* Rename the jobs column
* For each station, calculate the percent area of blocks that are within each station walkshed
* Multiply that percentage (multiplier) by the total number of jobs (Sum_job_bufstation) to get the proportional number of jobs
* Export the proporional number of jobs

In [9]:
### summation all jobs for each intersection###
Sum_job_bufstation_p5 = intp5.groupby(['Name_1'])[['C000']].apply(sum) 
Sum_job_bufstation_p5.rename(columns = {'C000':'All_Jobs_p5'}, inplace = True)

Sum_job_bufstation_p75 = intp75.groupby(['Name_1'])[['C000']].apply(sum) 
Sum_job_bufstation_p75.rename(columns = {'C000':'All_Jobs_p75'}, inplace = True)
Sum_job_bufstation_p5.head()

Unnamed: 0_level_0,All_Jobs_p5
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,708.0
ANACOSTIA,5884.0
ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0
ARLINGTON CEMETERY,488.0
Ashburn,6452.0


In [15]:
totalarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['fullarea']].apply(sum)
partialarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['partialarea']].apply(sum)
multiplier = partialarea_station['partialarea']/totalarea_station['fullarea']

In [16]:
proportion_Jobs_walkstation = Sum_job_bufstation.multiply(multiplier, axis="index")
proportion_Jobs_walkstation.rename(columns={'All Jobs': 'Proportion Jobs'}, inplace=True)
proportion_Jobs_walkstation.head(2)

Unnamed: 0_level_0,All_Jobs
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,205.510841
ANACOSTIA,2332.575383


In [17]:
proportion_Jobs_walkstation.to_excel("output/proportional_jobs_stations.xlsx", sheet_name='num of jobs_stations', index=True)