In [1]:
import geopandas as gpd
from geopandas.tools import overlay
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl
import warnings
import os
warnings.filterwarnings("ignore")

#### Read in Datasets and Files

* walskhedbuffer is the Metro station walksheds with overlapping boundaries
* all_boundaries All boundaries for census blocks in MD, DC and VA
* Alljobs_MDDCVA contains information for all jobs in MD, DC and VA 

In [3]:
walkshedbuffer = gpd.read_file("../../Data/walksheds_overlapping.zip")
all_boundaries= gpd.read_file("../../Data/Block MD_DC_VA.zip")
Alljobs_MDDCVA = pd.read_excel("../../Data/AlljobMDDCVA.xlsx")

#### Preprocessing


* Rename the w_geocode column from jobs table to GEOID20 to match the ID column name in the block shapefile of DC, MD, VA
* Convert the GEOID20 column in the jobs table to strings 

In [4]:
Alljobs_MDDCVA.rename(columns = {'w_geocode':'GEOID20'}, inplace = True)
Alljobs_MDDCVA["GEOID20"]= Alljobs_MDDCVA["GEOID20"].astype('str')

* Join jobs to all_boundaries. Keep boundaries where there are no jobs

In [5]:
Join_Jobs_Boundaries=pd.merge(all_boundaries, Alljobs_MDDCVA, on="GEOID20", how='left')
Join_Jobs_Boundaries.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,...,CFA02,CFA03,CFA04,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate
0,24,31,700204,1016,240317002041016,Block 1016,2882692.0,10982.0,39.3110468,-77.189444,...,,,,,,,,,,
1,24,31,705902,1008,240317059021008,Block 1008,112639.0,502.0,38.9700155,-77.1364633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0


* Change the CRS of the boundaries to match the walksheds
* Create a column of the full area of the jobs

In [6]:
walkshedbuffer= walkshedbuffer.to_crs('EPSG:4326')
Join_Jobs_Boundaries = Join_Jobs_Boundaries.to_crs('EPSG:4326')
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.area

* Intersect the jobs with the walksheds
* Create a partial area column to get the block area that is within a walkshed
* Fix the names of the stations by removing extra numbers/characters at the end

In [7]:
intersect_stations_Jobs = walkshedbuffer.overlay(Join_Jobs_Boundaries, how="intersection")

In [8]:
intersect_stations_Jobs['partialarea'] = intersect_stations_Jobs.area  # each block is within which specific walkshedbuffer

In [9]:
intersect_stations_Jobs.head(2)

Unnamed: 0,Name_1,Acres,Shape_Leng_1,Shape_Area_1,StnCode,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,...,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate,fullarea,geometry,partialarea
0,ADDISON ROAD-SEAT PLEASANT : 0 - 2640,246.285276,16545.122053,10728140.0,,24,33,802700,1000,240338027001000,...,,,,,,,,2.584903e-07,"POLYGON ((-76.90028 38.88671, -76.90097 38.886...",2.584903e-07
1,ADDISON ROAD-SEAT PLEASANT : 0 - 2640,246.285276,16545.122053,10728140.0,,24,33,802805,2004,240338028052004,...,,,,,,,,1.378872e-05,"POLYGON ((-76.88975 38.88900, -76.88853 38.888...",1.7865e-06


In [10]:
intersect_stations_Jobs['Name_1']=intersect_stations_Jobs['Name_1'].str.replace(' : 0 - 2640','')
intersect_stations_Jobs['Name_1']=intersect_stations_Jobs['Name_1'].str.replace(' : 0 - 22.4525758392805','')


* Sum all of the jobs within the buffer of each station
* Rename the jobs column
* For each station, calculate the percent area of blocks that are within each station walkshed
* Multiply that percentage (multiplier) by the total number of jobs (Sum_job_bufstation) to get the proportional number of jobs
* Export the proporional number of jobs

In [11]:
### summation all jobs for each intersection###
Sum_job_bufstation = intersect_stations_Jobs.groupby(['Name_1'])[['C000']].apply(sum) 
Sum_job_bufstation.rename(columns = {'C000':'All_Jobs'}, inplace = True)

In [12]:
totalarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['fullarea']].apply(sum)
partialarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['partialarea']].apply(sum)
multiplier = partialarea_station['partialarea']/totalarea_station['fullarea']

In [13]:
proportion_Jobs_walkstation = Sum_job_bufstation.multiply(multiplier, axis="index")
proportion_Jobs_walkstation.rename(columns={'All Jobs': 'Proportion Jobs'}, inplace=True)
proportion_Jobs_walkstation.head(2)

Unnamed: 0_level_0,All_Jobs
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,205.510639
ANACOSTIA,2332.571897


In [14]:
proportion_Jobs_walkstation.to_excel("output/proportional_jobs_stations.xlsx", sheet_name='num of jobs_stations', index=True)