In [1]:
import os
os.makedirs('output', exist_ok=True)

import sys
sys.path.append('../')

from utils import load_walksheds, overlay_wks

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
from geopandas.tools import overlay
warnings.filterwarnings('ignore')

#### Read in dataset and shapefiles.

- entrances is shapefile for the metro rail station entrances
- walkshedbuffer is..
- all_boundaries is shapefile of counties that metro lines passed from them
- Alljobs_MDDCVA is data of jobs for MD, DC, VA

In [2]:
entrances = gpd.read_file('../../Data/RailStationsEntrances/RailStationEntrances2023.shp')
wksp5, wksp75 = load_walksheds()
all_boundaries= gpd.read_file('../../Data/MD_DC_VA/Block MD_DC_VA.shp')
Alljobs_MDDCVA = pd.read_excel('../../Data/AlljobMDDCVA.xlsx')

#### Preprocessing

- Bring the shapefiles into common cordinate system
- rename w_geocode of job's table to GEOID20, ID name in block shapefile of DC, MD, VA
- Join jobs to boundary of case study, use outer to keep stations if they have not any similarity on GeoID
- intersect buffer of metro stations 0.5 miles with job boundaries

In [3]:
entrances = entrances.to_crs('EPSG:4326')
all_boundaries =all_boundaries.to_crs('EPSG:4326')

In [4]:
Alljobs_MDDCVA.rename(columns = {'w_geocode':'GEOID20'}, inplace = True)

all_boundaries["GEOID20"]= all_boundaries["GEOID20"].astype(np.int64)

In [5]:
Join_Jobs_Boundaries=pd.merge(all_boundaries,Alljobs_MDDCVA, on="GEOID20", how='outer')

In [6]:
Join_Jobs_Boundaries = Join_Jobs_Boundaries.to_crs ('EPSG:4326')
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.area

- get stations that intersects
- find the partial area of the intersected boundaries to get the proportional area


In [7]:
intp5, intp75 = overlay_wks(Join_Jobs_Boundaries)
# intersect_stations_Jobs = walkshedbuffer.overlay(Join_Jobs_Boundaries, how="intersection")

In [8]:
pd.set_option('display.max_columns', None)
intp5.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,Shape_Leng_1,Shape_Area_1,C000,CA01,CA02,CA03,CE01,CE02,CE03,CNS01,CNS02,CNS03,CNS04,CNS05,CNS06,CNS07,CNS08,CNS09,CNS10,CNS11,CNS12,CNS13,CNS14,CNS15,CNS16,CNS17,CNS18,CNS19,CNS20,CR01,CR02,CR03,CR04,CR05,CR07,CT01,CT02,CD01,CD02,CD03,CD04,CS01,CS02,CFA01,CFA02,CFA03,CFA04,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate,fullarea,Name_1,Acres,Shape_Leng_2,Shape_Area_2,StnCode,geometry
0,24,31,704404,1004,240317044041004,Block 1004,224514.0,415.0,39.0066882,-77.09356,59.0,152.0,0.042248,2.3e-05,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0,2.3e-05,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09737 39.00454, -77.09740 39.004..."
1,24,31,705000,4005,240317050004005,Block 4005,74301.0,579.0,39.0020246,-77.0962632,0.0,0.0,0.011929,8e-06,8.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20211018.0,8e-06,MEDICAL CENTER,379.404828,19251.879962,16526810.0,,"POLYGON ((-77.09535 39.00312, -77.09522 39.002..."


In [9]:
intp5['partialarea'] = intp5.area
intp75['partialarea'] = intp75.area

In [10]:
### education job from LEHD WAC data in SLD tutorial: NAICS sector 61
### these codes is CNS15

sum_ninetofive_workers_bufstation_p5 = intp5.groupby(['Name_1'])[['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20']].apply(sum)


sum_ninetofive_workers_bufstation_p75 = intp75.groupby(['Name_1'])[['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20']].apply(sum)
   

In [11]:
sum_ninetofive_workers_bufstation_p5['Total_Nine_to_Five_workers_p5'] = sum_ninetofive_workers_bufstation_p5.iloc[:, -8:-1].sum(axis=1)

sum_ninetofive_workers_bufstation_p75['Total_Nine_to_Five_workers_p75']= sum_ninetofive_workers_bufstation_p75.iloc[:, -8:-1].sum(axis=1)

In [12]:
sum_ninetofive_workers_bufstation_p5.head(2)

Unnamed: 0_level_0,CNS05,CNS09,CNS10,CNS11,CNS12,CNS13,CNS14,CNS20,Total_Nine_to_Five_workers_p5
Name_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ADDISON ROAD-SEAT PLEASANT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ANACOSTIA,0.0,0.0,24.0,28.0,268.0,16.0,532.0,888.0,868.0


In [13]:
sum_ninetofive_workers_bufstation_p5 = sum_ninetofive_workers_bufstation_p5.drop(['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20'],axis = 1)

sum_ninetofive_workers_bufstation_p75 = sum_ninetofive_workers_bufstation_p75.drop(['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20'],axis = 1)

In [14]:
sum_ninetofive_workers_bufstation_p5.head(3)

Unnamed: 0_level_0,Total_Nine_to_Five_workers_p5
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,868.0
ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0


In [15]:
totalarea_station_p5 = intp5.groupby(['Name_1'])[['fullarea']].apply(sum)
totalarea_station_p75 = intp75.groupby(['Name_1'])[['fullarea']].apply(sum)

partialarea_station_p5 = intp5.groupby(['Name_1'])[['partialarea']].apply(sum)
partialarea_station_p75 = intp75.groupby(['Name_1'])[['partialarea']].apply(sum)

multiplier_p5 = partialarea_station_p5['partialarea']/totalarea_station_p5['fullarea']
multiplier_p75 = partialarea_station_p75['partialarea']/totalarea_station_p75['fullarea']

multiplier_p5.head(3)

Name_1
ADDISON ROAD-SEAT PLEASANT             0.290269
ANACOSTIA                              0.396426
ARCHIVES-NAVY MEMORIAL-PENN QUARTER    0.714782
dtype: float64

In [16]:
proportion_jobs_walkstation_p5 = sum_ninetofive_workers_bufstation_p5.multiply(multiplier_p5, axis="index")
proportion_jobs_walkstation_p75 = sum_ninetofive_workers_bufstation_p75.multiply(multiplier_p75, axis="index")


proportion_jobs_walkstation_p5.rename(columns={'Total night and Weekend Jobs': 'Proportion_night_weekend_jobs_p5'}, inplace=True)
proportion_jobs_walkstation_p75.rename(columns={'Total night and Weekend Jobs': 'Proportion_night_weekend_jobs_p75'}, inplace=True)

proportion_jobs_walkstation_p5.head(2)

Unnamed: 0_level_0,Total_Nine_to_Five_workers_p5
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,344.097962


In [17]:
proportion_jobs_walkstation_p5.reset_index().to_excel("output/proportional_ninetofive_workers_stations_p5.xlsx", sheet_name='ninetofive_workers_stations_p5', index=True)
proportion_jobs_walkstation_p75.reset_index().to_excel("output/proportional_ninetofive_workers_stations_p75.xlsx", sheet_name='ninetofive_workers_stations_p75', index=True)
