In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
from geopandas.tools import overlay
warnings.filterwarnings('ignore')

#### Read in dataset and shapefiles.

- entrances is shapefile for the metro rail station entrances
- walkshedbuffer is..
- all_boundaries is shapefile of counties that metro lines passed from them
- Alljobs_MDDCVA is data of jobs for MD, DC, VA

In [2]:
entrances = gpd.read_file('../../Data/RailStationsEntrances/RailStationEntrances2023.shp')
walkshedbuffer = gpd.read_file('../../Data/unions/Union_walkshades.shp')
all_boundaries= gpd.read_file('../../Data/MD_DC_VA/Block MD_DC_VA.shp')
Alljobs_MDDCVA = pd.read_excel('../../Data/AlljobMDDCVA.xlsx')

#### Preprocessing

- Bring the shapefiles into common cordinate system
- rename w_geocode of job's table to GEOID20, ID name in block shapefile of DC, MD, VA
- Join jobs to boundary of case study, use outer to keep stations if they have not any similarity on GeoID
- intersect buffer of metro stations 0.5 miles with job boundaries

In [3]:
walkshedbuffer= walkshedbuffer.to_crs('EPSG:4326')
entrances = entrances.to_crs('EPSG:4326')
all_boundaries =all_boundaries.to_crs('EPSG:4326')

In [4]:
Alljobs_MDDCVA.rename(columns = {'w_geocode':'GEOID20'}, inplace = True)

all_boundaries["GEOID20"]= all_boundaries["GEOID20"].astype(np.int64)

In [5]:
Join_Jobs_Boundaries=pd.merge(all_boundaries,Alljobs_MDDCVA, on="GEOID20", how='outer')

In [6]:
walkshedbuffer = walkshedbuffer.to_crs ('EPSG:4326')
Join_Jobs_Boundaries = Join_Jobs_Boundaries.to_crs ('EPSG:4326')
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.area

- get stations that intersects
- find the partial area of the intersected boundaries to get the proportional area


In [7]:
intersect_stations_Jobs = walkshedbuffer.overlay(Join_Jobs_Boundaries, how="intersection")

In [8]:
pd.set_option('display.max_columns', None)
intersect_stations_Jobs.head(2)

Unnamed: 0,Name_1,Acres,Shape_Leng_1,Shape_Area_1,StnCode,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,Shape_Leng_2,Shape_Area_2,C000,CA01,CA02,CA03,CE01,CE02,CE03,CNS01,CNS02,CNS03,CNS04,CNS05,CNS06,CNS07,CNS08,CNS09,CNS10,CNS11,CNS12,CNS13,CNS14,CNS15,CNS16,CNS17,CNS18,CNS19,CNS20,CR01,CR02,CR03,CR04,CR05,CR07,CT01,CT02,CD01,CD02,CD03,CD04,CS01,CS02,CFA01,CFA02,CFA03,CFA04,CFA05,CFS01,CFS02,CFS03,CFS04,CFS05,createdate,fullarea,geometry
0,ADDISON ROAD-SEAT PLEASANT : 0 - 2640,246.285276,16545.122053,10728140.0,,24,33,802700,1000,240338027001000,Block 1000,2490.0,0.0,38.8868395,-76.9011164,0.0,0.0,0.003912,2.5849e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.5849e-07,"POLYGON ((-76.90028 38.88671, -76.90097 38.886..."
1,ADDISON ROAD-SEAT PLEASANT : 0 - 2640,246.285276,16545.122053,10728140.0,,24,33,802805,2004,240338028052004,Block 2004,132809.0,0.0,38.8889868,-76.8872501,351.0,938.0,0.016098,1.378871e-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.378871e-05,"POLYGON ((-76.88975 38.88900, -76.88853 38.888..."


In [9]:
intersect_stations_Jobs['partialarea'] = intersect_stations_Jobs.area
intersect_stations_Jobs['Name_1']=intersect_stations_Jobs['Name_1'].str.replace(' : 0 - 2640','')
intersect_stations_Jobs['Name_1']=intersect_stations_Jobs['Name_1'].str.replace(' : 0 - 22.4525758392805','')

In [10]:
### education job from LEHD WAC data in SLD tutorial: NAICS sector 61
### these codes is CNS15

sum_ninetofive_workers_bufstation= intersect_stations_Jobs.groupby(['Name_1'])[['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20']].apply(sum)
   

In [11]:
sum_ninetofive_workers_bufstation['Total Nine to Five workers']= sum_ninetofive_workers_bufstation.iloc[:, -8:-1].sum(axis=1)


In [12]:
sum_ninetofive_workers_bufstation.head(2)

Unnamed: 0_level_0,CNS05,CNS09,CNS10,CNS11,CNS12,CNS13,CNS14,CNS20,Total Nine to Five workers
Name_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ADDISON ROAD-SEAT PLEASANT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ANACOSTIA,0.0,0.0,24.0,28.0,268.0,16.0,532.0,888.0,868.0


In [13]:
sum_ninetofive_workers_bufstation = sum_ninetofive_workers_bufstation.drop(['CNS05','CNS09','CNS10','CNS11','CNS12'
                                                                             ,'CNS13','CNS14','CNS20'],axis = 1)

In [14]:
sum_ninetofive_workers_bufstation.head(3)

Unnamed: 0_level_0,Total Nine to Five workers
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,868.0
ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0


In [15]:
totalarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['fullarea']].apply(sum)
partialarea_station = intersect_stations_Jobs.groupby(['Name_1'])[['partialarea']].apply(sum)
multiplier = partialarea_station['partialarea']/totalarea_station['fullarea']
multiplier.head(3)

Name_1
ADDISON ROAD-SEAT PLEASANT             0.290270
ANACOSTIA                              0.396427
ARCHIVES-NAVY MEMORIAL-PENN QUARTER    0.714782
dtype: float64

In [16]:
proportion_jobs_walkstation = sum_ninetofive_workers_bufstation.multiply(multiplier, axis="index")
proportion_jobs_walkstation.rename(columns={'Total night and Weekend Jobs': 'Proportion night weekend jobs'}, inplace=True)
proportion_jobs_walkstation.head(2)

Unnamed: 0_level_0,Total Nine to Five workers
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,0.0
ANACOSTIA,344.098476


In [17]:
proportion_jobs_walkstation.to_excel("output/proportional_ninetofive_workers_stations.xlsx", sheet_name='ninetofive_workers_stations', index=True)