In [1]:
import os
# Create the folder that receives the exported spreadsheets; exist_ok=True
# keeps the cell from failing when the folder is already there on a re-run.
os.makedirs('output', exist_ok=True)

import sys
# Put the parent directory on the module search path; this has to run before
# the `utils` import below (presumably utils.py lives one level up — confirm).
sys.path.append('../')

from utils import load_walksheds, overlay_wks

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import openpyxl
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas/geopandas deprecation notices.
warnings.filterwarnings('ignore')
from geopandas.tools import overlay

#### Read in shapefiles


- `entrances` is the data on rail station entrances for the study area
- `wksp5` and `wksp75` are the Metro station walkshed buffers, with overlapping boundaries
- `all_boundaries` is the shapefile of blocks in MD, DC and VA for the counties that the Metro lines pass through
- `Alljobs_MDDCVA` is the jobs data for MD, DC and VA

In [2]:
# Spatial layers: station entrances, the two walkshed sets, and the block boundaries.
entrances = gpd.read_file(
    '../../Data/RailStationEntrances2023.zip'
)
wksp5, wksp75 = load_walksheds()
all_boundaries = gpd.read_file(
    '../../Data/Merge Boundaries/MD_DC_VA/Block MD_DC_VA.shp'
)

# Tabular layer: job counts for MD, DC and VA.
Alljobs_MDDCVA = pd.read_excel(
    '../../Data/AlljobMDDCVA.xlsx'
)

#### Preprocessing

- Bring the shapefiles into a common coordinate system
- Rename `w_geocode` in the jobs table to `GEOID20`, the ID column name in the block shapefile of DC, MD, VA
- Join the jobs to the boundaries of the case study; use a left join so that every block is kept even if it has no matching GEOID in the jobs table

In [3]:
# Reproject the entrances and the block boundaries onto the CRS of the
# walkshed layer so that all layers share one coordinate system.
target_crs = wksp5.crs
entrances = entrances.to_crs(target_crs)
all_boundaries = all_boundaries.to_crs(target_crs)

In [4]:
# Give the job table the column names used by the rest of the notebook:
# the block identifier becomes GEOID20 (the join key) and CNS15 becomes
# education_jobs.
Alljobs_MDDCVA.rename(
    columns={
        'w_geocode': 'GEOID20',
        'CNS15': 'education_jobs',
    },
    inplace=True,
)

In [5]:
# Store the join key as text before merging.
# NOTE(review): assumes GEOID20 in the block shapefile is also a string — confirm.
geoid_as_text = Alljobs_MDDCVA["GEOID20"].astype('str')
Alljobs_MDDCVA["GEOID20"] = geoid_as_text

In [6]:
# Left join: every boundary row is kept; rows without a matching GEOID20 in
# the job table get missing values in the job columns.
Join_Jobs_Boundaries = pd.merge(
    left=all_boundaries,
    right=Alljobs_MDDCVA,
    how='left',
    on="GEOID20",
)

In [7]:
# Record each boundary's area before it is cut by the walksheds; this is the
# denominator of the area share computed later.
# NOTE(review): the area is in the units of the current CRS — confirm it is projected.
block_area = Join_Jobs_Boundaries.area
Join_Jobs_Boundaries['fullarea'] = block_area

- Intersect the Metro station walkshed buffers (the 0.5-mile `p5` set and the `p75` set) with the job boundaries
- Compute the area of each intersected piece (the partial area) to get the proportional area


In [8]:
# Intersect the job-attributed boundaries with the walksheds via the project helper.
# NOTE(review): presumably intp5 / intp75 are the overlays with wksp5 / wksp75
# respectively — confirm in utils.overlay_wks.
intp5, intp75 = overlay_wks(Join_Jobs_Boundaries)

In [9]:
# Area of every intersected piece, i.e. the part of a boundary that falls
# inside a walkshed.
for intersected in (intp5, intp75):
    intersected['partialarea'] = intersected.area

In [10]:
### Education jobs come from the LEHD WAC data (see the SLD tutorial): NAICS sector 61.
### In the WAC table this is column CNS15, renamed to `education_jobs` above.

# Total education jobs of all intersected pieces, per station (`Name_1`).
# Uses the native groupby reduction: `.apply(sum)` relied on pandas silently
# replacing the Python builtin `sum` with its own implementation, a behaviour
# that is deprecated. `.sum()` yields the same one-column DataFrame indexed
# by `Name_1` and skips missing job counts.
sum_education_job_bufstation_p5 = intp5.groupby(['Name_1'])[['education_jobs']].sum()
sum_education_job_bufstation_p75 = intp75.groupby(['Name_1'])[['education_jobs']].sum()

In [11]:
# Per-station (`Name_1`) sums of the full areas and of the intersected
# (partial) areas. Native `.sum()` replaces `.apply(sum)`, which depended on
# pandas' deprecated aliasing of the Python builtin `sum`; the result is the
# same one-column DataFrame indexed by `Name_1`.
totalarea_station_p5 = intp5.groupby(['Name_1'])[['fullarea']].sum()
totalarea_station_p75 = intp75.groupby(['Name_1'])[['fullarea']].sum()

partialarea_station_p5 = intp5.groupby(['Name_1'])[['partialarea']].sum()
partialarea_station_p75 = intp75.groupby(['Name_1'])[['partialarea']].sum()

# Share of the summed full area that lies inside each station's walkshed.
# NOTE(review): this is a ratio of station-level sums applied to the
# station-level job total; it is not the same as weighting each piece by its
# own partialarea / fullarea before summing — confirm this is the intended method.
multiplier_p5 = partialarea_station_p5['partialarea'] / totalarea_station_p5['fullarea']
multiplier_p75 = partialarea_station_p75['partialarea'] / totalarea_station_p75['fullarea']

In [12]:
# Scale each station's education job total by its area multiplier (aligned on
# the station index) and label the resulting column per walkshed set.
proportion_jobs_walkstation_p5 = (
    sum_education_job_bufstation_p5
    .multiply(multiplier_p5, axis="index")
    .rename(columns={'education_jobs': 'Proportion_education_jobs_p5'})
)
proportion_jobs_walkstation_p75 = (
    sum_education_job_bufstation_p75
    .multiply(multiplier_p75, axis="index")
    .rename(columns={'education_jobs': 'Proportion_education_jobs_p75'})
)

# Write one workbook per walkshed set, keeping the station name index.
proportion_jobs_walkstation_p5.to_excel(
    "output/proportion_education_jobs_stations_p5.xlsx",
    sheet_name='num of education_jobs_stations',
    index=True,
)
proportion_jobs_walkstation_p75.to_excel(
    "output/proportion_education_jobs_stations_p75.xlsx",
    sheet_name='num of education_jobs_stations',
    index=True,
)

