In [1]:
import os
# Make sure the folder that the export cell writes its Excel files into exists.
os.makedirs('output', exist_ok=True)

import sys
# Put the parent directory on the import path so the project-local `utils`
# module can be imported below (presumably it lives one level up — confirm).
sys.path.append('../')
from utils import load_walksheds, overlay_wks

import geopandas as gpd
from geopandas.tools import overlay
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
# NOTE(review): this silences *every* warning for the rest of the notebook,
# including pandas/geopandas deprecation warnings that would flag code that
# is about to break on a library upgrade.
warnings.filterwarnings("ignore")

#### Read in Datasets and Files

* walkshedbuffer is the Metro station walksheds with overlapping boundaries
* all_boundaries: all boundaries for census blocks in MD, DC and VA
* Alljobs_MDDCVA contains information for all jobs in MD, DC and VA

In [2]:
# Tabular job counts and the census-block polygons are read from the shared
# Data folder; the two walkshed layers come from the project helper.
Alljobs_MDDCVA = pd.read_excel(io="../../Data/AlljobMDDCVA.xlsx")
all_boundaries = gpd.read_file(filename="../../Data/Block MD_DC_VA.zip")
wksp5, wksp75 = load_walksheds()

#### Preprocessing


* Convert the walkshed boundaries into the same CRS as the all boundaries layer
* Rename the w_geocode column from jobs table to GEOID20 to match the ID column name in the block shapefile of DC, MD, VA
* Convert the GEOID20 column in the jobs table to strings 

In [3]:
# Reproject the block polygons into the CRS of the walkshed layer so the
# two layers can be intersected later on.
walkshed_crs = wksp5.crs
all_boundaries = all_boundaries.to_crs(crs=walkshed_crs)

In [4]:
# Give the jobs table the same key column name as the block shapefile and
# store that key as text, so the merge on GEOID20 compares like with like.
Alljobs_MDDCVA = (
    Alljobs_MDDCVA
    .rename(columns={'w_geocode': 'GEOID20'})
    .astype({'GEOID20': str})
)

* Join jobs to all_boundaries. Keep boundaries where there are no jobs
* Create a column holding the full area of each block (computed before the blocks are intersected with the walksheds)

In [5]:
# Left join: every block polygon is kept, with job columns left empty for
# blocks that have no matching row in the jobs table.
Join_Jobs_Boundaries = all_boundaries.merge(
    Alljobs_MDDCVA,
    how='left',
    on="GEOID20",
)

# Area of each whole block, recorded before any intersection is applied.
Join_Jobs_Boundaries['fullarea'] = Join_Jobs_Boundaries.geometry.area

* Intersect the jobs with the walksheds
* Create a partial area column to get the block area that is within a walkshed
* Fix the names of the stations by removing extra numbers/characters at the end

In [6]:
# overlay_wks is a project helper imported from utils; it returns two frames,
# unpacked here as intp5 and intp75 (presumably the blocks intersected with
# the wksp5 and wksp75 walkshed layers respectively — confirm in utils).
intp5, intp75 = overlay_wks(Join_Jobs_Boundaries)

In [7]:
# Area of each intersected piece, i.e. the part of a block that is left
# after the overlay; stored next to the block's 'fullarea'.
for clipped_blocks in (intp5, intp75):
    clipped_blocks['partialarea'] = clipped_blocks.geometry.area

* Group all of the jobs that apply to the night and weekend jobs category within the buffer of each station (these codes are CNS07 (retail jobs), and CNS17 and CNS18 for restaurants and entertainment)
* Sum the jobs within each group and delete the individual job category columns
* For each station, calculate the percent area of blocks that are within each station walkshed
* Multiply that percentage (multiplier) by the total number of jobs (Sum_nightjob_bufstation) to get the proportional number of night and weekend jobs
* Export the proportional number of night and weekend jobs

In [8]:
# Per-station totals of the three night/weekend job columns.
# GroupBy.sum() is used instead of .apply(sum): passing the Python builtin
# `sum` only works because pandas silently substitutes np.sum for it, and
# that substitution is deprecated. Missing job counts (blocks without jobs
# from the left join) are skipped, so they contribute 0 to the totals.
Sum_nightjob_bufstation_p5 = intp5.groupby(['Name_1'])[['CNS07', 'CNS17', 'CNS18']].sum()
Sum_nightjob_bufstation_p75 = intp75.groupby(['Name_1'])[['CNS07', 'CNS17', 'CNS18']].sum()


In [9]:
# Collapse the three job-category columns into one per-station total and
# discard the individual categories, for both walkshed variants.
Sum_nightjob_bufstation_p5 = (
    Sum_nightjob_bufstation_p5
    .assign(**{'Total night and Weekend Jobs': Sum_nightjob_bufstation_p5.iloc[:, 0:3].sum(axis=1)})
    .drop(columns=['CNS07', 'CNS17', 'CNS18'])
)

Sum_nightjob_bufstation_p75 = (
    Sum_nightjob_bufstation_p75
    .assign(**{'Total night and Weekend Jobs': Sum_nightjob_bufstation_p75.iloc[:, 0:3].sum(axis=1)})
    .drop(columns=['CNS07', 'CNS17', 'CNS18'])
)
Sum_nightjob_bufstation_p5.head(2)

Unnamed: 0_level_0,Total night and Weekend Jobs
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,36.0
ANACOSTIA,340.0


In [10]:
# Per-station sums of the whole-block areas and of the intersected areas.
# GroupBy.sum() replaces .apply(sum): the builtin `sum` only worked through
# pandas' deprecated substitution of np.sum for builtin callables.
totalarea_station_p5 = intp5.groupby(['Name_1'])[['fullarea']].sum()
totalarea_station_p75 = intp75.groupby(['Name_1'])[['fullarea']].sum()

partialarea_station_p5 = intp5.groupby(['Name_1'])[['partialarea']].sum()
partialarea_station_p75 = intp75.groupby(['Name_1'])[['partialarea']].sum()

# Station-level multiplier: share of the summed block area that is covered
# by the intersected pieces. Both operands are indexed by Name_1, so the
# division aligns station by station.
multiplier_p5 = partialarea_station_p5['partialarea'] / totalarea_station_p5['fullarea']
multiplier_p75 = partialarea_station_p75['partialarea'] / totalarea_station_p75['fullarea']


In [11]:
# Scale each station's job total by its area multiplier (aligned on the
# station index) and label the resulting column per walkshed variant.
proportion_jobs_walkstation_p5 = (
    Sum_nightjob_bufstation_p5
    .mul(multiplier_p5, axis=0)
    .rename(columns={'Total night and Weekend Jobs': 'Proportion_night_weekend_jobs_p5'})
)

proportion_jobs_walkstation_p75 = (
    Sum_nightjob_bufstation_p75
    .mul(multiplier_p75, axis=0)
    .rename(columns={'Total night and Weekend Jobs': 'Proportion_night_weekend_jobs_p75'})
)
proportion_jobs_walkstation_p5

Unnamed: 0_level_0,Proportion_night_weekend_jobs_p5
Name_1,Unnamed: 1_level_1
ADDISON ROAD-SEAT PLEASANT,10.449694
ANACOSTIA,134.784916
ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.000000
ARLINGTON CEMETERY,63.155243
Ashburn,471.516448
...,...
WHEATON,1388.708404
WHITE FLINT,2257.603743
WIEHLE-RESTON EAST,0.000000
WOODLEY PARK-ZOO/ADAMS MORGAN,3147.825035


In [12]:
# Write one workbook per walkshed variant into the output folder, keeping
# the station name index as the first column.
for station_jobs, workbook_path in (
    (proportion_jobs_walkstation_p5, "output/Proportional_night_weekend_jobs_ML_p5.xlsx"),
    (proportion_jobs_walkstation_p75, "output/Proportional_night_weekend_jobs_ML_p75.xlsx"),
):
    station_jobs.to_excel(workbook_path, sheet_name='num of night jobs_stations', index=True)
