In [1]:
import os
os.makedirs('output', exist_ok=True)

import sys
sys.path.append('../')

from utils import load_walksheds, overlay_wks
from functools import reduce
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from geopandas.tools import overlay
import warnings
warnings.filterwarnings('ignore')

#### Read in dataset and shapefiles.

- Walkshedbuffer is data of buffer around the walksheds without overlapping <br/><br/>

- Household_MD data on household in Maryland <br/><br/>
- Household_DC data on household in DC <br/><br/>
- Household_VA data on household in Virginia <br/><br/>
- all_boundaries is shapefile of counties that metro lines passed from them

In [2]:
wksp5, wksp75 = load_walksheds()
Household_MD = pd.read_excel ("../../Data/MD_DECENNIALPL2020.H1-Data.xlsx")
Household_DC = pd.read_excel ("../../Data/DC_DECENNIALPL2020.H1-Data.xlsx")
Household_VA = pd.read_excel ("../../Data/VA_DECENNIALPL2020.H1-Data.xlsx")
all_boundaries = gpd.read_file("../../Data/MD_DC_VA/Block MD_DC_VA.shp")

#### Preprocessing

 - merge the three Housholds MD, DC, and VA to one file

In [3]:
dataframes = [Household_MD, Household_DC, Household_VA]

Housholds_merge = reduce(lambda left, right: pd.merge(left, right, how='outer'), dataframes)

- remove 1000000US from GEOID20 column
- rename GEO_ID of household's table to GEOID20, ID name in block shapefile of DC, MD, VA
- join household file to boundary 

In [4]:
Housholds_merge['GEO_ID'] = Housholds_merge['GEO_ID'].str.replace('1000000US', '')
Housholds_merge.rename(columns = {'GEO_ID':'GEOID20'}, inplace = True)
Housholds_merge.rename(columns = {'H1_001N':'Total Households'}, inplace = True)

Join_household_Boundaries=Join_household_Boundaries=pd.merge(all_boundaries,Housholds_merge, on="GEOID20", how='outer')

In [5]:
Join_household_Boundaries.head(2)

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,...,Shape_Leng,Shape_Area,geometry,NAME,Total Households,H1_001NA,H1_002N,H1_002NA,H1_003N,H1_003NA
0,24,31,700204,1016,240317002041016,Block 1016,2882692.0,10982.0,39.3110468,-77.189444,...,0.07783,0.000302,"POLYGON ((-77.17597 39.31191, -77.17622 39.311...","Block 1016, Block Group 1, Census Tract 7002.0...",110,,103,,7,
1,24,31,705902,1008,240317059021008,Block 1008,112639.0,502.0,38.9700155,-77.1364633,...,0.021442,1.2e-05,"POLYGON ((-77.13358 38.97015, -77.13326 38.969...","Block 1008, Block Group 1, Census Tract 7059.0...",52,,48,,4,


- fix coordinate system of metro buffer and Join_household_Boundaries
- find the full area of the boundaries before the intersection
- intersect households within 0.5 miles of metro station
- find the partial area of the intersected boundaries to get the proportional area
- find proportion of houses within the walkshed



In [6]:
Join_household_Boundaries = Join_household_Boundaries.to_crs ('EPSG:4326')

In [7]:
Join_household_Boundaries['fullarea'] = Join_household_Boundaries.area

In [8]:
intp5, intp75 = overlay_wks(Join_household_Boundaries)

In [9]:
intp5['partialarea'] = intp5.area
intp75['partialarea'] = intp75.area

In [10]:
intp5['proportionarea'] = intp5['partialarea'] / intp5['fullarea']
intp75['proportionarea'] = intp75['partialarea'] / intp75['fullarea']

intp5['proportionhouses'] = intp5['proportionarea'] * intp5['Total Households']
intp75['proportionhouses'] = intp75['proportionarea'] * intp75['Total Households']

In [11]:
Sum_households_bufstation_p5 = intp5.groupby(['Name_1'])[['proportionhouses']].apply(sum).reset_index()
Sum_households_bufstation_p75 = intp75.groupby(['Name_1'])[['proportionhouses']].apply(sum).reset_index()

Sum_households_bufstation_p5.head(3)

Unnamed: 0,Name_1,proportionhouses
0,ADDISON ROAD-SEAT PLEASANT,528.921138
1,ANACOSTIA,2075.704342
2,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,2214.692242


In [12]:
total_households_bufstation_p5 = intp5.groupby(['Name_1'])[['Total Households']].apply(sum)
total_households_bufstation_p75 = intp75.groupby(['Name_1'])[['Total Households']].apply(sum)

Sum_households_bufstation_p5['Total Households'] = total_households_bufstation_p5['Total Households']
Sum_households_bufstation_p75['Total Households'] = total_households_bufstation_p75['Total Households']

In [13]:
# Sum_households_bufstation.reset_index(inplace=True)
Sum_households_bufstation_p5['Name_1'] = Sum_households_bufstation_p5['Name_1'].str.title()
Sum_households_bufstation_p75['Name_1'] = Sum_households_bufstation_p75['Name_1'].str.title()

Sum_households_bufstation_p5['Name_1'] = Sum_households_bufstation_p5['Name_1'].replace({"Addison Road-Seat Pleasant": 'Addison Road', "Archives-Navy Memorial-Penn Quarter":"Archives", "Ballston-Mu": "Ballston-MU", "Brookland-Cua":"Brookland-CUA", "Dunn Loring-Merrifield": "Dunn Loring", "Largo Town Center":"Downtown Largo", "Eisenhower Avenue":"Eisenhower Ave", 'Federal Center Sw':'Federal Center SW', "Foggy Bottom-Gwu":"Foggy Bottom-GWU","Gallery Pl-Chinatown":"Gallery Place", "Mclean":"McLean", "Mcpherson Square":"McPherson Sq", "Mt Vernon Sq 7Th St-Convention Center":"Mt Vernon Sq", "Noma-Gallaudet":"NoMa-Gallaudet U", "Rhode Island Ave-Brentwood":"Rhode Island Ave",  "Prince George'S Plaza":"Hyattsville Crossing", "Tenleytown-Au": "Tenleytown-AU", "Tysons Corner":"Tysons", "U Street/African-Amer Civil War Memorial/Cardozo": "U Street", "Southern Avenue": "Southern Ave", "Van Ness-Udc":"Van Ness-UDC", "Virginia Square-Gmu":"Virginia Sq-GMU", "Vienna/Fairfax-Gmu":"Vienna", "Washington Dulles International Airport":"Dulles Airport", "West Falls Church-Vt/Uva":"West Falls Church", "Woodley Park-Zoo/Adams Morgan":"Woodley Park", "White Flint":"North Bethesda", "College Park-U Of Md": 'College Park-U of Md' })
Sum_households_bufstation_p75['Name_1'] = Sum_households_bufstation_p75['Name_1'].replace({"Addison Road-Seat Pleasant": 'Addison Road', "Archives-Navy Memorial-Penn Quarter":"Archives", "Ballston-Mu": "Ballston-MU", "Brookland-Cua":"Brookland-CUA", "Dunn Loring-Merrifield": "Dunn Loring", "Largo Town Center":"Downtown Largo", "Eisenhower Avenue":"Eisenhower Ave", 'Federal Center Sw':'Federal Center SW', "Foggy Bottom-Gwu":"Foggy Bottom-GWU","Gallery Pl-Chinatown":"Gallery Place", "Mclean":"McLean", "Mcpherson Square":"McPherson Sq", "Mt Vernon Sq 7Th St-Convention Center":"Mt Vernon Sq", "Noma-Gallaudet":"NoMa-Gallaudet U", "Rhode Island Ave-Brentwood":"Rhode Island Ave",  "Prince George'S Plaza":"Hyattsville Crossing", "Tenleytown-Au": "Tenleytown-AU", "Tysons Corner":"Tysons", "U Street/African-Amer Civil War Memorial/Cardozo": "U Street", "Southern Avenue": "Southern Ave", "Van Ness-Udc":"Van Ness-UDC", "Virginia Square-Gmu":"Virginia Sq-GMU", "Vienna/Fairfax-Gmu":"Vienna", "Washington Dulles International Airport":"Dulles Airport", "West Falls Church-Vt/Uva":"West Falls Church", "Woodley Park-Zoo/Adams Morgan":"Woodley Park", "White Flint":"North Bethesda", "College Park-U Of Md": 'College Park-U of Md' })


In [14]:
mstns = pd.read_excel("../../Data/mstn_id_to_stn_name.xlsx")
Sum_households_bufstation_p5 = Sum_households_bufstation_p5.merge(mstns, right_on = 'PRIMARY_NAME_FY23', left_on = 'Name_1')
Sum_households_bufstation_p75 = Sum_households_bufstation_p75.merge(mstns, right_on = 'PRIMARY_NAME_FY23', left_on = 'Name_1')

Sum_households_bufstation_p5.tail()

Unnamed: 0,Name_1,proportionhouses,Total Households,ID,PRIMARY_NAME_FY23
92,Wheaton,2712.23219,,MSTN_046,Wheaton
93,North Bethesda,4011.345203,,MSTN_045,North Bethesda
94,Wiehle-Reston East,1226.261747,,MSTN_091,Wiehle-Reston East
95,Woodley Park,5365.381132,,MSTN_040,Woodley Park
96,Dulles Airport,0.0,,MSTN_095,Dulles Airport


In [15]:
Sum_households_bufstation_p5['proportionhouses'] = Sum_households_bufstation_p5['proportionhouses']/1000
Sum_households_bufstation_p75['proportionhouses'] = Sum_households_bufstation_p75['proportionhouses']/1000

In [16]:
Sum_households_bufstation_p5.to_excel("output/walkshed_proportional_households_stations_p5.xlsx", sheet_name='num of households_stations', index=True)
Sum_households_bufstation_p75.to_excel("output/walkshed_proportional_households_stations_p75.xlsx", sheet_name='num of households_stations', index=True)
