In [1]:
import os
os.makedirs('output', exist_ok=True)

import sys
sys.path.append('../')

from utils import load_walksheds, overlay_wks
import pandas as pd
import numpy as np
import geopandas as gpd
import warnings
import os
warnings.filterwarnings('ignore')

* load in walkshed buffer and boundaries of md, dc and va to geopandas
* also load in census block group and ACS income data for md, dc, va to geopandas
* merge ACS and Block group data for md, dc, & va respectively
* drop irrelevant columns
* transform walkshed and ACS_Income to the same crs

In [2]:
wksp5, wksp75 = load_walksheds()
all_boundaries = gpd.read_file('../../Data/Merge Boundaries/MD_DC_VA/Block MD_DC_VA.shp')

In [3]:
## block group geodataframe MD, DC, VA

gdb_MD_BG = gpd.read_file("../../Data/GDB states/tlgdb_2021_a_24_md.gdb", driver='FileGDB', layer='Block_Group')

gdb_DC_BG = gpd.read_file ("../../Data/GDB states/tlgdb_2021_a_11_dc.gdb", driver='FileGDB', layer='Block_Group')

gdb_VA_BG = gpd.read_file ("../../Data/GDB states/tlgdb_2021_a_51_va.gdb", driver='FileGDB', layer='Block_Group')

In [4]:
ACS_income_MD = gpd.read_file("../../Data/ACS_2020_5YR_BG_24_MARYLAND.gdb", driver='FileGDB', layer='X25_HOUSING_CHARACTERISTICS')
ACS_income_DC = gpd.read_file ("../../Data/ACS_2020_5YR_BG_11_DISTRICT_OF_COLUMBIA.gdb", driver='FileGDB', layer='X25_HOUSING_CHARACTERISTICS')
ACS_income_VA = gpd.read_file("../../Data/ACS_2020_5YR_BG_51_VIRGINIA.gdb",driver='FileGDB', layer='X25_HOUSING_CHARACTERISTICS')

In [5]:
ACS_income_MD['GEOID'] = ACS_income_MD['GEOID'].str.replace('15000US', '')
ACS_income_DC['GEOID'] = ACS_income_DC['GEOID'].str.replace('15000US', '')
ACS_income_VA['GEOID'] = ACS_income_VA['GEOID'].str.replace('15000US', '')

In [6]:
## join ACS to BG of three MD, DC, VA
ACS_Income_MD = pd.merge(ACS_income_MD, gdb_MD_BG, on='GEOID', how='outer')

ACS_Income_DC = pd.merge(ACS_income_DC, gdb_DC_BG, on='GEOID', how='outer')

ACS_Income_VA = pd.merge(ACS_income_VA, gdb_VA_BG, on='GEOID',how='outer')

In [7]:
ACS_Income_VA.rename(columns = {'geometry_y':'geometry'}, inplace = True)
ACS_Income_DC.rename(columns = {'geometry_y':'geometry'}, inplace = True)
ACS_Income_MD.rename(columns = {'geometry_y':'geometry'}, inplace = True)

ACS_Income_VA = ACS_Income_VA.drop(columns=['geometry_x'])
ACS_Income_DC = ACS_Income_DC.drop(columns=['geometry_x'])
ACS_Income_MD = ACS_Income_MD.drop(columns=['geometry_x'])

In [8]:
ACS_Income = pd.concat([ACS_Income_VA, ACS_Income_DC, ACS_Income_MD], axis=0)

In [10]:
ACS_Income = ACS_Income.to_crs('EPSG:4326')

* intersect hh income df with the walksheds

In [11]:
intersect_income_stations_p5, intersect_income_stations_p75 = overlay_wks(ACS_Income)

### Number of households with exactly zero cars

In [12]:
##rename B25044e3 column to household with no car
intersect_income_stations_p5.rename(columns = {'B25044e3':"Num_of_Household_with_No_Car_p5"}, inplace = True)
intersect_income_stations_p75.rename(columns = {'B25044e3':"Num_of_Household_with_No_Car_p75"}, inplace = True)

In [13]:
sum_income_stations_p5 = intersect_income_stations_p5.groupby(["Name_1"])["Num_of_Household_with_No_Car_p5"].median().reset_index()
sum_income_stations_p75 = intersect_income_stations_p75.groupby(["Name_1"])["Num_of_Household_with_No_Car_p75"].median().reset_index()

In [14]:
sum_income_stations_p5

Unnamed: 0,Name_1,Num_of_Household_with_No_Car_p5
0,ADDISON ROAD-SEAT PLEASANT,16.0
1,ANACOSTIA,9.0
2,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0
3,ARLINGTON CEMETERY,0.0
4,Ashburn,0.0
...,...,...
92,WHEATON,17.5
93,WHITE FLINT,10.0
94,WIEHLE-RESTON EAST,0.0
95,WOODLEY PARK-ZOO/ADAMS MORGAN,40.5


In [15]:
sum_income_stations_p5.to_csv("output/victor-no of household w no car_p5.csv", index=False)
sum_income_stations_p75.to_csv("output/victor-no of household w no car_p75.csv", index=False)

### Number of households with exactly 1 car

In [16]:
one_car_p5 = intersect_income_stations_p5.copy()
one_car_p75 = intersect_income_stations_p75.copy()

In [17]:
##rename B25044e4 column to household with no car
one_car_p5.rename(columns = {'B25044e4':"Num_of_Household_with_1_Car_p5"}, inplace = True)
one_car_p75.rename(columns = {'B25044e4':"Num_of_Household_with_1_Car_p75"}, inplace = True)

In [18]:
sum_one_car_stations_p5 = one_car_p5.groupby(["Name_1"])["Num_of_Household_with_1_Car_p5"].median().reset_index()
sum_one_car_stations_p75 = one_car_p75.groupby(["Name_1"])["Num_of_Household_with_1_Car_p75"].median().reset_index()

In [19]:
sum_one_car_stations_p5.head()

Unnamed: 0,Name_1,Num_of_Household_with_1_Car_p5
0,ADDISON ROAD-SEAT PLEASANT,182.0
1,ANACOSTIA,62.5
2,ARCHIVES-NAVY MEMORIAL-PENN QUARTER,0.0
3,ARLINGTON CEMETERY,0.0
4,Ashburn,0.0


In [20]:
sum_one_car_stations_p5.to_csv("output/victor-no of household w 1 car_p5.csv", index=False)
sum_one_car_stations_p75.to_csv("output/victor-no of household w 1 car_p75.csv", index=False)

### Number of households with more than 1 car

In [21]:
more_cars_p5 = intersect_income_stations_p5.copy()
more_cars_p75 = intersect_income_stations_p75.copy()

In [22]:
more_cars_p5['Sum_Car'] = more_cars_p5[['B25044e5', 'B25044e6', 'B25044e7', 'B25044e8']].sum(axis=1)
more_cars_p75['Sum_Car'] = more_cars_p75[['B25044e5', 'B25044e6', 'B25044e7', 'B25044e8']].sum(axis=1)

In [23]:
more_cars_p5.rename(columns = {'Sum_Car':"Num_of_Household_with_More_than_One_Car_p5"}, inplace = True)
more_cars_p75.rename(columns = {'Sum_Car':"Num_of_Household_with_More_than_One_Car_p75"}, inplace = True)

In [24]:
more_cars_station_p5 = more_cars_p5.groupby(["Name_1"])["Num_of_Household_with_More_than_One_Car_p5"].median().reset_index()
more_cars_station_p75 = more_cars_p75.groupby(["Name_1"])["Num_of_Household_with_More_than_One_Car_p75"].median().reset_index()


In [25]:
more_cars_station_p5.to_csv('output/victor-no of household w more than 1cars_p5.csv', index=False)
more_cars_station_p75.to_csv('output/victor-no of household w more than 1cars_p75.csv', index=False)