# Transit Equity/ Bus Routes (MBTA only)

In [148]:
# imports
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import requests

## Step 1: Data collection
Read [this report](http://www.wrrb.org/wp-content/uploads/2019/05/WRRB-FareFree-Transit-Report.pdf) to understand the issue. Collect data - create a spreadsheet of all the different bus stops in Massachusetts including MBTA, Regional Transit Authorities, and City/Town buses.

**Make sure datasets --- 'mbtabus/', 'CENSUS2010_BLK_BG_TRCT_SHP/' and 'census.csv' are under './data/'**

In [137]:
# Collect data for bus stops 
def load_bus_stop():
    """
    Read the MBTA bus stop shapefile under ./data/mbtabus/ with geopandas
    and return the loaded table.

    Data description link: https://docs.digital.mass.gov/dataset/massgis-data-mbta-bus-routes-and-stops
    """
    shapefile_path = "./data/mbtabus/MBTABUSSTOPS_PT.shp"
    return gpd.read_file(shapefile_path)


def load_census_tract():
    """
    Read the 2010 census tract polygon shapefile under
    ./data/CENSUS2010_BLK_BG_TRCT_SHP/ with geopandas and return the loaded table.

    Data description link: https://docs.digital.mass.gov/dataset/massgis-data-datalayers-2010-us-census
    """
    shapefile_path = "./data/CENSUS2010_BLK_BG_TRCT_SHP/CENSUS2010TRACTS_POLY.shp"
    return gpd.read_file(shapefile_path)

# Collect Census Data:
def get_median_hh_income():
    '''
        Fetches the Median Household Income Estimate by Census Tract for MA from the Census API.
        American Community Survey (ACS) 2018 Census data used.
        Specific table: ACS 2018 5-year detailed table "B19013_001E"

        Returns:
            Pandas DataFrame whose column names are the first row of the API
            response and whose rows are every row after it.

        Raises:
            requests.HTTPError: if the API answers with an error status code.
            requests.Timeout: if the API does not answer within 30 seconds.
    '''
    URL = "https://api.census.gov/data/2018/acs/acs5?get=B19013_001E&for=tract:*&in=state:25"

    # Bound the wait so a stalled connection cannot hang the notebook.
    response = requests.get(url=URL, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = response.json()

    # data[0] is the header row. Keep *all* remaining rows: the previous slice
    # data[1:len(data)-1] silently dropped the last tract.
    header, rows = data[0], data[1:]
    median_income_df = pd.DataFrame(rows, columns=header)

    return median_income_df

def load_median_hh_income():
    '''
        *** USE THIS FUNCTION TO LOAD INCOME DATA FROM LOCAL ***
        Reads ./data/census.csv and returns it as a Pandas DataFrame: the
        Median Household Income Estimate by Census Tract for MA.
        American Community Survey (ACS) 2018 Census data used.
        Specific table: ACS 2018 5-year detailed table "B19013_001E"
    '''
    return pd.read_csv("./data/census.csv")

In [138]:
# Load the two shapefiles and the locally cached income table from ./data.
busstop_gdf = load_bus_stop()
census_tract_gdf = load_census_tract()
# NOTE: despite the _gdf suffix, this one is a plain pandas DataFrame (it comes from pd.read_csv).
median_income_for_tract_gdf = load_median_hh_income()

# Collect Census Data (optional: fetch the income table from the Census API
# instead of reading the local CSV).
# NOTE(review): to_csv() without a path only returns the CSV text; pass a path
# (presumably "./data/census.csv" -- confirm) to actually save the download.
# res = get_median_hh_income()
# res.to_csv()

In [139]:
# Inspect the loaded bus stop table.
busstop_gdf

Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,geometry
0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,POINT (237120.669 892643.408)
1,841,Lagrange St @ Virgil Rd,BOSTON,35,POINT (227915.195 892644.017)
2,446,Norfolk St @ Nelson St,BOSTON,35,POINT (234385.661 892644.944)
3,847,Lagrange St opp Virgil St,BOSTON,35,POINT (227912.601 892650.156)
4,3079,Adams St @ Minot St,BOSTON,35,POINT (236644.812 892651.990)
...,...,...,...,...,...
7805,9097,Grove St @ Lebanon St,MELROSE,178,POINT (236229.381 911541.866)
7806,5911,Grove St @ Lebanon St,MELROSE,178,POINT (236236.036 911542.538)
7807,5975,Wyoming Ave opp Cleveland St,MELROSE,178,POINT (234977.387 911544.999)
7808,15976,Wyoming Ave @ Cleveland St,MELROSE,178,POINT (234971.098 911547.184)


In [140]:
# Inspect the loaded census tract table.
census_tract_gdf

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,AREA_ACRES,POP100_RE,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry
0,25,021,418003,25021418003,4180.03,Census Tract 4180.03,G5020,1705668.0,2936.0,+42.2350240,...,422.1740,2481,1350,0141872,123764,0003690,0123764,1.708484e+06,6274.185034,"POLYGON ((240678.759 886748.073, 240607.995 88..."
1,25,021,417701,25021417701,4177.01,Census Tract 4177.01,G5020,1543651.0,12275.0,+42.2523398,...,384.4502,5417,2983,0141838,123730,0003683,0123730,1.555821e+06,6229.137913,"POLYGON ((240969.666 890122.806, 240985.156 89..."
2,25,021,417702,25021417702,4177.02,Census Tract 4177.02,G5020,1685529.0,317947.0,+42.2582818,...,416.4918,2765,1109,0141843,123735,0003684,0123735,1.685489e+06,8542.498286,"POLYGON ((242328.221 890545.050, 242276.888 89..."
3,25,021,418102,25021418102,4181.02,Census Tract 4181.02,G5020,771203.0,2371.0,+42.2470410,...,191.1397,3377,1676,0141882,123774,0003693,0123774,7.735182e+05,4540.296101,"POLYGON ((240372.666 888331.030, 240393.057 88..."
4,25,021,418004,25021418004,4180.04,Census Tract 4180.04,G5020,1316466.0,0.0,+42.2383744,...,325.2812,4280,1987,0141874,123766,0003691,0123766,1.316372e+06,5854.497366,"POLYGON ((240687.299 888298.408, 240700.449 88..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1467,25,027,709501,25027709501,7095.01,Census Tract 7095.01,G5020,5484847.0,0.0,+42.5343643,...,1355.2791,2149,809,0189917,171462,0004065,0171462,5.484642e+06,9858.734830,"POLYGON ((182231.394 921599.665, 182405.418 92..."
1468,25,027,709502,25027709502,7095.02,Census Tract 7095.02,G5020,6089034.0,1287.0,+42.5573788,...,1504.8969,5670,2488,0189919,171464,0004066,0171464,6.090126e+06,11906.751110,"POLYGON ((181045.532 923444.745, 181902.923 92..."
1469,25,027,730801,25027730801,7308.01,Census Tract 7308.01,G5020,3158638.0,7980.0,+42.2816214,...,782.4305,3665,1557,0190133,171678,0004114,0171678,3.166396e+06,9559.778037,"POLYGON ((174482.678 891951.535, 174407.978 89..."
1470,25,027,710800,25027710800,7108,Census Tract 7108,G5020,2337125.0,0.0,+42.5913348,...,577.4996,4539,1940,0189967,171512,0004077,0171512,2.337067e+06,9574.497510,"POLYGON ((176303.717 926403.339, 176284.512 92..."


In [141]:
# Inspect the income table read from ./data/census.csv.
median_income_for_tract_gdf

Unnamed: 0.1,Unnamed: 0,B19013_001E,state,county,tract
0,0,132750,25,25,60501
1,1,12759,25,25,61101
2,2,84083,25,25,70101
3,3,28851,25,25,70402
4,4,52676,25,25,71101
...,...,...,...,...,...
1472,1472,69750,25,5,613902
1473,1473,52741,25,5,630101
1474,1474,58362,25,5,630102
1475,1475,34692,25,5,640901


Then traverse the bus stops to identify which census tract each one is in.

In [142]:
def tract_for_stop(busstop_gdf, census_tract_gdf):
    """
    Find the census tract that contains each bus stop.

    Parameters:
        busstop_gdf: bus stop table with a 'STOP_ID' column and a `geometry`
            attribute holding one point per stop.
        census_tract_gdf: tract table with a 'TRACTCE10' column and a
            `geometry` attribute holding one polygon per tract.

    Returns:
        A dictionary in {stop_id: tract_id} format. For each stop the first
        tract (in table order) whose polygon contains the point is used. A stop
        that lies within no tract gets no entry, and a STOP_ID that appears
        more than once keeps the tract of its last occurrence.

    NOTE(review): TRACTCE10 is only the tract code; the tract table also carries
    COUNTYFP10 and GEOID10. If tract codes repeat across counties, TRACTCE10
    alone is ambiguous -- confirm, and consider keying on GEOID10.
    """
    # Pair ids with geometries by position. Indexing the columns with
    # series[i] is a *label* lookup and breaks as soon as either table has an
    # index other than 0..n-1 (e.g. after filtering rows).
    tracts = list(zip(census_tract_gdf['TRACTCE10'], census_tract_gdf.geometry))

    stopid_tract_dict = {}
    for stopid, point in zip(busstop_gdf['STOP_ID'], busstop_gdf.geometry):
        for tractid, polygon in tracts:
            if point.within(polygon):
                stopid_tract_dict[stopid] = tractid
                # Stop scanning once the containing tract has been found.
                break

    return stopid_tract_dict

In [72]:
# Build the {stop_id: tract_id} lookup (every stop is tested against the
# tract polygons), then display it.
stopid_tract_dict = tract_for_stop(busstop_gdf, census_tract_gdf)
stopid_tract_dict

{3077: '100700',
 841: '130200',
 446: '100300',
 847: '130200',
 3079: '100800',
 8687: '100800',
 808: '130200',
 33076: '100700',
 39870: '510200',
 3988: '510100',
 3989: '421100',
 3987: '421100',
 3990: '421100',
 3986: '421100',
 3991: '421100',
 3985: '421100',
 3984: '421100',
 3992: '421100',
 39941: '421100',
 39942: '421100',
 3993: '421100',
 3994: '421100',
 3982: '421100',
 3996: '421100',
 3981: '421100',
 4232: '457100',
 3980: '421100',
 3997: '421100',
 4233: '457100',
 3998: '421100',
 3979: '421100',
 14233: '457100',
 4234: '457100',
 4230: '457100',
 3999: '421100',
 3978: '421100',
 4229: '420302',
 4235: '420302',
 3977: '421100',
 4000: '421100',
 4236: '420302',
 3199: '417701',
 111720: '416200',
 4376: '416400',
 2784: '140107',
 3042: '417701',
 3659: '422800',
 3642: '422800',
 111823: '501101',
 4332: '416400',
 3355: '418200',
 3035: '417701',
 3394: '418200',
 111747: '416200',
 6509: '140300',
 111239: '501101',
 3240: '417702',
 3288: '417702',
 3315

Then add a column 'TRACT_ID' in bus stop data

In [143]:
# Look each stop's tract up by its STOP_ID. Assigning the dict's values
# directly relied on the dict having exactly one entry per row, in row order,
# which no longer holds if a stop falls in no tract or a STOP_ID repeats.
# Stops without a tract get a missing TRACT_ID instead.
busstop_gdf['TRACT_ID'] = busstop_gdf['STOP_ID'].map(stopid_tract_dict)
busstop_gdf

Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,geometry,TRACT_ID
0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,POINT (237120.669 892643.408),100700
1,841,Lagrange St @ Virgil Rd,BOSTON,35,POINT (227915.195 892644.017),130200
2,446,Norfolk St @ Nelson St,BOSTON,35,POINT (234385.661 892644.944),100300
3,847,Lagrange St opp Virgil St,BOSTON,35,POINT (227912.601 892650.156),130200
4,3079,Adams St @ Minot St,BOSTON,35,POINT (236644.812 892651.990),100800
...,...,...,...,...,...,...
7805,9097,Grove St @ Lebanon St,MELROSE,178,POINT (236229.381 911541.866),336300
7806,5911,Grove St @ Lebanon St,MELROSE,178,POINT (236236.036 911542.538),336300
7807,5975,Wyoming Ave opp Cleveland St,MELROSE,178,POINT (234977.387 911544.999),336402
7808,15976,Wyoming Ave @ Cleveland St,MELROSE,178,POINT (234971.098 911547.184),336402


Then assign median income for each stop by 'TRACT_ID'

In [169]:
def get_tract_income_dict(median_income_df=None):
    """
    Build a {tract_id: median_income} lookup from the income table.

    Parameters:
        median_income_df: table with a 'tract' column and a 'B19013_001E'
            (median household income) column. When omitted, the notebook-level
            `median_income_for_tract_gdf` loaded earlier is used.

    Returns:
        A dictionary whose keys are tract codes as zero-padded 6-character
        strings and whose values are the matching 'B19013_001E' entries. If a
        tract code occurs more than once, the last row wins.

    NOTE(review): the income table also has a 'county' column; if tract codes
    repeat across counties, keying on 'tract' alone mixes them up -- confirm.
    """
    if median_income_df is None:
        # The loader cell stores the table under this name; the previous code
        # referenced `median_income_for_tract_df`, which is never defined.
        median_income_df = median_income_for_tract_gdf

    # need to convert entries of type <numpy.int64> into a 6-digit string
    tracts = (str(tract).zfill(6) for tract in median_income_df['tract'])
    # Pair by position so the result does not depend on the table's index labels.
    return dict(zip(tracts, median_income_df['B19013_001E']))

def get_income_for_tract(tracts, tract_income_dict):
    """
    Look up the income of every tract in `tracts`.

    Returns a list with one income per entry of `tracts`, in the same order.
    Raises KeyError if a tract is not a key of `tract_income_dict`.
    """
    return [tract_income_dict[tract_id] for tract_id in tracts]

In [170]:
# Build the tract -> income lookup, then collect one income per stop, in the
# same order as busstop_gdf['TRACT_ID'].
tract_income_dict = get_tract_income_dict()
incomes = get_income_for_tract(busstop_gdf['TRACT_ID'], tract_income_dict)

Then add a column 'income' in bus stop data

In [171]:
# Attach the per-stop incomes (listed in busstop_gdf row order) and show the result.
busstop_gdf['income'] = incomes
busstop_gdf

Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,geometry,TRACT_ID,income
0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,POINT (237120.669 892643.408),100700,87063
1,841,Lagrange St @ Virgil Rd,BOSTON,35,POINT (227915.195 892644.017),130200,109034
2,446,Norfolk St @ Nelson St,BOSTON,35,POINT (234385.661 892644.944),100300,51736
3,847,Lagrange St opp Virgil St,BOSTON,35,POINT (227912.601 892650.156),130200,109034
4,3079,Adams St @ Minot St,BOSTON,35,POINT (236644.812 892651.990),100800,88425
...,...,...,...,...,...,...,...
7805,9097,Grove St @ Lebanon St,MELROSE,178,POINT (236229.381 911541.866),336300,120436
7806,5911,Grove St @ Lebanon St,MELROSE,178,POINT (236236.036 911542.538),336300,120436
7807,5975,Wyoming Ave opp Cleveland St,MELROSE,178,POINT (234977.387 911544.999),336402,79969
7808,15976,Wyoming Ave @ Cleveland St,MELROSE,178,POINT (234971.098 911547.184),336402,79969


Save the bus stop data into a new csv file

In [173]:
# Drop the geometry column so the result is a plain table.
df = pd.DataFrame(busstop_gdf.drop(columns='geometry'))
# Create ./output on first run; to_csv does not create missing directories.
Path("./output").mkdir(parents=True, exist_ok=True)
# The index is written too, so it comes back as an "Unnamed: 0" column on re-read.
df.to_csv("./output/stops_with_income.csv")

## Step 2: Income level assignment
Assign an income level to each stop based on the census tract data

**The cells above (other than the imports) do not need to be re-run:** this step starts from the CSV file saved at the end of Step 1.

Here we use the income groups defined by [Pew Research](http://www.pewsocialtrends.org/2015/12/09/the-american-middle-class-is-losing-ground/), as follows:

| LEVEL | INCOME GROUP | INCOME/\$ |
| :- | :- | -: |
| 0 | Lowest income | Less than 31,000 |
| 1 | Lower-middle income | 31,000 to under 42,000 |
| 2 | Middle-income | 42,000 to under 126,000 |
| 3 | Upper-middle income | 126,000 to under 188,000 |
| 4 | Higher-income | 188,000 or more |

In [177]:
# Read the result CSV generated in Step 1. It was saved together with its
# index, which comes back here as an extra "Unnamed: 0" column.
busstop_df = pd.read_csv("./output/stops_with_income.csv")
busstop_df

Unnamed: 0.1,Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,TRACT_ID,income
0,0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,100700,87063
1,1,841,Lagrange St @ Virgil Rd,BOSTON,35,130200,109034
2,2,446,Norfolk St @ Nelson St,BOSTON,35,100300,51736
3,3,847,Lagrange St opp Virgil St,BOSTON,35,130200,109034
4,4,3079,Adams St @ Minot St,BOSTON,35,100800,88425
...,...,...,...,...,...,...,...
7805,7805,9097,Grove St @ Lebanon St,MELROSE,178,336300,120436
7806,7806,5911,Grove St @ Lebanon St,MELROSE,178,336300,120436
7807,7807,5975,Wyoming Ave opp Cleveland St,MELROSE,178,336402,79969
7808,7808,15976,Wyoming Ave @ Cleveland St,MELROSE,178,336402,79969


In [178]:
# Lower bounds (in $) of income levels 1-4; anything below the first edge is level 0.
INCOME_LEVEL_EDGES = [31000, 42000, 126000, 188000]

incomes = busstop_df['income']

# Bin every income into its half-open range [lower, upper) in one vectorised
# call: level 0 is < 31000, level 1 is 31000 <= income < 42000, and so on up
# to level 4 for >= 188000.
# The result is a Series with exactly one entry per row of busstop_df. A
# missing income yields a missing level; the previous loop appended nothing
# for it, which left the list shorter than the table.
income_level = pd.cut(
    incomes,
    bins=[-np.inf] + INCOME_LEVEL_EDGES + [np.inf],
    right=False,
    labels=False,
)

In [179]:
# Attach the income level computed for each stop and show the result.
busstop_df['income_level'] = income_level
busstop_df

Unnamed: 0.1,Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,TRACT_ID,income,income_level
0,0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,100700,87063,2
1,1,841,Lagrange St @ Virgil Rd,BOSTON,35,130200,109034,2
2,2,446,Norfolk St @ Nelson St,BOSTON,35,100300,51736,2
3,3,847,Lagrange St opp Virgil St,BOSTON,35,130200,109034,2
4,4,3079,Adams St @ Minot St,BOSTON,35,100800,88425,2
...,...,...,...,...,...,...,...,...
7805,7805,9097,Grove St @ Lebanon St,MELROSE,178,336300,120436,2
7806,7806,5911,Grove St @ Lebanon St,MELROSE,178,336300,120436,2
7807,7807,5975,Wyoming Ave opp Cleveland St,MELROSE,178,336402,79969,2
7808,7808,15976,Wyoming Ave @ Cleveland St,MELROSE,178,336402,79969,2


In [181]:
# Save the stops, now including their income level, to a csv file under ./output.
busstop_df.to_csv("./output/stops_with_incomeLevel.csv")