# Transit Equity/ Bus Routes (MBTA only)

In [None]:
# imports
import pandas as pd
import geopandas as gpd
import requests
import numpy as np
import math
from shapely.ops import nearest_points

## Step 1: Data collection
Read [this report](http://www.wrrb.org/wp-content/uploads/2019/05/WRRB-FareFree-Transit-Report.pdf) to understand the issue. Collect data - create a spreadsheet of all the different bus stops in Massachusetts including MBTA, Regional Transit Authorities, and City/Town buses.

**Make sure datasets --- 'mbtabus/', 'CENSUS2010_BLK_BG_TRCT_SHP/' and 'census.csv' are under './data/'**

In [None]:
# Collect data for bus stops 
def load_bus_stop():
    """
    Read the MBTA bus stop shapefile stored under ./data/mbtabus/.

    Data description link: https://docs.digital.mass.gov/dataset/massgis-data-mbta-bus-routes-and-stops

    Returns the object produced by ``gpd.read_file`` for that shapefile.
    """
    shapefile_path = "./data/mbtabus/MBTABUSSTOPS_PT.shp"
    return gpd.read_file(shapefile_path)


def load_census_tract():
    """
    Read the 2010 census tract polygon shapefile stored under
    ./data/CENSUS2010_BLK_BG_TRCT_SHP/.

    Data description link: https://docs.digital.mass.gov/dataset/massgis-data-datalayers-2010-us-census

    Returns the object produced by ``gpd.read_file`` for that shapefile.
    """
    shapefile_path = "./data/CENSUS2010_BLK_BG_TRCT_SHP/CENSUS2010TRACTS_POLY.shp"
    return gpd.read_file(shapefile_path)

# Collect Census Data:
def get_median_hh_income():
    '''
        Returns Pandas DataFrame representation Median Household Income Estimate by Census Tract for MA.
        American Community Survey (ACS) 2018 Census data used.
        Specific table: ACS 2018 5-year detailed table "B19013_001E"

        Raises requests.HTTPError when the Census API answers with an error status,
        and requests.Timeout when it does not answer within 30 seconds.
    '''
    URL = "https://api.census.gov/data/2018/acs/acs5?get=B19013_001E&for=tract:*&in=state:25"

    # Fail loudly on an unreachable / failing API instead of trying to parse an error page.
    response = requests.get(url=URL, timeout=30)
    response.raise_for_status()
    data = response.json()

    # data[0] holds the column names and every following entry is one tract,
    # so all rows after the header are kept (including the last one).
    median_income_df = pd.DataFrame(data[1:], columns=data[0])

    return median_income_df

def load_median_hh_income():
    '''
        *** USE THIS FUNCTION TO LOAD INCOME DATA FROM LOCAL ***
        Reads the locally stored copy ("./data/census.csv") of the Median Household
        Income Estimate by Census Tract for MA and returns it as a Pandas DataFrame.
        American Community Survey (ACS) 2018 Census data used.
        Specific table: ACS 2018 5-year detailed table "B19013_001E"
    '''
    local_csv = "./data/census.csv"
    return pd.read_csv(local_csv)

In [None]:
# Load the three Step 1 inputs from ./data/: bus stops, census tract polygons,
# and the locally stored median household income table.
busstop_gdf = load_bus_stop()
census_tract_gdf = load_census_tract()
median_income_for_tract_df = load_median_hh_income()

# Collect Census Data:
# (left commented out: alternative path that fetches the income table from the
# Census API instead of reading the local csv)
# res = get_median_hh_income()
# res.to_csv()

In [None]:
busstop_gdf

In [None]:
census_tract_gdf

In [None]:
median_income_for_tract_df

In [None]:
# Turn every tract ID into a 6-character, zero-padded string
median_income_for_tract_df['tract'] = median_income_for_tract_df['tract'].map(
    lambda tract_id: str(tract_id).zfill(6)
)
median_income_for_tract_df

In [None]:
# Find abnormal values and correct them with its neighbors' average incomes
# census_tract_gdf.plot()
# busstop_gdf.plot()

# while True:
#     abnormal_tracts = ['980300', '980700', '981000', '981201', '981202', '981502', '981600', '981700']
    
#     for tract in abnormal_tracts:
#         polygon = census_tract_gdf[census_tract_gdf['TRACTCE10'] == tract].geometry
#         print(polygon)
#         neighbors = census_tract_gdf[census_tract_gdf.geometry.touches(polygon)].TRACTCE10.tolist()
#         print(len(neighbors))
#     break

Then traverse the bus stops to identify which tract each one is in.

In [None]:
def tract_for_stop(busstop_gdf, census_tract_gdf):
    """
    Find, for every bus stop, the census tract whose polygon contains it.

    Parameters:
        busstop_gdf: geoDataFrame of stops; reads its 'STOP_ID' column and its geometry.
        census_tract_gdf: geoDataFrame of tracts; reads its 'TRACTCE10' column and its geometry.

    Returns a dictionary in {stop_id: tract_id} format. For each stop the first
    tract (in row order) that contains the point is used; a stop that lies in
    no tract gets no entry.
    """
    # Pair IDs with geometries by row position so the result does not depend on
    # the index labels of either frame, and build the tract list only once.
    tracts = list(zip(census_tract_gdf['TRACTCE10'], census_tract_gdf.geometry))

    stopid_tract_dict = {}
    for stopid, point in zip(busstop_gdf['STOP_ID'], busstop_gdf.geometry):
        for tractid, polygon in tracts:
            if point.within(polygon):
                stopid_tract_dict[stopid] = tractid
                break

    return stopid_tract_dict

In [None]:
stopid_tract_dict = tract_for_stop(busstop_gdf, census_tract_gdf)

Then add a column 'TRACT_ID' in bus stop data

In [None]:
# Look each stop's tract up by its STOP_ID rather than relying on the dict's
# insertion order matching the row order. A stop with no entry in the dict
# gets NaN in 'TRACT_ID'.
busstop_gdf['TRACT_ID'] = busstop_gdf['STOP_ID'].map(stopid_tract_dict)
busstop_gdf

Then assign median income for each stop by 'TRACT_ID'

In [None]:
def get_tract_income_dict(income_df=None):
    """
    Build a {tract_id: median income} lookup.

    Parameters:
        income_df: table with a 'tract' column and a 'B19013_001E' (median
            household income) column. When omitted, the notebook-level
            `median_income_for_tract_df` is used.

    Returns a dictionary keyed by the tract ID as a 6-character zero-padded
    string. If a tract ID occurs more than once, the last row wins.
    """
    if income_df is None:
        income_df = median_income_for_tract_df

    # need to convert entries of type <numpy.int64> into a 6-digit string
    tracts = (str(x).zfill(6) for x in income_df['tract'])
    # Pair by row position so the result does not depend on the index labels.
    return dict(zip(tracts, income_df['B19013_001E']))

def get_income_for_tract(tracts, tract_income_dict):
    """
    Return the list of incomes for `tracts`, in the same order, looked up in
    `tract_income_dict`. A tract that is not a key of the dict raises KeyError.
    """
    return [tract_income_dict[tract_id] for tract_id in tracts]

In [None]:
tract_income_dict = get_tract_income_dict()
incomes = get_income_for_tract(busstop_gdf['TRACT_ID'], tract_income_dict)

Then add a column 'income' in bus stop data

In [None]:
busstop_gdf['income'] = incomes
busstop_gdf

Save the bus stop data into a new csv file

In [None]:
# Drop the geometry column so only the plain attribute columns are written;
# Step 2 reads this csv back from ./output/.
df = pd.DataFrame(busstop_gdf.drop(columns='geometry'))
df.to_csv("./output/stops_with_income.csv")

## Step 2: Income level assignment
Assign an income level to each stop based on the census tract data

**No need to run blocks above!!**
Here we use the income group standard according to [Pew Research](http://www.pewsocialtrends.org/2015/12/09/the-american-middle-class-is-losing-ground/), which shows as follows:

| LEVEL | INCOME GROUP | INCOME/\$ |
| :- | :- | -: |
| 0 | Lowest income | 31,000 or less|
| 1 | Lower-middle income | 31,000 - 42,000 |
| 2 | Middle-income | 42,000 - 126,000 |
| 3 | Upper-middle income | 126,000 - 188,000 |
| 4 | Higher-income | 188,000 or more |

In [None]:
import pandas as pd
# read result csv generated from step one
busstop_df = pd.read_csv("./output/stops_with_income.csv")
busstop_df

In [None]:
from bisect import bisect_right

# Lower bounds ($) of income levels 1-4; a positive income below the first
# bound is level 0. A value equal to a bound belongs to the higher level.
level_lower_bounds = [31000, 42000, 126000, 188000]

incomes = busstop_df['income']
# `income > 0` is False for zero, negative values and NaN alike, so every stop
# gets exactly one entry (-1 = unknown) and the list stays aligned with the rows.
income_level = [
    bisect_right(level_lower_bounds, income) if income > 0 else -1
    for income in incomes
]

In [None]:
busstop_df['income_level'] = income_level
busstop_df

In [None]:
# Save to csv file
busstop_df.to_csv("./output/stops_with_incomeLevel.csv", index_label=False)

In [None]:
# show stops whose income are unknown
busstop_df[busstop_df.income<0]

## Step 3&4: Ridership & Revenue for each stop
Calculate annual revenue for each stop
1. Find fare for each route
| route_type    | fare | fare (reduced) |
|---------------|------|----------------|
| Local Bus     | 1.7  | 0.85           |
| Inner Express | 4.25 | 2.10           |
| Outer Express | 5.25 | 2.60           |
2. Connect routes for each stop
3. Collect ridership for each route per stop
4. Calculate annual revenue for each stop, note: reduced fare, monthly pass

In [None]:
import pandas as pd
import geopandas as gpd

### 1. Find fare for each route

In [None]:
# route info
routes_df = pd.read_csv('./data/fare&ridership/routes.csv')
routes_df

In [None]:
route_fare_class = {'Local Bus': 1.7, 'Inner Express': 4.25, 'Outer Express': 5.25, 'Free':0}
route_reduced_fare_class = {'Local Bus': 0.85, 'Inner Express': 2.1, 'Outer Express': 2.6, 'Free':0}

# add the full and reduced fare of each route as float columns;
# a route whose fare class is not in the tables above keeps NaN in both
fare_class_per_route = routes_df['route_fare_class']
routes_df['fare'] = fare_class_per_route.map(route_fare_class).astype(float)
routes_df['reduced_fare'] = fare_class_per_route.map(route_reduced_fare_class).astype(float)

routes_df.to_csv('./output/routes_with_fare.csv', index_label=False)

### 2. Connect routes for each stop

In [None]:
stops_df = pd.read_csv('./output/stops_with_incomeLevel.csv')
stops_df

In [None]:
ridership_df = pd.read_csv('./data/fare&ridership/Line,_and_Stop.csv', low_memory=False)
ridership_df

In [None]:
# Build the stop -> routes lookup in a single pass over ridership_df instead of
# filtering the whole table once per stop. Route ids are de-duplicated, cast to
# str (so numeric ids can be joined) and sorted so the output is reproducible.
routes_by_stop = (
    ridership_df.dropna(subset=['route_id'])
    .groupby('stop_id', sort=False)['route_id']
    .agg(lambda ids: ','.join(sorted({str(route_id) for route_id in ids})))
)

# Stops that never appear in the ridership table get an empty string.
stops_df['route_ids'] = stops_df['STOP_ID'].astype(int).map(routes_by_stop).fillna("")
stops_df.to_csv('./output/stops.csv', index_label=False)
stops_df

### 3. Collect ridership for each route per stop

In [None]:
routes_df = pd.read_csv('output/routes_with_fare.csv')
routes_df

In [None]:
ridership_df = pd.read_csv('./data/fare&ridership/Line,_and_Stop.csv', low_memory=False)
# load only rows for Fall 2019
ridership_df = ridership_df[ridership_df.season == 'Fall 2019']
ridership_df

In [None]:
stops_df = pd.read_csv('./output/stops.csv', low_memory=False)
stops_df.info()

In [None]:
# ridership_df[(ridership_df.route_id=='99') & (ridership_df.stop_id==5327) & (ridership_df.day_type_name=='weekday')]

# How many days of each type a week contains; rows with any other day type are ignored.
days_per_week = {'weekday': 5, 'saturday': 1, 'sunday': 1}

# Aggregate weekly boardings per (stop, route) in one pass instead of scanning
# the whole ridership table three times for every route of every stop.
# NOTE: missing (NaN) boardings are skipped by the aggregation.
counted = ridership_df[ridership_df.day_type_name.isin(list(days_per_week))]
weekly_boardings = counted.boardings * counted.day_type_name.map(days_per_week)
weekly_ons_lookup = (
    weekly_boardings
    .groupby([counted.stop_id, counted.route_id], sort=False)
    .sum()
    .to_dict()
)  # {(stop_id, route_id): boardings per week}

ridership_column = []
for stop_id, joined_routes in zip(stops_df['STOP_ID'], stops_df['route_ids']):
    stop_id = int(stop_id)
    riderships = []
    for route_id in str(joined_routes).split(','):
        # a (stop, route) pair with no ridership rows counts as zero boardings
        week_ons = weekly_ons_lookup.get((stop_id, route_id), 0)
        year_ons = week_ons * 52
        riderships.append("{:.1f}".format(year_ons))
    # one comma-separated entry per route, in the same order as 'route_ids'
    ridership_column.append(','.join(riderships))
stops_df['ridership'] = ridership_column
stops_df

In [None]:
stops_df.to_csv('./output/stops.csv', index_label=False)

### 4. Calculate annual revenue for each stop, note: reduced fare, monthly pass
Here I assume the revenue composition (payment method) is fixed for each route, and that a rider uses a monthly pass twice every weekday, and that a month has 22 weekdays.

In [None]:
routes_df = pd.read_csv('./output/routes_with_fare.csv')
routes_df

In [None]:
def _annual_route_revenue(ridership, fare, fare_reduced):
    """
    Estimate the annual fare revenue of one route at one stop.

    ridership    : annual boardings of the route at the stop
    fare         : full pay-per-ride fare of the route
    fare_reduced : reduced pay-per-ride fare of the route

    Boardings not covered by the two payment groups below are ignored.
    """
    # monthly pass - 70% of boardings. A pass is assumed to be used twice on each
    # of 22 weekdays, so one boarding is worth pass_price / (22*2).
    # NOTE(review): 30 / 55 / 90 are presumably the monthly pass prices of the
    # four pass categories - confirm against the fare table.
    ridership_monthlyPass = ridership * 0.7
    revenue_0 = ridership_monthlyPass * 0.17 * (30/(22*2))
    revenue_1 = ridership_monthlyPass * 0.1 * (55/(22*2))
    revenue_2 = ridership_monthlyPass * 0.69 * (90/(22*2))
    revenue_3 = ridership_monthlyPass * 0.04 * (90/(22*2))
    revenue_monthlyPass = revenue_0 + revenue_1 + revenue_2 + revenue_3

    # pay-per-ride - 22% of boardings
    ridership_payPerRide = ridership * 0.22
    revenue_4 = ridership_payPerRide * 0.03 * 1.7
    revenue_5 = ridership_payPerRide * 0.16 * fare_reduced
    revenue_6 = ridership_payPerRide * 0.79 * fare
    revenue_payPerRide = revenue_4 + revenue_5 + revenue_6

    # others, ignore
    return revenue_monthlyPass + revenue_payPerRide


stops_df['revenues'] = ""

for idx, row in stops_df.iterrows():
    route_ids = str(row['route_ids']).split(',')
    riderships = [float(x) for x in str(row['ridership']).split(',')]
    if len(route_ids) != len(riderships):
        raise ValueError(
            "stop row {}: {} route ids but {} ridership values".format(
                idx, len(route_ids), len(riderships)))

    revenues = []
    for route_id, ridership in zip(route_ids, riderships):
        route_info = routes_df[routes_df.route_id == route_id]
        if len(route_info)==0:
            # no route info, assume it as local bus
            fare = 1.7
            fare_reduced = 0.85
        else:
            # take the first matching row explicitly; float() on a Series is deprecated
            fare = float(route_info['fare'].iloc[0])
            fare_reduced = float(route_info['reduced_fare'].iloc[0])

        # sum up
        revenues.append("{:.1f}".format(_annual_route_revenue(ridership, fare, fare_reduced)))
    # one comma-separated entry per route, in the same order as 'route_ids'
    stops_df.at[idx, 'revenues'] = ','.join(revenues)

stops_df

In [None]:
# Total the per-route revenues of every stop into 'revenue_annual'
stops_df['revenue_annual'] = ""
for idx, joined_revenues in stops_df['revenues'].items():
    per_route = [float(part) for part in str(joined_revenues).split(',')]
    stops_df.at[idx, 'revenue_annual'] = sum(per_route)
stops_df

In [None]:
stops_df.to_csv('./output/stops.csv', index_label=False)

## Step 5: Identify which bus routes, stops, or zones would have the most positive effect on low income riders if free. Identify which towns would be impacted?
1. Draw a 0.5 mile radius circle around each stop, store as a column
2. Calculate the percentage of each tract covered by the intersection between the circle and the tract
3. Calculate the weighted median income for the people that the stop impacts
4. Calculate the weighted number of people impacted by each stop

In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point

### 0. Data loading & Preprocessing

In [2]:
# gdf_tract_geo = gpd.read_file('./data/census_tract/tl_2019_25_tract/tl_2019_25_tract.shp')
# gdf_tract_geo.head()
gdf_tract_geo = gpd.read_file('./data/census_tract/CENSUS2010_BLK_BG_TRCT_SHP/CENSUS2010TRACTS_POLY.shp')
gdf_tract_geo.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,AREA_ACRES,POP100_RE,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry
0,25,21,418003,25021418003,4180.03,Census Tract 4180.03,G5020,1705668.0,2936.0,42.235024,...,422.174,2481,1350,141872,123764,3690,123764,1708484.0,6274.185034,"POLYGON ((240678.759 886748.073, 240607.995 88..."
1,25,21,417701,25021417701,4177.01,Census Tract 4177.01,G5020,1543651.0,12275.0,42.2523398,...,384.4502,5417,2983,141838,123730,3683,123730,1555821.0,6229.137913,"POLYGON ((240969.666 890122.806, 240985.156 89..."
2,25,21,417702,25021417702,4177.02,Census Tract 4177.02,G5020,1685529.0,317947.0,42.2582818,...,416.4918,2765,1109,141843,123735,3684,123735,1685489.0,8542.498286,"POLYGON ((242328.221 890545.050, 242276.888 89..."
3,25,21,418102,25021418102,4181.02,Census Tract 4181.02,G5020,771203.0,2371.0,42.247041,...,191.1397,3377,1676,141882,123774,3693,123774,773518.2,4540.296101,"POLYGON ((240372.666 888331.030, 240393.057 88..."
4,25,21,418004,25021418004,4180.04,Census Tract 4180.04,G5020,1316466.0,0.0,42.2383744,...,325.2812,4280,1987,141874,123766,3691,123766,1316372.0,5854.497366,"POLYGON ((240687.299 888298.408, 240700.449 88..."


In [21]:
# load stop info
gdf_stop = gpd.read_file('./data/stops_routes/mbtabus/MBTABUSSTOPS_PT.shp')
gdf_stop.head()

Unnamed: 0,STOP_ID,STOP_NAME,TOWN,TOWN_ID,geometry
0,3077,Gallivan Blvd @ opp Marsh St,BOSTON,35,POINT (237120.669 892643.408)
1,841,Lagrange St @ Virgil Rd,BOSTON,35,POINT (227915.195 892644.017)
2,446,Norfolk St @ Nelson St,BOSTON,35,POINT (234385.661 892644.944)
3,847,Lagrange St opp Virgil St,BOSTON,35,POINT (227912.601 892650.156)
4,3079,Adams St @ Minot St,BOSTON,35,POINT (236644.812 892651.990)


### 1. Draw a 0.5 mile radius circle around each stop, store as a column

In [4]:
# This shows the units
gdf_tract_geo.crs

<Projected CRS: EPSG:26986>
Name: NAD83 / Massachusetts Mainland
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: USA - Massachusetts - SPCS - mainland
- bounds: (-73.5, 41.46, -69.86, 42.89)
Coordinate Operation:
- name: SPCS83 Massachusetts Mainland zone (meters)
- method: Lambert Conic Conformal (2SP)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [5]:
def mile2meter(mile):
    """Convert a distance in miles to metres (via kilometres)."""
    miles_per_km = 0.62137119
    kilometres = mile / miles_per_km
    return kilometres * 1000

In [6]:
# Start from an empty GeoDataFrame and copy over the stop identifiers.
gdf_stop_circle = gpd.GeoDataFrame()
gdf_stop_circle['STOP_ID'] = gdf_stop['STOP_ID']

# 0.5 mile expressed in metres (the CRS printed above, EPSG:26986, uses metres),
# used as the buffer distance around every stop point.
radius = mile2meter(0.5)
gdf_stop_circle['geometry'] = gdf_stop['geometry'].buffer(radius)
gdf_stop_circle = gdf_stop_circle.set_crs('EPSG:26986')
gdf_stop_circle = gdf_stop_circle.set_geometry('geometry')
# Keep a copy of the buffers on disk as a shapefile.
gdf_stop_circle.to_file('./output/radius.shp', driver='ESRI Shapefile')

gdf_stop_circle.head()

Unnamed: 0,STOP_ID,geometry
0,3077,"POLYGON ((237925.341 892643.408, 237921.466 89..."
1,841,"POLYGON ((228719.867 892644.017, 228715.993 89..."
2,446,"POLYGON ((235190.333 892644.944, 235186.458 89..."
3,847,"POLYGON ((228717.273 892650.156, 228713.398 89..."
4,3079,"POLYGON ((237449.484 892651.990, 237445.610 89..."


### 2. Calculate percentage of the intersection between the circle and tracts
- Find overlapping tracts
- Calculate overlapping area
- Calculate percentage

In [None]:
gdf_stop_circle['impacted_tractid'] = ""
gdf_stop_circle['proportion'] = ""
tract_geoms = gdf_tract_geo['geometry']
for idx, row in gdf_stop_circle.iterrows():
    circle = row['geometry']
    # tract_ids for overlapping: tracts that partially overlap the circle,
    # fully cover it, or lie entirely inside it
    hit_mask = tract_geoms.overlaps(circle) | tract_geoms.covers(circle) | tract_geoms.within(circle)
    overlaps = gdf_tract_geo.loc[hit_mask, 'TRACTCE10'].tolist()
    if not overlaps:
        raise ValueError(
            "no census tract intersects the buffer of stop {}".format(row['STOP_ID']))

    # share of each tract's area that falls inside the circle
    proportion = []
    for y in overlaps:
        # NOTE(review): rows are selected by TRACTCE10 only, so tracts that share
        # a code are summed together - confirm codes are unique in this layer.
        same_code = gdf_tract_geo.loc[gdf_tract_geo['TRACTCE10'] == y]
        tract_area = same_code['SHAPE_AREA'].sum()
        # intersect the tract geometry with the circle directly instead of
        # building a GeoDataFrame overlay for every (stop, tract) pair
        area = same_code['geometry'].intersection(circle).area.sum()
        proportion.append(float(area / tract_area))

    # both columns are comma-separated and share the same ordering
    gdf_stop_circle.at[idx, 'impacted_tractid'] = ','.join(overlaps)
    gdf_stop_circle.at[idx, 'proportion'] = ','.join(["{:.2f}".format(x) for x in proportion])
gdf_stop_circle

In [8]:
gdf_stop_circle.to_csv('./output/stop_radius.csv', index_label=False)

## 3. Calculate weighted median income for people that the stop impacts

In [3]:
df_income_tract = pd.read_csv('./data/census_tract/census.csv')
df_income_tract.head()

Unnamed: 0.1,Unnamed: 0,B19013_001E,state,county,tract
0,0,132750,25,25,60501
1,1,12759,25,25,61101
2,2,84083,25,25,70101
3,3,28851,25,25,70402
4,4,52676,25,25,71101


In [4]:
df_stop_circle = pd.read_csv('./output/stop_radius.csv')
df_stop_circle.head()

Unnamed: 0,STOP_ID,geometry,impacted_tractid,proportion
0,3077,"POLYGON ((237925.3409028954 892643.4076999985,...",416400100800100601100700100603,"0.01,0.17,0.13,0.99,0.29"
1,841,"POLYGON ((228719.867402897 892644.0174999982, ...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02"
2,446,"POLYGON ((235190.3329028941 892644.9441, 23518...","100500,100400,100300,101102,101101,101002,1009...","0.17,0.18,1.00,0.41,0.06,0.24,0.02,1.00,0.37"
3,847,"POLYGON ((228717.2732028968 892650.1563000008,...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02"
4,3079,"POLYGON ((237449.4844028956 892651.9899999984,...",416400100800100601100500100400100700100603,"0.00,0.51,0.32,0.01,0.03,0.67,0.06"


In [5]:
# -1 marks "income unknown"; it is kept for stops whose weights sum to zero.
df_stop_circle['income'] = -1
for idx, row in df_stop_circle.iterrows():
    tracts = str(row['impacted_tractid']).split(',')
    proportions = list(map(float, str(row['proportion']).split(',')))
    if len(tracts) != len(proportions):
        raise ValueError(
            "stop row {}: {} tracts but {} proportions".format(
                idx, len(tracts), len(proportions)))

    weighted_sum = 0.0
    weight_total = 0.0
    for tract, share in zip(tracts, proportions):
        # first census row whose tract code matches (IndexError if there is none)
        income = int(df_income_tract[df_income_tract['tract']==int(tract)].B19013_001E.values[0])
        if income<0:
            # negative income = no estimate for this tract: leave it out of both
            # the numerator and the denominator so it cannot dilute the average
            continue
        weighted_sum += income * share
        weight_total += share

    if weight_total > 0:
        # stored as a whole number so the column keeps the integer type it was created with
        df_stop_circle.at[idx, 'income'] = int(weighted_sum / weight_total)

In [6]:
from bisect import bisect_right

# Lower bounds ($) of income levels 1-4; a positive income below the first
# bound is level 0. A value equal to a bound belongs to the higher level.
level_lower_bounds = [31000, 42000, 126000, 188000]

incomes = df_stop_circle['income']
# `income > 0` is False for zero, negative values and NaN alike, so every stop
# gets exactly one entry (-1 = unknown) and the list stays aligned with the rows.
income_level = [
    bisect_right(level_lower_bounds, income) if income > 0 else -1
    for income in incomes
]
df_stop_circle['income_level'] = income_level

In [7]:
df_stop_circle.to_csv('./output/stop_with_weighed_level.csv', index_label=False)

## 4. Calculate weighted number of people impacted by each stop

In [8]:
df_tract_pop = pd.read_csv('./output/tracts_with_population.csv')
df_tract_pop.head()

Unnamed: 0.1,Unnamed: 0,tract_id,public_transport,walking,other,total_employed,income,impacted_pop
0,1,10100,34,391,208,1658,50741,633
1,2,10206,26,86,57,1620,69267,169
2,3,10208,0,1,8,604,65446,9
3,4,10304,18,0,12,1077,79044,30
4,5,10306,0,67,41,1087,62553,108


In [9]:
df_stop_circle = pd.read_csv('./output/stop_with_weighed_level.csv')
df_stop_circle.head()

Unnamed: 0,STOP_ID,geometry,impacted_tractid,proportion,income,income_level
0,3077,"POLYGON ((237925.3409028954 892643.4076999985,...",416400100800100601100700100603,"0.01,0.17,0.13,0.99,0.29",86562,2
1,841,"POLYGON ((228719.867402897 892644.0174999982, ...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02",107359,2
2,446,"POLYGON ((235190.3329028941 892644.9441, 23518...","100500,100400,100300,101102,101101,101002,1009...","0.17,0.18,1.00,0.41,0.06,0.24,0.02,1.00,0.37",46390,2
3,847,"POLYGON ((228717.2732028968 892650.1563000008,...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02",107359,2
4,3079,"POLYGON ((237449.4844028956 892651.9899999984,...",416400100800100601100500100400100700100603,"0.00,0.51,0.32,0.01,0.03,0.67,0.06",83532,2


In [10]:
# Weighted head-count per stop: every overlapping tract contributes its
# 'impacted_pop' scaled by the share of the tract inside the stop's circle.
df_stop_circle['impacted_pop'] = 0
for idx, row in df_stop_circle.iterrows():
    tract_codes = str(row['impacted_tractid']).split(',')
    shares = [float(part) for part in str(row['proportion']).split(',')]
    pops = [
        int(df_tract_pop[df_tract_pop.tract_id==int(code)].impacted_pop.values[0])
        for code in tract_codes
    ]
    weighed_pop = sum([pop * shares[pos] for pos, pop in enumerate(pops)])

    df_stop_circle.at[idx, 'impacted_pop'] = weighed_pop

In [12]:
# Attach each stop's point geometry (matched by row index) as 'location'.
gdf_stop = gpd.read_file('./data/stops_routes/mbtabus/MBTABUSSTOPS_PT.shp')
df_stop_circle['location'] = gdf_stop['geometry']

# Put 'location' right after 'STOP_ID' and keep every other column in its
# existing order. Selecting by name keeps this correct if columns are added
# upstream or the cell is run twice.
leading = ['STOP_ID', 'location']
cols = leading + [col for col in df_stop_circle.columns.tolist() if col not in leading]

df_stop_circle = df_stop_circle[cols]

df_stop_circle.to_csv('./output/stops_weighed.csv', index_label=False)

In [13]:
df_stop_circle.head()

Unnamed: 0,STOP_ID,location,geometry,impacted_tractid,proportion,income,income_level,impacted_pop
0,3077,POINT (237120.669 892643.408),"POLYGON ((237925.3409028954 892643.4076999985,...",416400100800100601100700100603,"0.01,0.17,0.13,0.99,0.29",86562,2,1046
1,841,POINT (227915.195 892644.017),"POLYGON ((228719.867402897 892644.0174999982, ...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02",107359,2,632
2,446,POINT (234385.661 892644.944),"POLYGON ((235190.3329028941 892644.9441, 23518...","100500,100400,100300,101102,101101,101002,1009...","0.17,0.18,1.00,0.41,0.06,0.24,0.02,1.00,0.37",46390,2,3056
3,847,POINT (227912.601 892650.156),"POLYGON ((228717.2732028968 892650.1563000008,...",130402130300130200130100130404,"0.08,0.26,0.63,0.13,0.02",107359,2,632
4,3079,POINT (236644.812 892651.990),"POLYGON ((237449.4844028956 892651.9899999984,...",416400100800100601100500100400100700100603,"0.00,0.51,0.32,0.01,0.03,0.67,0.06",83532,2,1647


In [14]:
# save as shapefile
# 'location' (the stop point copied from the stop layer) is used as the
# geometry column of the exported layer.
gdf = gpd.GeoDataFrame(df_stop_circle, geometry='location')
gdf.to_file('./output/stops.shp', driver='ESRI Shapefile')