In [1]:
import pandas as pd
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
import re
import numpy as np
from ast import literal_eval as make_tuple

# SPC District Analysis

In [2]:
spcpath = "../Data/SPC_TripSummaries_44Districts.xlsx"
spcxls = pd.ExcelFile(spcpath)
sheet_list = spcxls.sheet_names  # see all sheet names
tot_list = sheet_list[0:3] # Total Trips tables
hwy_list = sheet_list[3:6] # Highway Trips tables
trans_list = sheet_list[6:9] #Transit Trips tables

for i, s in enumerate(sheet_list):
   print('{}: {}'.format(i,s))

0: T1_HBW_TotPers
1: T2_HBO_TotPers
2: T3_NHB_TotPers
3: T4_HBW_HwyVeh
4: T5_HBO_HwyVeh
5: T6_NHB_HwyVeh
6: T7_HBW_TranPers
7: T8_HBO_TranPers
8: T9_NHB_TranPers
9: T10_HBW_SOV
10: T11_HBW_HOV2
11: T12_HBW_HOV3
12: T13_HBW_HOV4+
13: T14_HBW_ModeShare
14: T15_HBO_ModeShare
15: T16_NHB_ModeShare


## Explanation of Trips

Home-Based (HB) Trip:
 * The Home of the trip maker is either the origin or destination of the journey (one of the “trip-ends”) is the Home
 * Other end is Work (HBW) or Other (HBO)

None-Home Based (NHB) Trip – Neither end of the trip is the Home of the traveller

## Explanation of Districts
We have 44 districts in this breakdown of SPC trip generations. Below is a reference dicionary that maps the district number to district name.

In [3]:
trans_hbw_tb = pd.read_excel(spcpath, 'T7_HBW_TranPers')
dis_names = list(trans_hbw_tb.iloc[5:49,1])
dis_dict = { i : dis_names[i-1] for i in range(1, len(dis_names)+1 ) }
dis_dict[18]

'Plum Borough'

The excel doc has the 2020 table on top and 2045 in the cells below. We want to grab tables individually:

In [4]:
def extract_tab_2020(excel, sheet_name):
    sheet_df = pd.read_excel(excel, sheet_name)
    sub_df = sheet_df.iloc[5:49,4:48]
    sub_df.columns = dis_dict.values()
    return(sub_df)

In [5]:
def extract_tab_2045(excel, sheet_name):
    sheet_df = pd.read_excel(excel, sheet_name)
    sub_df = sheet_df.iloc[61:105,4:48]
    sub_df.columns = dis_dict.values()
    return(sub_df)

In [6]:
spc_df_dict_2020 = {sheet_name: extract_tab_2020(spcxls, sheet_name) for sheet_name in spcxls.sheet_names}

In [7]:
spc_df_dict_2045 = {sheet_name: extract_tab_2045(spcxls, sheet_name) for sheet_name in spcxls.sheet_names}

In [8]:
for sheet_name, sheet_data in spc_df_dict_2020.items():
    fname = '../Outputs/spc20_' + sheet_name +'.csv'
    #print(fname)
    #sheet_data.to_csv(fname, index = None, header=True)

In [9]:
for sheet_name, sheet_data in spc_df_dict_2045.items():
    fname = '../Outputs/spc45' + sheet_name +'.csv'
    #print(fname)
    #sheet_data.to_csv(fname, index = None, header=True)

To get the total trips, we aggregate the different trip types from earlier:

In [10]:
def combo_tab(excel_dict, tab_list, district_dictionary, offset):
    #print(tab_list[0])
    trans_all = excel_dict[tab_list[0]]
    #print(trans_all.iloc[:2,:2])
    for tab in tab_list[1:]:
        #print(tab)
        #print(excel_dict[tab].iloc[:2,:2])
        trans_all = trans_all.add(excel_dict[tab], fill_value=0)
    colnames = list(trans_all.columns)
    trans_all.reset_index(inplace=True)
    trans_all['index'] = trans_all['index']-offset
    trans_all['dist_origin'] = trans_all['index'].map(district_dictionary)
    trans_all = trans_all[['index', 'dist_origin']+colnames]
    return(trans_all)

In [11]:
tot_2020_all = combo_tab(spc_df_dict_2020, tot_list, dis_dict, 4) # Total 2020 trips
tot_2045_all = combo_tab(spc_df_dict_2045, tot_list, dis_dict, 60) # Total 2045 trips

trans_2020_all = combo_tab(spc_df_dict_2020, trans_list, dis_dict, 4) # Total transit trips 2020
trans_2045_all = combo_tab(spc_df_dict_2045, trans_list, dis_dict, 60) # Total transit trips 2045

In [12]:
tot_2020_all.head(2)

Unnamed: 0,index,dist_origin,Pittsburgh Chateau Area,Pittsburgh Lower North Side,Pittsburgh Upper North Side,Allegheny County Northwest,Allegheny County North (remainder),Hazelwood,Squirrel Hill,Oakland,...,Greene County,Indiana County,Lawrence County,Washington County,Westmoreland County,Midtown/Uptown,Highland Park,Homewood/Lincoln-Lemington,Pittsburgh Southwest (remainder),Allegheny County Southcentral
0,1,Pittsburgh Chateau Area,3313,2181.0,1118.0,1493.0,2458.0,54.0,160.0,1238.0,...,0.0,0.0,0.0,36.0,18.0,440.0,24.0,106.0,1909.0,1687.0
1,2,Pittsburgh Lower North Side,1782,3400.0,993.0,556.0,2774.0,65.0,179.0,1415.0,...,0.0,0.0,0.0,45.0,27.0,604.0,46.0,188.0,1635.0,1633.0


The columns represent the "destintation districts" while the rows represent the "origin districts". 
Therefore, currently to visualize in GIS by column, the data is set to represent "inbound" trips by district. 
The transpose is the "outbound"

In [13]:
tot_2020_all.to_csv('../Outputs/spc2020_inbound_tot_trips.csv')
tot_2045_all.to_csv('../Outputs/spc2045_inbound_tot_trips.csv')
tot_2020_all.transpose().to_csv('../Outputs/spc2020_outbound_tot_trips.csv')
trans_2045_all.transpose().to_csv('../Outputs/spc2045_outbound_tot_trips.csv')

trans_2020_all.to_csv('../Outputs/spc2020_inbound_transit_trips.csv')
trans_2045_all.to_csv('../Outputs/spc2045_inbound_transit_trips.csv')
trans_2020_all.transpose().to_csv('../Outputs/spc2020_outbound_transit_trips.csv')
trans_2045_all.transpose().to_csv('../Outputs/spc2045_outbound_transit_trips.csv')

In order to get the percentage of total trips that are transit trips, and also to get the percent change from 2020 to 2045, we divide tables to get the ratio with the following function:

In [14]:
def divider_a_b(a,b):
    a2 = a.iloc[:,2:].values
    b = b.iloc[:,2:].values
    c = np.divide(a2, b, out=np.zeros_like(a2), where=b!=0)
    c = pd.DataFrame(c)
    c.columns = [str(k)+' '+v for k, v in dis_dict.items()]
    c.insert(0, 'Dist_No',  a.iloc[:,[0]])
    c.insert(1, 'District_Name',  a.iloc[:,[1]])
    return(c)

In [15]:
trans_rider_multiplier = divider_a_b(trans_2045_all, trans_2020_all)
mode_share_2020 = divider_a_b(trans_2020_all, tot_2020_all)
mode_share_2045 = divider_a_b(trans_2045_all, tot_2045_all)
mode_share_2020.to_csv('../Outputs/spc2020_inbound_pct_transit_trips.csv')
mode_share_2045.to_csv('../Outputs/spc2045_inbound_pct_transit_trips.csv')
mode_share_2020.transpose().to_csv('../Outputs/spc2020_outbound_pct_transit_trips.csv')
mode_share_2045.transpose().to_csv('../Outputs/spc2045_outound_pct_transit_trips.csv')
trans_rider_multiplier.to_csv('../Outputs/spc_future_multiplier.csv')

The table below gets the "multiplier" in change from 2020 to 2045 by district to district

In [16]:
trans_rider_multiplier.head(2)

Unnamed: 0,Dist_No,District_Name,1 Pittsburgh Chateau Area,2 Pittsburgh Lower North Side,3 Pittsburgh Upper North Side,4 Allegheny County Northwest,5 Allegheny County North (remainder),6 Hazelwood,7 Squirrel Hill,8 Oakland,...,35 Greene County,36 Indiana County,37 Lawrence County,38 Washington County,39 Westmoreland County,40 Midtown/Uptown,41 Highland Park,42 Homewood/Lincoln-Lemington,43 Pittsburgh Southwest (remainder),44 Allegheny County Southcentral
0,1,Pittsburgh Chateau Area,1.15385,1.07216,1.10714,1.04839,0.957746,3,1.16667,1.39583,...,0,0,0,0,0,1.56522,2.0,1.0,1.14815,1.06522
1,2,Pittsburgh Lower North Side,1.0,1.0119,1.08824,1.07143,0.975,4,1.4,1.45679,...,0,0,0,1,0,1.64935,1.25,1.16667,1.06818,0.986486


If we wanted to know how many trips were estimated from district A to district B, we input a table and destination and origin:

In [17]:
def find_inbound_A_to_B(spc_matrix, dist_A, dist_B):
    return(spc_matrix.loc[spc_matrix.iloc[:,0]==dist_A,].iloc[:,(1+dist_B)].iloc[0])

In [18]:
find_inbound_A_to_B(trans_rider_multiplier, 5,4)

0.9705882352941176

Where are people traveling from to get to our district?

In [19]:
def get_top_sources(df, destination_district, district_dictionary):
    return(df.iloc[:,[0,1,destination_district+1]].sort_values(by=district_dictionary[destination_district],ascending=False))

In [20]:
get_top_sources(trans_2020_all, 19,dis_dict).head(5)

Unnamed: 0,index,dist_origin,Monroeville Pitcairn Turtle Creek
19,20,Wilkinsburg Wilkins Braddock and vicinity,256.0
18,19,Monroeville Pitcairn Turtle Creek,138.0
38,39,Westmoreland County,137.0
20,21,Wilmerding Wall North Versailles,97.0
16,17,Verona Oakmont Penn Hills,88.0


Where are people travleing to?

In [21]:
def get_top_destinations(df, origin_district, district_dictionary):
    origin_df = df.iloc[[origin_district-1],2:]
    origin_df = origin_df.T
    #origin_df = origin_df[origin_df[origin_district]!=0]
    origin_df = origin_df.sort_values(by=origin_district-1,ascending=False)
    origin_df.columns = [district_dictionary[origin_district]]
    return(origin_df)

In [22]:
get_top_destinations(trans_2020_all, 19, dis_dict).head(5)

Unnamed: 0,Monroeville Pitcairn Turtle Creek
Downtown,529
Oakland,187
Monroeville Pitcairn Turtle Creek,138
Wilkinsburg Wilkins Braddock and vicinity,73
Verona Oakmont Penn Hills,37


## On trip generation
* Trip Generation – Total number of trips generated in a zone, be they HB or NHB
* Trip Production – Defined as the home end of an HB trip or as the origin of an NHB trip
* Trip Attraction – Non-home end of an HB trip or destination of an NHB trip

Trip Generation process culminates in Trip Productions and Trip Attractions.

One traditional method is to get the sum total of trips by district, below:

In [23]:
def find_attractions(dist_list, df):
    return(df.iloc[:,[x+1 for x in dist_list]].sum())

def find_productions(dist_list, df):
    return(df.iloc[[x-1 for x in dist_list],:].sum())

In [24]:
find_attractions([19, 20, 21], trans_2020_all) # These are the districts for corridor 6

Monroeville Pitcairn Turtle Creek            1078.0
Wilkinsburg Wilkins Braddock and vicinity    2127.0
Wilmerding Wall North Versailles              262.0
dtype: float64

Let's combine these trip sums in a table that sums:
* Productions: Total # trips traveling out of region
* Attractions: Total # trips traveling to the region
* Trips: round trips are the sum of productions and attractions, divided by 2

In [25]:
def get_prods_attr_trips(df, district_num, district_dictionary):
    origin_df = df.iloc[[district_num],2:].T
    origin_df.reset_index(inplace=True)

    prod_attr = df.iloc[:,[0,1,district_num+1]]
    prod_attr = prod_attr.merge(origin_df, left_on='dist_origin', right_on='index', how='left')
    prod_attr = prod_attr[['dist_origin', district_dictionary[district_num], district_num]]
    prod_attr.columns = ['District', 'Productions', 'Attractions']
    prod_attr['Total_Trips'] = (prod_attr.Productions + prod_attr.Attractions)/2.0
    prod_attr
    return(prod_attr)

In [26]:
get_prods_attr_trips(trans_2020_all, 19, dis_dict).head(5)

Unnamed: 0,District,Productions,Attractions,Total_Trips
0,Pittsburgh Chateau Area,1.0,33,17.0
1,Pittsburgh Lower North Side,2.0,51,26.5
2,Pittsburgh Upper North Side,1.0,9,5.0
3,Allegheny County Northwest,1.0,12,6.5
4,Allegheny County North (remainder),17.0,94,55.5


To get a sum total of roundtrips in the region, one method is to add up these total trips. We do this across all districts to get a trip matrix:

In [27]:
def get_trip_matrix(df, dist_list, district_dictionary):
    first_dist = dist_list[0]
    matrix = get_prods_attr_trips(df, first_dist, district_dictionary)
    matrix = matrix.rename({'Total_Trips': district_dictionary[first_dist]}, axis='columns')
    for d in dist_list[1:]:
        matrix[dis_dict[d]] = get_prods_attr_trips(df, d, district_dictionary).Total_Trips
    matrix = matrix.drop(columns=['Productions', 'Attractions'])
    return(matrix)

In [28]:
matrix_2045 = get_trip_matrix(trans_2045_all, [19, 20, 21], dis_dict)
matrix_2045.head(2)

Unnamed: 0,District,Monroeville Pitcairn Turtle Creek,Wilkinsburg Wilkins Braddock and vicinity,Wilmerding Wall North Versailles
0,Pittsburgh Chateau Area,16,2.5,0.5
1,Pittsburgh Lower North Side,24,6.0,0.5


## Trip counts by route

We want to find out which routes start and end in districts. In the seperate gis-query.ipynb notebook, we've spatial joined stops by district (so for each stop, we know their associated routes and district). That join is read in below:

In [29]:
spc_route_join = pd.read_csv('..\Outputs\spc_routes_joined_consolid.csv')
spc_route_join.head()

Unnamed: 0,District,District Name,StopID,StopName,Routes Impacted
0,7.0,Squirrel Hill,E54470,TILBURY AVE OPP ALDERSON ST,"65, 74"
1,20.0,Wilkinsburg Wilkins Braddock and vicinity,E54550,TRENTON AVE AT FRANKLIN AVE,71C
2,20.0,Wilkinsburg Wilkins Braddock and vicinity,E54580,TRENTON AVE AT PENN AVE,71C
3,20.0,Wilkinsburg Wilkins Braddock and vicinity,E54585,TRENTON AVE AT PENN AVE FS,71C
4,20.0,Wilkinsburg Wilkins Braddock and vicinity,E54590,TRENTON AVE OPP REBECCA AVE NS,71C


In [30]:
def routefinder(routename, searchstring):
    '''The list of routes come in the form 69, 67 etc
       However we need to also account for P routes that may overlap
        such as 67 and P67 and keep them distinct'''

    tpatt = re.compile('^{0}| {0}'.format(routename))
    if tpatt.search(searchstring):
        print('Found {}'.format(routename))
        return(True)
    else:
        return(False)

We turn this into a dictionary for each district, whose values are the routes in their district:

In [31]:
def find_routes_dist(dist_no, spc_route_matcher):
    routeset = set()
    [routeset.update(r) for r in spc_route_join[spc_route_matcher['District']==dist_no]['Routes Impacted'].str.split()]
    routeset = {d.replace(',','') for d in routeset}
    return(routeset)

In [32]:
district_route_dict = {dist_no : find_routes_dist(dist_no, spc_route_join) for dist_no in range(1,45)}

Below we find a way to create a dataframe of route indicators for a single district

In [33]:
def route_matcher(routes, districtnum, district_dict_ref):
    ''' Given a list of routes, return an indicator 1 or 0 if each route is in the district
    '''
    return([int(r in district_dict_ref[districtnum]) for r in routes])

In order to assign ridership by route, we need the trip frequency. 
We get the weekday, sunday and saturday trip frequency from the schedule:

In [34]:
url = 'https://data.wprdc.org/dataset/d1eb0fcd-ba60-4407-9969-ceef464d0c00/resource/d42a9392-f95c-45f7-840f-3829ed21e03a/download/schedule_daily_agg.csv'
trip_count = pd.read_csv(url)

In [35]:
trip_count[(trip_count.PickID == 1909) & 
         #  (trip_count.DayType == 'Weekday') & 
           (trip_count.RouteCode ==  '67')].head(3)

Unnamed: 0,id,PickID,Date,DayType,Route,Service.Context,Trp.Dist.,Trip.Count,Trip_Mins,Schedule_Join,RouteCode
89402,89403,1909,2019-09-01,Sunday,67,Base,356.506,26,1326.84,Base_Sunday,67
89462,89463,1909,2019-09-02,Holiday1,67,Holiday,356.506,26,1326.84,Holiday_Holiday1,67
89537,89538,1909,2019-09-03,Weekday,67,Base,1015.077,59,4278.3,Base_Weekday,67


We also want to use the right schedule. Below is the lookup table for pick number (schedule number) by quarter:

In [36]:
pick = pd.read_csv('https://data.wprdc.org/dataset/b401859c-412b-4cb6-ad88-a4183b83183d/resource/3f789a37-d02b-4f2e-9212-3b824fb06678/download/paac_pick_lookup.csv')
pick = pick.dropna(axis='rows')
pick.tail()

Unnamed: 0,pickID,pickStart,pickEnd
21,1811,11/18/2018,3/16/2019
22,1903,3/17/2019,6/15/2019
23,1906,6/16/2019,8/31/2019
24,1909,9/1/2019,11/23/2019
25,1911,11/24/2019,3/21/2020


We are using 1909 as the most recent full pick used during our study time (and prior to covid-19). 

In [37]:
def find_trip_count(route, trip_df, daytype, pickno):
    #print(route)
    common = trip_df[(trip_df['PickID'] == pickno) &
              (trip_df['DayType'] == daytype) & 
              (trip_df['RouteCode'] == route)]['Trip.Count']
    if common.empty:
       # print('No match found')
        return(0)
    else:
        return(common.mode().iloc[0])

We start building our table, which will be all routes with column indicators if the route exists in the district. We get this list of routes from gtfs

In [38]:
route = pd.read_csv('../data/gtfs/routes.txt')

In [39]:
routetrip_wkdy = [find_trip_count(r, trip_count, 'Weekday', 1909) for r in route.route_short_name]
routetrip_sat = [find_trip_count(r, trip_count, 'Saturday', 1909) for r in route.route_short_name]
routetrip_sun = [find_trip_count(r, trip_count, 'Sunday', 1909) for r in route.route_short_name]

In [40]:
route_ref = route.copy()
route_ref['wkday_trips'] = routetrip_wkdy
route_ref['sat_trips'] = routetrip_sat
route_ref['sun_trips'] = routetrip_sun

In [41]:
route_spc_ref = route_ref
for dist_no in range(1,45):
    reference = route_matcher(route.route_short_name, dist_no, district_route_dict)
    route_spc_ref[str(dist_no)] = reference

route_spc_ref.head(2)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,wkday_trips,sat_trips,...,35,36,37,38,39,40,41,42,43,44
0,001-183,PAAC,1,Freeport Road,,3,,,48,30,...,0,0,0,0,1,0,0,1,0,0
1,002-183,PAAC,2,Mount Royal,,3,,,53,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
route_spc_ref.to_csv('../Outputs/route_to_district_tb.csv')

Now that we know which routes are in which district, and the frequency of those routes, we can do trip assignment by route and district.
We also want to account for additional trips on a route shifting ridership to that route

In [43]:
def find_dist_ratio(dist_A, dist_B, dist_route_ref_dict,routeNewList=[], routeNewTripCountList=[]):
    '''
    Given two districts, find the routes that serve the two districts. 
    Based on the relative frequency of those routes, assign ridership by frequency.
    If we are adding trips to routes, that is indicated in routeNewList and their additional 
    trips in routeNewTripCountList
    '''
    overlap = list(dist_route_ref_dict[dist_A].intersection(dist_route_ref_dict[dist_B]))
    addRoutes = []
    for r in overlap:
        if r in routeNewList:
            addRoutes.append(routeNewTripCountList[routeNewList.index(r)])
        else:
            addRoutes.append(0)
    if len(overlap) > 0:
        ratiodict = {overlap[i]: (find_trip_count(overlap[i], trip_count, 'Weekday', 1909)+addRoutes[i]) for i in range(len(overlap))}
        total = sum(ratiodict.values())
        ratiodict = {r: v/(total)*1.0 for r,v in ratiodict.items()}
        return(ratiodict)
    else:
        return({})

In [44]:
print(find_dist_ratio(17, 20, district_route_dict))
print(find_dist_ratio(17, 20, district_route_dict, ['P78'], [10]))

{'P16': 0.1794871794871795, '79': 0.5, 'P78': 0.32051282051282054}
{'P16': 0.1686746987951807, '79': 0.46987951807228917, 'P78': 0.3614457831325301}


We also get an individual count of trips for a route given two districts. If the route doesn't exist, return 0. If we are adding trips, that is added to the wkday trip count:

In [45]:
def find_dist_spc(route_no, dist_A, dist_B, dist_route_ref_dict,routeAdd=0):
    overlap_routes = dist_route_ref_dict[dist_A].intersection(dist_route_ref_dict[dist_B])
    if route_no in overlap_routes:
        wkd_cnt = route_ref.loc[route_ref['route_short_name'] == route_no, ['wkday_trips']].iloc[0]['wkday_trips']
        return(wkd_cnt+routeAdd)
    else:
        return(0)

In [46]:
print(find_dist_spc('77', 17, 19, district_route_dict))
print(find_dist_spc('77', 17, 19, district_route_dict, routeAdd=100))
print(find_dist_spc('75', 17, 19, district_route_dict, routeAdd=100))

67
167
0


We make a new matrix of all 1935 44 by 44 district path combinations and indicate route trip counts in columns

In [47]:
dist_A = list(range(1,45))
dist_B = dist_A

spc_dist_full = pd.MultiIndex.from_product([dist_A, dist_B], names = ["dist_A", "dist_B"])
spc_dist_full = pd.DataFrame(index = spc_dist_full).reset_index()

In [48]:
dist_tuples = list(spc_dist_full.itertuples(index=False, name=None))

In [49]:
spc_from_to_routes = route.copy()
for t in dist_tuples:
    route_tuple_col = [find_dist_spc(r, t[0], t[1], district_route_dict) for r in route.route_short_name]
    spc_from_to_routes['d{0}-{1}'.format(t[0],t[1])] = route_tuple_col
spc_from_to_routes.head()
#spc_from_to_routes.to_csv('../Outputs/dist_from_to_routes.csv')

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,d1-1,d1-2,...,d44-35,d44-36,d44-37,d44-38,d44-39,d44-40,d44-41,d44-42,d44-43,d44-44
0,001-183,PAAC,1,Freeport Road,,3,,,0,0,...,0,0,0,0,0,0,0,0,0,0
1,002-183,PAAC,2,Mount Royal,,3,,,0,0,...,0,0,0,0,0,0,0,0,0,0
2,004-183,PAAC,4,Troy Hill,,3,,,0,0,...,0,0,0,0,0,0,0,0,0,0
3,006-183,PAAC,6,Spring Hill,,3,,,0,0,...,0,0,0,0,0,0,0,0,0,0
4,007-183,PAAC,7,Spring Garden,,3,,,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
# def find_ratio_routes(dist_A, dist_B, dist_route_ref_dict,routeNewList=[], routeNewTripCountList=[]):
#     overlap_routes = list(dist_route_ref_dict[dist_A].intersection(dist_route_ref_dict[dist_B]))
#     addRoutes = []
#     for r in overlap_routes:
#         if r in routeNewList:
#             addRoutes.append(routeNewTripCountList[routeNewList.index(r)])
#         else:
#             addRoutes.append(0)
#     print(addRoutes)
#     dist_A_B = [find_dist_spc(overlap_routes[i], dist_A, dist_B, dist_route_ref_dict, addRoutes[i]) for i in range(len(overlap_routes))]
#     return(dist_A_B)

A visual of trip frequency by the routes the run between districts

In [51]:
def make_interchange(dist_no_list, clean=True):
    ''' Given a list of districts or a single district, return the trip counts by route of the district{s}
    and all their overlapping routes
    '''
    spc_dist_mtx = pd.MultiIndex.from_product([dist_A, dist_no_list], names = ["dist_A", dist_no_list[0]])
    spc_dist_mtx = pd.DataFrame(index = spc_dist_mtx).reset_index()

    b_tuples = list(spc_dist_mtx
                    .itertuples(index=False, name=None))
    
    b_from_to_routes = route[['route_short_name']].copy()
    for t in b_tuples:
        route_tuple_coln = [find_dist_spc(r, t[0], t[1], district_route_dict) for r in route.route_short_name]
        b_from_to_routes['d{0}-{1}'.format(t[0],t[1])] = route_tuple_coln

    b_from_to_routes = b_from_to_routes[(b_from_to_routes.iloc[:,1:].T != 0).any()]
    b_from_to_routes = b_from_to_routes.transpose()
    btb_coln = b_from_to_routes.iloc[0] #grab the first row for the header
    b_from_to_routes = b_from_to_routes[1:] #take the data less the header row
    b_from_to_routes.columns = btb_coln
    if clean:
        b_from_to_routes = b_from_to_routes[(b_from_to_routes.iloc[:,1:].T != 0).any()]
    return(b_from_to_routes)

Filterint for just beyond the busway districts:

In [52]:
btb_dist = [11,25]+list(range(17,24))

In [53]:
#for d in btb_dist:
#    distdf = make_interchange([d])
#    distdf.to_csv('../Outputs/dist{0}_interchange.csv'.format(d))

Now we want the number of SPC trips assigned to each route by the frequency ratio.

In [54]:
def make_interchange_counts(dist_no_list, spcMatrix, clean=True, routeNewList=[], routeNewCntList=[]):
    '''
    Given a district or list or districts, goes district by district finding the frequency of relative routes
    and multiplying by the total district to district trip count to get the total SPC trip count by route.
    If we are adding frequency to routes, they are indicated in routeNewList and their respective frequency
    additions in routeNewCntList
    '''
    spc_dist_mtx = pd.MultiIndex.from_product([dist_A, dist_no_list], names = ["dist_A", dist_no_list[0]])
    spc_dist_mtx = pd.DataFrame(index = spc_dist_mtx).reset_index()

    b_tuples = list(spc_dist_mtx
                    .itertuples(index=False, name=None))
    #overlap_routes = dist_route_ref_dict[dist_A].intersection(dist_route_ref_dict[dist_B])
    b_from_to_routes = route[['route_short_name']].copy()
    #print(find_dist_ratio(17, 20, district_route_dict, ['P78'], [10]))
    for t in b_tuples:
        ratioRef = find_dist_ratio(t[0], t[1], district_route_dict, routeNewList, routeNewCntList)
        route_tuple_coln = [ratioRef.get(r,0) for r in route.route_short_name]
        b_from_to_routes[t] = route_tuple_coln
    b_from_to_routes = b_from_to_routes[(b_from_to_routes.iloc[:,1:].T != 0).any()]
    #print(b_from_to_routes)
    b_from_to_routes = b_from_to_routes.transpose()
    btb_coln = b_from_to_routes.iloc[0] #grab the first row for the header
    b_from_to_routes = b_from_to_routes[1:] #take the data less the header row
    b_from_to_routes.columns = btb_coln
    if clean:
        b_from_to_routes = b_from_to_routes[(b_from_to_routes.iloc[:,1:].T != 0).any()]
    print(b_from_to_routes.index)
    sumColn = [find_inbound_A_to_B(spcMatrix, t[0],t[1]) for t in b_from_to_routes.index]
    b_from_to_routes = b_from_to_routes.mul(sumColn, axis=0)
    multSPC = [find_inbound_A_to_B(trans_rider_multiplier, t[0],t[1]) for t in b_from_to_routes.index]
    b_from_to_routes['total'] = sumColn
    #b_from_to_routes['spc45'] = sum2045
    b_from_to_routes['Increase 20-45'] = multSPC
    b_from_to_routes.reset_index(inplace=True)
    b_from_to_routes.rename(columns={'index':'distFromTo'}, inplace=True)
    b_from_to_routes.insert(1, "dist_from",[int(i[0]) for i in b_from_to_routes['distFromTo']])
    b_from_to_routes.insert(2, "dist_to",[int(i[1]) for i in b_from_to_routes['distFromTo']])

    return(b_from_to_routes)

Here we get the full number of SPC trips by route for the whole system. This takes a few minutes to run

In [55]:
#route_spc_full = make_interchange_counts(list(range(1,45)), trans_2020_all, False, [], [])
#route_spc_full.to_csv('../outputs/full_spc_route_assignment.csv')

In [56]:
route_spc_full = pd.read_csv('../outputs/full_spc_route_assignment.csv')
route_spc_full.rename(columns={'Unnamed: 0':'distFromTo'}, inplace=True)
route_spc_full.distFromTo = [make_tuple(t) for t in route_spc_full.distFromTo]
route_spc_full.insert(1, "dist_from",[int(i[0]) for i in route_spc_full['distFromTo']])
route_spc_full.insert(2, "dist_to",[int(i[1]) for i in route_spc_full['distFromTo']])
route_spc_full.head()

Unnamed: 0,distFromTo,dist_from,dist_to,1,2,4,6,7,8,11,...,P76,P78,RED,Y1,Y45,Y46,Y47,Y49,total,Increase 20-45
0,"(1, 1)",1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,65.0,1.153846
1,"(1, 2)",1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,19.626459,0.0,0.0,0.0,0.0,0.0,97.0,1.072165
2,"(1, 3)",1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,1.107143
3,"(1, 4)",1,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.0,1.048387
4,"(1, 5)",1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,71.0,0.957746


`route_spc_full` now indicates for all tuple combinations of 44 districts, the trips estimated by route according to relative frequency.
The `Increase 20-45` column indicates the change from 2020 to 2045 that we will use as a multiplier later

## APC Counts and ridership assignment

Here we load in the number of APC ON and OFF's from a previous join on the stop level dataset of districts. 
By stop/route, we aggregate the APC_ON by route and district in `spc_apc`

In [57]:
spc_apc = pd.read_csv('../Data/joined-spc-rt.csv')

In [58]:
spc_apc['Dist_No'] = pd.to_numeric(spc_apc['Dist_No'], errors='coerce')

In [59]:
spc_apc[spc_apc['Dist_No'].isin(btb_dist)]

Unnamed: 0,Dist_No,Dist_Name,Route,APC_ON_SUM,APC_OFF_SUM,STOP_CNT
0,20.0,Wilkinsburg Wilkins Braddock and vicinity,71C,344,337,19
1,18.0,Plum Borough,P16,24,21,29
2,18.0,Plum Borough,77,27,24,35
3,17.0,Verona Oakmont Penn Hills,P16,287,285,98
6,17.0,Verona Oakmont Penn Hills,P17,87,5,31
...,...,...,...,...,...,...
349,22.0,McKeesport White Oak,P7,144,124,12
365,25.0,Baldwin Brentwood Whitehall West Mifflin,56,13,7,2
366,25.0,Baldwin Brentwood Whitehall West Mifflin,53L,5,1,1
496,11.0,Waterfront (West Homestead Homestead Munhall(p...,61C,527,559,19


Below we return the APC count for route and district, either getting the ON's or OFF's.

In [60]:
def find_apc_route_dist(dist, route, on=True):
    if on:
        apccol = 'APC_ON_SUM'
    else:
        apccol = 'APC_OFF_SUM'
    try:
        s = spc_apc.loc[(spc_apc.Dist_No == dist)&(spc_apc.Route==route),apccol].iloc[0]
        return s
    except:
        return 0

### Assigning APC count ridership destination or origin district by route onboard or offboard

Process:

1. Given a district origin, say 17 - 'Verona Oakmont Penn Hills', we take one of its routes

In [61]:
district_route_dict[17]

{'74', '77', '79', 'P16', 'P17', 'P78'}

Taking route 77 as an example, we find the SPC trip assignment of destinations for trips that END in district 17 on route 77 

In [62]:
r77 = route_spc_full.loc[(route_spc_full['dist_to']==17)&(route_spc_full['77']!=0),['distFromTo', 'dist_from', 'dist_to', 'Increase 20-45', '77']]
print('{} total SPC trips to district 17 on route 77'.format(sum(r77['77'])))
r77

156.96758238780126 total SPC trips to district 17 on route 77


Unnamed: 0,distFromTo,dist_from,dist_to,Increase 20-45,77
412,"(10, 17)",10,17,1.0,5.948598
500,"(12, 17)",12,17,2.545455,4.63522
544,"(13, 17)",13,17,1.066667,9.392523
632,"(15, 17)",15,17,1.060606,13.90566
720,"(17, 17)",17,17,1.004219,54.380137
764,"(18, 17)",18,17,1.125,11.284211
808,"(19, 17)",19,17,1.0,37.0
1820,"(42, 17)",42,17,0.932584,20.421233


We find the APC count of all APC_OFF (since we are going TO district 17) counts in district 77 for route 77:

In [63]:
find_apc_route_dist(17, '77', on=False)

630

This is a drastic difference - the SPC trips estimate 157 trips going to district 17 on route 77, while APC OFF's show 630 offboards on route 77 in district 17

We assign those APC counts by the same ratio of SPC trip counts across district origins for route 77 ending in district 17

In [64]:
r77['APC_19_curr'] = r77['77']/sum(r77['77'])*find_apc_route_dist(17, '77', on=False)
r77

Unnamed: 0,distFromTo,dist_from,dist_to,Increase 20-45,77,APC_19_curr
412,"(10, 17)",10,17,1.0,5.948598,23.875101
500,"(12, 17)",12,17,2.545455,4.63522,18.603769
544,"(13, 17)",13,17,1.066667,9.392523,37.697527
632,"(15, 17)",15,17,1.060606,13.90566,55.811308
720,"(17, 17)",17,17,1.004219,54.380137,218.258355
764,"(18, 17)",18,17,1.125,11.284211,45.289942
808,"(19, 17)",19,17,1.0,37.0,148.502001
1820,"(42, 17)",42,17,0.932584,20.421233,81.961998


This function puts it all together:

In [65]:
def route_destinations_to_d(dist_off, routename, spc_ref = route_spc_full):
    '''
    Taking a district and route, searches the SPC trip estimates for the route ending in district across destination districts
    Uses the same ratio of trip destinations to assign APC counts by origin
    Uses the 2020-2045 multiplier to get the 2045 estimates of SPC and APC ridership by origin
    '''
    # Checks if route is in the district
    if routename not in district_route_dict[dist_off]:
        print('Route {} not in District {}'.format(routename, dist_off))
        return None
    # Returns the SPC trip count of the route across all district pairs in DEST destrict
    rd = spc_ref.loc[(route_spc_full['dist_to']==dist_off)&(spc_ref[routename]!=0),['distFromTo', 'dist_from', 'dist_to', 'Increase 20-45', routename]]
    rd.rename(columns={routename: 'SPC_20'}, inplace=True)
    # Finds the APC OFF total in district for route
    apccount = find_apc_route_dist(dist_off, routename, on=False)
    # Finds the 2045 SPC estimate
    rd['SPC_45'] = rd['SPC_20']*rd['Increase 20-45']
    # Assigns origins by SPC ratio to the APC total count
    rd['APC_19_curr'] = rd['SPC_20']/sum(rd['SPC_20'])*apccount
    # Finds the 2045 projection from APC count
    rd['APC_45_proj'] = rd['APC_19_curr']*rd['Increase 20-45']
    rd.insert(0, 'route', routename)
    rd.insert(3, 'fromDistName', [dis_dict[d] for d in rd['dist_from']])
    rd.insert(5, 'toDistName', dis_dict[dist_off])
    rd = rd.drop(['Increase 20-45', 'distFromTo', ], axis=1)
    return(rd)

Analagously for FROM trips (using APC_ON) and starting in the district

In [66]:
def route_destinations_from_d(dist_on, routename):
    '''
    Taking a district and route, searches the SPC trip estimates for the route starting in district across destination districts
    Uses the same ratio of trip destinations to assign APC counts by destination
    Uses the 2020-2045 multiplier to get the 2045 estimates of SPC and APC ridership by destination
    '''
    # Checks if route is in the district
    if routename not in district_route_dict[dist_on]:
        print('Route {} not in District {}'.format(routename, dist_on))
        return None    
    # Returns the SPC trip count of the route across all district pairs in ORIGIN destrict
    rd = route_spc_full.loc[(route_spc_full['dist_from']==dist_on)&(route_spc_full[routename]!=0),['distFromTo', 'dist_from', 'dist_to', 'Increase 20-45', routename]]
    rd.rename(columns={routename: 'SPC_20'}, inplace=True)
    # Finds the APC ON total in district for route
    apccount = find_apc_route_dist(dist_on, routename, on=True)
    # Finds the 2045 SPC estimate
    rd['SPC_45'] = rd['SPC_20']*rd['Increase 20-45']
    # Assigns destinations by SPC ratio to the APC total count
    rd['APC_19_curr'] = rd['SPC_20']/sum(rd['SPC_20'])*apccount
    # Finds the 2045 projection from APC count
    rd['APC_45_proj'] = rd['APC_19_curr']*rd['Increase 20-45']
    rd.insert(0, 'route', routename)
    rd.insert(3, 'fromDistName', dis_dict[dist_on])
    rd.insert(5, 'toDistName', [dis_dict[d] for d in rd['dist_to']])
    rd = rd.drop(['Increase 20-45', 'distFromTo', ], axis=1)
    return(rd)

In [67]:
route_destinations_to_d(17, '77')

Unnamed: 0,route,dist_from,fromDistName,dist_to,toDistName,SPC_20,SPC_45,APC_19_curr,APC_45_proj
412,77,10,Shadyside,17,Verona Oakmont Penn Hills,5.948598,5.948598,23.875101,23.875101
500,77,12,Downtown,17,Verona Oakmont Penn Hills,4.63522,11.798742,18.603769,47.355049
544,77,13,East Liberty,17,Verona Oakmont Penn Hills,9.392523,10.018692,37.697527,40.210696
632,77,15,Pittsburgh East (remainder),17,Verona Oakmont Penn Hills,13.90566,14.748428,55.811308,59.193811
720,77,17,Verona Oakmont Penn Hills,17,Verona Oakmont Penn Hills,54.380137,54.609589,218.258355,219.179276
764,77,18,Plum Borough,17,Verona Oakmont Penn Hills,11.284211,12.694737,45.289942,50.951184
808,77,19,Monroeville Pitcairn Turtle Creek,17,Verona Oakmont Penn Hills,37.0,37.0,148.502001,148.502001
1820,77,42,Homewood/Lincoln-Lemington,17,Verona Oakmont Penn Hills,20.421233,19.044521,81.961998,76.43647


Now we want to do this for a whole district and all of their routes

In [205]:
def make_dist_from_df(dist_from, spc_ref = route_spc_full):
    routes_b = district_route_dict[dist_from]
    routes_b = list(routes_b.intersection(set(route_spc_full.columns)))
    if len(routes_b) < 1:
        return(None)
    #print(routes_b[1:])
    rider_df = route_destinations_from_d(dist_from, routes_b[0])
    if len(routes_b) > 1:
        for r in routes_b[1:]:
            rider_df = rider_df.append(route_destinations_from_d(dist_from, r))
    return(rider_df)

In [69]:
def make_dist_to_df(dist_to, spc_ref = route_spc_full):
    routes_b = district_route_dict[dist_to]
    routes_b = list(routes_b.intersection(set(spc_ref.columns)))
    if len(routes_b) < 1:
        return(None)
    #print(routes_b[1:])
    rider_df = route_destinations_to_d(dist_to, routes_b[0])
    if len(routes_b) > 1:
        for r in routes_b[1:]:
            rider_df = rider_df.append(route_destinations_to_d(dist_to, r))
    return(rider_df)

In [70]:
make_dist_from_df(17).tail(2)

['74', 'P78', '77', '79', 'P17']


Unnamed: 0,route,dist_from,fromDistName,dist_to,toDistName,SPC_20,SPC_45,APC_19_curr,APC_45_proj
720,P17,17,Verona Oakmont Penn Hills,17,Verona Oakmont Penn Hills,11.363014,11.410959,5.757362,5.781655
745,P17,17,Verona Oakmont Penn Hills,42,Homewood/Lincoln-Lemington,8.342466,9.30137,4.226924,4.712778


Now we find these estimates for all 44 of our districts

In [210]:
from_df = make_dist_from_df(1)
for b in range(2,45):
    print(b)
    try:
        new_df = make_dist_from_df(b)
    except:
        continue
    #new_df.to_csv('../outputs/dist{}_route_destination_ridership.csv'.format(b))
    if new_df is not None:
        from_df = from_df.append(new_df)
from_df.to_csv('../outputs/route_destinations_from_dist.csv')

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


In [72]:
to_df = make_dist_to_df(1)
for b in range(2,45):
    print(b)
    try:
        new_df = make_dist_from_df(b)
    except:
        continue
    #new_df.to_csv('../outputs/dist{}_route_destination_ridership.csv'.format(b))
    if new_df is not None:
        to_df = to_df.append(new_df)
to_df.to_csv('../outputs/route_destinations_to_dist.csv')

2
['15', '8', '17', '54', '18', '2', '11', '16', 'RED', '13', '4', '7', '12', 'O5', '1', '6']
3
['8', '17', '14', '11', '2', '6', '16', '13', '19L', '4', 'O12', '7', '12', 'O5', '1', '54']
4
['16', '13', '19L', '21']
5
['P16', '8', '91', '17', '2', 'P78', 'P10', 'O12', '4', '13', 'O1', '7', '12', '75', 'O5', '1']
6
['52L', '58', '57', '61C', '53L', '93', '61D', '56']
7
['61C', '71', '64', '28X', '65', '71D', '53L', '57', '74', '61A', '67', '61D', '61B', '52L', '71B', 'P71', '69', '93']
8
['81', '75', '61C', '82', '71A', '71C', '28X', '65', '71D', '57', '83', '61A', '67', '61D', '54', '61B', '71B', '69', '93', 'P3', '56']
9
['81', '75', '43', '51', '48', '54']
10
['P7', '75', 'P2', '82', '64', '71A', 'P68', 'P1', '71C', '28X', 'P78', '89', 'P10', 'P76', '88', 'P16', '74', '77', 'P67', 'P69', 'P17', '71B', 'P71', 'P12', '71D', 'P3']
11
['52L', '59', '61C', '53L', '57', '61D']
12
['27', '81', '61C', '82', '17', 'P68', '28X', 'P78', 'P10', '29', 'G3', '8', '51L', 'P67', 'O1', '67', 'G31', 

## Changing a trip frequency

Can we make a function that just returns the SPC trip assignment by route?
That is, given a district and route (and any additional frequency) we get the following, without building the whole dataframe?

In [73]:
route_spc_p7_1 = make_interchange_counts(list(range(1,45)), trans_2020_all, False, ['P7'], [26])

Index([  (1, 1),   (1, 2),   (1, 3),   (1, 4),   (1, 5),   (1, 6),   (1, 7),
         (1, 8),   (1, 9),  (1, 10),
       ...
       (44, 35), (44, 36), (44, 37), (44, 38), (44, 39), (44, 40), (44, 41),
       (44, 42), (44, 43), (44, 44)],
      dtype='object', length=1936)


In [89]:
route_spc_p7_1.head()

route_short_name,distFromTo,dist_from,dist_to,1,2,4,6,7,8,11,...,P76,P78,RED,Y1,Y45,Y46,Y47,Y49,total,Increase 20-45
0,"(1, 1)",1,1,0,0,0,0,0,0,0,...,0,0,12.0,0,0,0,0,0,65.0,1.153846
1,"(1, 2)",1,2,0,0,0,0,0,0,0,...,0,0,19.6265,0,0,0,0,0,97.0,1.072165
2,"(1, 3)",1,3,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,28.0,1.107143
3,"(1, 4)",1,4,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,62.0,1.048387
4,"(1, 5)",1,5,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,71.0,0.957746


In [195]:
def subtract_route_d(orig,new):
    orig2 = orig.iloc[:,3:-2]
    grab_columns = orig2.columns
    orig2 = orig2.values
    new2 = new.iloc[:,3:-2].values
    c = np.subtract(new2, orig2, out=np.zeros_like(new2))
    c = np.around(c.astype(np.double),2)
    c = pd.DataFrame(c)
    c.columns = grab_columns
    c.insert(0, 'distFromTo', orig.distFromTo)
    c.insert(1, 'dist_from',  orig.dist_from)
    c.insert(2, 'dist_to',  orig.dist_to)
    print(c.shape)
    c = c[(c.iloc[:,3:].T.abs() > 0.5).any()]
    print(c.shape)
    mask = [True, True, True] + list((c.iloc[:,3:].abs() > 0.5).any(axis=0))
    c = c.loc[:, mask]
    print(c.shape)
    c['total'] = orig.total
    c['Increase 20-45'] = orig['Increase 20-45']
    return(c)

We generated the easier to read matrix of district to distric route counts, only showing the difference. We can represent the SPC 2045 difference with the multiplier, given these properties:
    
* spc_route_A = Atrips/All_trips * spc_total
* spc_route_A_2045 = Atrips/All_trips * spc_total * multiplier2045
* spc_route_A_new = (Atrips + AtripsMore)/(All_trips + AtripsMore) * spc_total
* spc_route_A_new_2045 = (Atrips + AtripsMore)/(All_trips + AtripsMore) * spc_total * multplier2045


In [207]:
def route_destinations_no_apc(dist_on, routename, spc_ref):
    '''
    Taking a district and route, searches the SPC trip estimates for the route starting in district across destination districts
    Uses the same ratio of trip destinations to assign APC counts by destination
    Uses the 2020-2045 multiplier to get the 2045 estimates of SPC and APC ridership by destination
    '''
    # Checks if route is in the district
    if routename not in district_route_dict[dist_on]:
        print('Route {} not in District {}'.format(routename, dist_on))
        return None    
    # Returns the SPC trip count of the route across all district pairs in ORIGIN destrict
    rd = spc_ref.loc[(spc_ref['dist_from']==dist_on)&(spc_ref[routename]!=0),['distFromTo', 'dist_from', 'dist_to', 'Increase 20-45', routename]]
    rd.rename(columns={routename: 'SPC_20'}, inplace=True)
    # Finds the 2045 SPC estimate
    rd['SPC_45'] = rd['SPC_20']*rd['Increase 20-45']
    # Assigns destinations by SPC ratio to the APC total count
    rd.insert(0, 'route', routename)
    rd.insert(3, 'fromDistName', dis_dict[dist_on])
    rd.insert(5, 'toDistName', [dis_dict[d] for d in rd['dist_to']])
    rd = rd.drop(['Increase 20-45', 'distFromTo', ], axis=1)
    return(rd)

In [211]:
def make_dist_df_no_apc(dist_to, spc_ref = route_spc_full):
    routes_b = district_route_dict[dist_to]
    routes_b = list(routes_b.intersection(set(spc_ref.columns)))
    if len(routes_b) < 1:
        return(None)
    #print(routes_b[1:])
    rider_df = route_destinations_no_apc(dist_to, routes_b[0], spc_ref)
    if len(routes_b) > 1:
        for r in routes_b[1:]:
            rider_df = rider_df.append(route_destinations_no_apc(dist_to, r, spc_ref))
    return(rider_df)

In [214]:
routediff_p7_1 = subtract_route_d(route_spc_full, route_spc_p7_1)
routediff_p7_1.to_csv('../Outputs/p7_diff_1.csv')
from_df_diff_p71 = make_dist_df_no_apc(1, routediff_p7_1)
for b in range(2,45):
    try:
        from_df_diff_p7_1 = make_dist_df_no_apc(b, routediff_p7_1)
    except:
        continue
    #new_df.to_csv('../outputs/dist{}_route_destination_ridership.csv'.format(b))
    if from_df_diff_p7_1 is not None:
        from_df_diff_p7_1 = from_df_diff_p7_1.append(from_df_diff_p7_1)

(1936, 99)
(50, 99)
(50, 44)


In [215]:
from_df_diff_p7_1

In [88]:
print(route_spc_full.shape)
print(route_spc_p7_1.shape)

(1936, 101)
(1936, 101)


In [82]:
route_spc_p7_1.head()

route_short_name,distFromTo,dist_from,dist_to,1,2,4,6,7,8,11,...,P76,P78,RED,Y1,Y45,Y46,Y47,Y49,total,Increase 20-45
0,"(1, 1)",1,1,0,0,0,0,0,0,0,...,0,0,12.0,0,0,0,0,0,65.0,1.153846
1,"(1, 2)",1,2,0,0,0,0,0,0,0,...,0,0,19.6265,0,0,0,0,0,97.0,1.072165
2,"(1, 3)",1,3,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,28.0,1.107143
3,"(1, 4)",1,4,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,62.0,1.048387
4,"(1, 5)",1,5,0,0,0,0,0,0,0,...,0,0,0.0,0,0,0,0,0,71.0,0.957746


In [76]:
from_df_p71 = make_dist_to_df(1, roue_spc_p7_1)
for b in range(2,45):
    print(b)
    try:
        new_df = make_dist_from_df(b, roue_spc_p7_1)
    except:
        continue
    #new_df.to_csv('../outputs/dist{}_route_destination_ridership.csv'.format(b))
    if new_df is not None:
        from_df = from_df.append(new_df)

2
['15', '8', '17', '54', '18', '2', '11', '16', 'RED', '13', '4', '7', '12', 'O5', '1', '6']
3
['8', '17', '14', '11', '2', '6', '16', '13', '19L', '4', 'O12', '7', '12', 'O5', '1', '54']
4
['16', '13', '19L', '21']
5
['P16', '8', '91', '17', '2', 'P78', 'P10', 'O12', '4', '13', 'O1', '7', '12', '75', 'O5', '1']
6
['52L', '58', '57', '61C', '53L', '93', '61D', '56']
7
['61C', '71', '64', '28X', '65', '71D', '53L', '57', '74', '61A', '67', '61D', '61B', '52L', '71B', 'P71', '69', '93']
8
['81', '75', '61C', '82', '71A', '71C', '28X', '65', '71D', '57', '83', '61A', '67', '61D', '54', '61B', '71B', '69', '93', 'P3', '56']
9
['81', '75', '43', '51', '48', '54']
10
['P7', '75', 'P2', '82', '64', '71A', 'P68', 'P1', '71C', '28X', 'P78', '89', 'P10', 'P76', '88', 'P16', '74', '77', 'P67', 'P69', 'P17', '71B', 'P71', 'P12', '71D', 'P3']
11
['52L', '59', '61C', '53L', '57', '61D']
12
['27', '81', '61C', '82', '17', 'P68', '28X', 'P78', 'P10', '29', 'G3', '8', '51L', 'P67', 'O1', '67', 'G31', 

In [78]:
from_df_p71.head()

Unnamed: 0,route,dist_from,fromDistName,dist_to,toDistName,SPC_20,SPC_45,APC_19_curr,APC_45_proj
0,15,1,Pittsburgh Chateau Area,1,Pittsburgh Chateau Area,4.538462,5.236686,1.703814,1.965939
44,15,2,Pittsburgh Lower North Side,1,Pittsburgh Chateau Area,6.810636,6.810636,2.556826,2.556826
88,15,3,Pittsburgh Upper North Side,1,Pittsburgh Chateau Area,17.196682,18.175355,6.455922,6.823332
484,15,12,Downtown,1,Pittsburgh Chateau Area,6.073529,7.335561,2.280104,2.753892
572,15,14,Strip District,1,Pittsburgh Chateau Area,8.0,16.0,3.003334,6.006667


In [79]:
from_df_p71.to_csv('../outputs/route_destinations_from_dist_p7_1.csv')
from_df_p71.loc[(from_df_p71['P7']!=0),['distFromTo', 'dist_from', 'dist_to', 'Increase 20-45', 'P7']]#.to_csv('../outputs/p7.csv')

KeyError: 'P7'