Modify canonical operators and technologies based on the spreadsheet located in \Box\Modeling and Surveys\Surveys\check_canonical_routes_technology_v2.xlsx. A manual review was conducted by Shimon and Yuqi in April 2021 to create this spreadsheet.

In [1]:
import pandas as pd
import os

In [2]:
df_old = pd.read_csv('C:\\Users\\{}\\Documents\\GitHub\\onboard-surveys\\make-uniform\\production\\canonical_route_crosswalk.csv'.format(os.getenv('USERNAME')))
display(df_old.head())
print('the old canonical_route_crosswalk has {} unique canonical names'.format(len(df_old.canonical_name.unique())))

Unnamed: 0,survey,survey_year,survey_name,canonical_name,canonical_operator,technology
0,AC Transit,2018,AC Transit 1 - San Leandro Bart- Dtn. Oakland,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus
1,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO D...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus
2,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO S...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus
3,AC Transit,2018,AC Transit 10 - San Leandro BART - Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,local bus
4,AC Transit,2018,ROUTE 10 - San Leandro BART - Hayward BART [TO...,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,local bus


the old canonical_route_crosswalk has 4301 unique canonical names


In [3]:
df_note = pd.read_excel('C:\\Users\\{}\\Box\\Modeling and Surveys\\Surveys\\check_canonical_routes_technology_v2.xlsx'.format(os.getenv('USERNAME')),
                       sheet_name='unique_canonical_routes')
display(df_note.head())

# update 'canonical_name' with 'new_canonical_name' if available
df_note['old_canonical_name'] = df_note['canonical_name']
df_note.loc[df_note.new_canonical_name.notnull(), 'canonical_name'] = df_note['new_canonical_name']

# keep need fields
df_note = df_note[['old_canonical_name', 'canonical_name', 'operator',
                   'canonical_operator', 'technology', 'canonical_technology']]
# rename to be consistent with the previous canonical route crosswalk
df_note.columns = ['canonical_name_old', 'canonical_name_new', 'operator_detail',
                   'canonical_operator_new', 'technology_detail', 'technology_new']

# fill in technology_detail and operator_detail
df_note.loc[df_note.operator_detail.isnull(), 'operator_detail'] = df_note['canonical_operator_new']
df_note.loc[df_note.technology_detail.isnull(), 'technology_detail'] = df_note['technology_new']

# make sure no duplicated rows, and no duplicates in 'canonical_name' field
print('df_note has {} rows, {} unique canonical names'.format(df_note.shape[0],
                                                              len(df_note.canonical_name_old.unique())))
print('df_note contains no duplicated rows: {}'.format(df_note.shape[0] == df_note.drop_duplicates().shape[0]))

display(df_note.head())

Unnamed: 0,canonical_name,new_canonical_name,operator,canonical_operator,technology,canonical_technology,Notes
0,AC TRANSIT___1 Berkeley BART to Bay Fair BART,,,AC TRANSIT,,local bus,
1,AC TRANSIT___10 San Leandro BART Hayward BART,,,AC TRANSIT,,local bus,
2,AC TRANSIT___12 Berkeley BART to Downtown Oakland,,,AC TRANSIT,,local bus,
3,AC TRANSIT___14 Downtown Oakland to Fruitvale ...,,,AC TRANSIT,,local bus,
4,AC TRANSIT___18 University Village Albany to M...,,,AC TRANSIT,,local bus,


df_note has 4301 rows, 4301 unique canonical names
df_note contains no duplicated rows: True


Unnamed: 0,canonical_name_old,canonical_name_new,operator_detail,canonical_operator_new,technology_detail,technology_new
0,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
1,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,AC TRANSIT,local bus,local bus
2,AC TRANSIT___12 Berkeley BART to Downtown Oakland,AC TRANSIT___12 Berkeley BART to Downtown Oakland,AC TRANSIT,AC TRANSIT,local bus,local bus
3,AC TRANSIT___14 Downtown Oakland to Fruitvale ...,AC TRANSIT___14 Downtown Oakland to Fruitvale ...,AC TRANSIT,AC TRANSIT,local bus,local bus
4,AC TRANSIT___18 University Village Albany to M...,AC TRANSIT___18 University Village Albany to M...,AC TRANSIT,AC TRANSIT,local bus,local bus


In [4]:
# join note back to canonical_route_crosswalk
df = df_old.merge(df_note,
                  left_on='canonical_name',
                  right_on='canonical_name_old',
                  how='left')
display(df.head())

Unnamed: 0,survey,survey_year,survey_name,canonical_name,canonical_operator,technology,canonical_name_old,canonical_name_new,operator_detail,canonical_operator_new,technology_detail,technology_new
0,AC Transit,2018,AC Transit 1 - San Leandro Bart- Dtn. Oakland,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
1,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO D...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
2,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO S...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,local bus,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
3,AC Transit,2018,AC Transit 10 - San Leandro BART - Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,local bus,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,AC TRANSIT,local bus,local bus
4,AC Transit,2018,ROUTE 10 - San Leandro BART - Hayward BART [TO...,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,local bus,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,AC TRANSIT,local bus,local bus


In [5]:
# check canonical routes that didn't find a join - should be none
df.loc[df.canonical_name_old.isnull()][['survey','survey_year','survey_name',
                                        'canonical_name','canonical_operator','technology',
                                        'canonical_name_new', 'canonical_operator_new', 'technology_new']]

Unnamed: 0,survey,survey_year,survey_name,canonical_name,canonical_operator,technology,canonical_name_new,canonical_operator_new,technology_new


In [6]:
# examine where canonical_operator have changed
df.loc[df.canonical_name != df.canonical_name_new][['canonical_name','canonical_operator','technology',
                                                    'canonical_name_new', 'canonical_operator_new', 'technology_new']].drop_duplicates()

Unnamed: 0,canonical_name,canonical_operator,technology,canonical_name_new,canonical_operator_new,technology_new
219,AC TRANSIT___Alameda County employee shuttle (...,AC TRANSIT,local bus,Alameda County___Alameda County employee shutt...,Bay Area Shuttles,local bus
220,"AC TRANSIT___Alta Bates Shuttles (MacArthur, R...",AC TRANSIT,local bus,"Alta Bates___Alta Bates Shuttles (MacArthur, R...",Bay Area Shuttles,local bus
233,AC TRANSIT___DB Dumbarton Express,AC TRANSIT,express bus,DUMBARTON___DB Dumbarton Express,DUMBARTON,express bus
235,AC TRANSIT___DB1 Dumbarton Express,AC TRANSIT,local bus,DUMBARTON___DB1 Dumbarton Express,DUMBARTON,express bus
238,AC TRANSIT___DB1 Union City BART to Stanford I...,AC TRANSIT,local bus,DUMBARTON___DB1 Dumbarton Express,DUMBARTON,express bus
517,AC TRANSIT___DB Union City BART to Stanford Oval,AC TRANSIT,express bus,DUMBARTON___DB Union City BART to Stanford Oval,DUMBARTON,express bus
520,AC TRANSIT___Estuary Crossing College of Alame...,AC TRANSIT,local bus,City of Alameda___Estuary Crossing College of ...,Bay Area Shuttles,local bus
526,AirTrain___AirTrain (SF Airport),AirTrain,heavy rail,SFO AirTrain___AirTrain (SF Airport),AirTrain,heavy rail
2515,CALTRAIN___Burlingame Bayside Shuttle,CALTRAIN,local bus,SanMateo___Burlingame Bayside Shuttle,Bay Area Shuttles,local bus
2516,CALTRAIN___Crocker Park Brisbane Shuttle (Balb...,CALTRAIN,local bus,SanMateo___Crocker Park Brisbane Shuttle (Balb...,Bay Area Shuttles,local bus


In [7]:
# examine where technology has changed
df.loc[df.technology != df.technology_new][['canonical_name','canonical_operator','technology',
                                            'canonical_name_new', 'canonical_operator_new',
                                            'technology_detail', 'technology_new']].drop_duplicates()

Unnamed: 0,canonical_name,canonical_operator,technology,canonical_name_new,canonical_operator_new,technology_detail,technology_new
235,AC TRANSIT___DB1 Dumbarton Express,AC TRANSIT,local bus,DUMBARTON___DB1 Dumbarton Express,DUMBARTON,express bus,express bus
238,AC TRANSIT___DB1 Union City BART to Stanford I...,AC TRANSIT,local bus,DUMBARTON___DB1 Dumbarton Express,DUMBARTON,express bus,express bus
265,AC TRANSIT___NX1 Fruitvale Ave & MacArthur Blvd,AC TRANSIT,local bus,AC TRANSIT___NX1 Fruitvale Ave & MacArthur Blvd,AC TRANSIT,express bus,express bus
266,AC TRANSIT___NX2 High St & MacArthur Blvd,AC TRANSIT,local bus,AC TRANSIT___NX2 High St & MacArthur Blvd,AC TRANSIT,express bus,express bus
267,AC TRANSIT___NX3 Marlow Dr & Foothill Way Oakland,AC TRANSIT,local bus,AC TRANSIT___NX3 Marlow Dr & Foothill Way Oakland,AC TRANSIT,express bus,express bus
269,AC TRANSIT___NX4 Castro Valley Park & Ride,AC TRANSIT,local bus,AC TRANSIT___NX4 Castro Valley Park & Ride,AC TRANSIT,express bus,express bus
420,COUNTY CONNECTION___92X ACE Express,COUNTY CONNECTION,local bus,COUNTY CONNECTION___92X ACE Express,COUNTY CONNECTION,express bus,express bus
2554,COUNTY CONNECTION___91X Concord Commuter Express,COUNTY CONNECTION,local bus,COUNTY CONNECTION___91X Concord Commuter Express,COUNTY CONNECTION,express bus,express bus
2556,COUNTY CONNECTION___93X Kirker Pass Express,COUNTY CONNECTION,local bus,COUNTY CONNECTION___93X Kirker Pass Express,COUNTY CONNECTION,express bus,express bus
2557,COUNTY CONNECTION___95X San Ramon BART Walnut ...,COUNTY CONNECTION,local bus,COUNTY CONNECTION___95X San Ramon BART Walnut ...,COUNTY CONNECTION,express bus,express bus


In [8]:
# keep only needed fields and rename the fields

df_clean = df[['survey', 'survey_year', 'survey_name', 'canonical_name_new',
               'canonical_operator_new', 'operator_detail',
               'technology_new', 'technology_detail']]
df_clean.rename(columns = {'canonical_name_new': 'canonical_name',
                           'canonical_operator_new': 'canonical_operator',
                           'technology_new': 'technology'}, inplace=True)
display(df_clean.head())

# check for duplicates
print('the updated crosswalk has {} unique canonical names'.format(len(df_clean.canonical_name.unique())))
print('the updated crosswalk contains no duplicated crosswalk: {}'.format(
    df_clean.shape[0] == df_clean[['survey', 'survey_year', 'survey_name', 'canonical_name']].drop_duplicates().shape[0]))


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,survey,survey_year,survey_name,canonical_name,canonical_operator,operator_detail,technology,technology_detail
0,AC Transit,2018,AC Transit 1 - San Leandro Bart- Dtn. Oakland,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
1,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO D...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
2,AC Transit,2018,ROUTE 1 - San Leandro Bart- Dtn. Oakland [TO S...,AC TRANSIT___1 Berkeley BART to Bay Fair BART,AC TRANSIT,AC TRANSIT,local bus,local bus
3,AC Transit,2018,AC Transit 10 - San Leandro BART - Hayward BART,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,AC TRANSIT,local bus,local bus
4,AC Transit,2018,ROUTE 10 - San Leandro BART - Hayward BART [TO...,AC TRANSIT___10 San Leandro BART Hayward BART,AC TRANSIT,AC TRANSIT,local bus,local bus


the updated crosswalk has 4291 unique canonical names
the updated crosswalk contains no duplicated crosswalk: True


In [9]:
# export to replace the previous crosswalk
df_clean.to_csv('C:\\Users\\{}\\Documents\\GitHub\\onboard-surveys\\make-uniform\\production\\canonical_route_crosswalk.csv'.format(os.getenv('USERNAME')),
               index=False)