# Overview

This Jupyter Notebook generates `lines.json`, which is used by the MyBus tool to populate the line dropdown on the landing page.

This version is made specifically for the September 2021 shakeup.



In [1]:
import pandas as pd
import numpy as np

In [2]:
# Use a local copy of the GTFS in case the version in the GitLab repository changes.
# https://gitlab.com/LACMTA/gtfs_bus/-/raw/master/routes.txt
ROUTES_PATH = '../data/input/routes.txt'

OUTPUT_PATH = '../data/'

# scratch work folder, this directory is git-ignored
SCRATCH_PATH = 'scratch/'

In [3]:
lines_df = pd.read_csv(ROUTES_PATH, 
    usecols={'route_id', 'route_short_name'},
    dtype={'route_id':'string', 'route_short_name':'string'})

lines_df

Unnamed: 0,route_id,route_short_name
0,2-13149,2
1,4-13149,4
2,10-13149,10/48
3,14-13149,14/37
4,16-13149,16
...,...,...
110,901-13149,
111,910-13149,
112,DSE-HG,South Bay Dodger Stadium Express
113,DSE-US,Dodger Stadium Express


In [4]:
# Remove the HASTUS build number from the route_id.
# Not sure if the front-end actually needs it or not, so we may need to put it back.

lines_df.route_id = lines_df.route_id.str.replace(r'-.*', '', regex=True)

lines_df

Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
110,901,
111,910,
112,DSE,South Bay Dodger Stadium Express
113,DSE,Dodger Stadium Express


## Modify the Routes List

Start: 118 rows

1. Remove the temporary shuttles (Dodger Stadium Express, SoFi Stadium Express, and L Line (Gold)).
2. Add `route_short_name` for the Orange and Silver Lines.
3. Split the sister-routes.

In [5]:
# 1. Remove the temporary shuttles (Dodger Stadium Express, SoFi Stadium Express).
# 2x Dodger Stadium Express
# 1x SoFi Stadium Express
# 118 - 3 = 115 rows after this

lines_df = lines_df.loc[~lines_df.route_id.isin(['DSE', 'SOFI'])]

lines_df

Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
107,754,754
108,761,761
109,854,
110,901,


In [6]:
# 2. Add route_short_name values for the 901 (Orange Line) and 910/950 (Silver Line)
lines_df.loc[lines_df.route_id == '910', 'route_short_name'] = '910/950'
lines_df.loc[lines_df.route_id == '901', 'route_short_name'] = '901'

# And the L Line Shuttle
lines_df.loc[lines_df.route_id == '854', 'route_short_name'] = '854'

# Fix route_short_name values for the 236 and 487 to add in the 235 and 489.
# They are wrongly missing from the GTFS.
lines_df.loc[lines_df.route_id == '236', 'route_short_name'] = '236/235'
lines_df.loc[lines_df.route_id == '487', 'route_short_name'] = '487/489'

lines_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
107,754,754
108,761,761
109,854,854
110,901,901


In [7]:
# Add in a route_number column
lines_df['route_number'] = lines_df.route_id

lines_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,route_id,route_short_name,route_number
0,2,2,2
1,4,4,4
2,10,10/48,10
3,14,14/37,14
4,16,16,16


In [8]:
lines_separated = lines_df.loc[lines_df.route_short_name.str.contains('/'), 'route_short_name'].values

# 10 rows have sister routes
lines_separated

<StringArray>
[  '10/48',   '14/37',   '35/38',   '78/79', '211/215', '224/690', '236/235',
 '242/243', '487/489', '910/950']
Length: 10, dtype: string

In [9]:
# 3. Split the sister routes
# This will double the number of sister routes, adding 8 rows to the total.
# 115 + 10 = 125 rows
lines_separated = lines_df.loc[lines_df.route_short_name.str.contains('/'), 'route_short_name'].values

for i, l in enumerate(lines_separated):
    route_id = lines_df.loc[lines_df.route_short_name == l]['route_id'].values[0]
    slash = l.find('/')
    line1 = l[:slash]
    line2 = l[slash+1:]
    
    lines_df = lines_df.loc[~lines_df.route_id.isin([route_id])]
    newlines = pd.DataFrame([[route_id, line1, line1], [route_id, line2, line2]], columns=['route_id', 'route_number', 'route_short_name'])
    lines_df = lines_df.append(newlines, ignore_index=True)

# cast route_short_name to int32 so that we can sort by their integer value
lines_df = lines_df.astype({'route_short_name': 'int32'}).sort_values('route_short_name')

lines_df.tail

<bound method NDFrame.tail of     route_id  route_short_name route_number
0          2                 2            2
1          4                 4            4
102       10                10           10
104       14                14           14
2         16                16           16
..       ...               ...          ...
99       761               761          761
100      854               854          854
101      901               901          901
120      910               910          910
121      910               950          950

[122 rows x 3 columns]>

In [10]:
# cast route_short_name to string so that we can add in the line names for the G & J lines,
# which will display in the dropdown on the front end

lines_df = lines_df.astype({'route_short_name': 'str'})

lines_df.loc[lines_df.route_short_name == '910', 'route_short_name'] = '910 / J Line (Silver)'
lines_df.loc[lines_df.route_short_name == '950', 'route_short_name'] = '950 / J Line (Silver)'
lines_df.loc[lines_df.route_short_name == '901', 'route_short_name'] = '901 / G Line (Orange)'

# add line name for the L Line (Gold) Shuttle
lines_df.loc[lines_df.route_short_name == '854', 'route_short_name'] = '854 / L Line (Gold) Shuttle'

lines_df.tail()

Unnamed: 0,route_id,route_short_name,route_number
99,761,761,761
100,854,854 / L Line (Gold) Shuttle,854
101,901,901 / G Line (Orange),901
120,910,910 / J Line (Silver),910
121,910,950 / J Line (Silver),950


In [11]:
# Finally, output this list of lines to a JSON file
# with one object for each line.

lines_df.to_json(OUTPUT_PATH + 'lines.json', orient='records')