# Overview

This Jupyter Notebook generates `lines.json`, which is used by the MyBus tool to populate the line dropdown on the landing page.

This version is made specifically for the September 2021 shakeup.


In [118]:
import pandas as pd
import numpy as np

In [119]:
# the routes.txt file could be pulled from GitLab but then it would change once the GTFS is updated.
#ROUTES_PATH = 'https://gitlab.com/LACMTA/gtfs_bus/-/raw/master/routes.txt'

# instead, we use a local copy of the specific version we want to use
ROUTES_PATH = '../data/input/routes.txt'

OUTPUT_PATH = '../data/'

# scratch work folder, this directory is git-ignored
SCRATCH_PATH = 'scratch/'

In [120]:
# Read in `routes.txt` directly from the GitLab repository.
# This may pose a problem if the GTFS changes unexpectedly.

lines_df = pd.read_csv(ROUTES_PATH, 
    usecols={'route_id', 'route_short_name'},
    dtype={'route_id':'string', 'route_short_name':'string'})

lines_df

FileNotFoundError: [Errno 2] No such file or directory: '../dtaa/input/routes.txt'

In [None]:
# Remove the HASTUS build number from the route_id.
# Not sure if the front-end actually needs it or not, so we may need to put it back.

lines_df.route_id = lines_df.route_id.str.replace(r'-.*', '', regex=True)

lines_df

Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
113,901,
114,910,
115,DSE,South Bay Dodger Stadium Express
116,DSE,Dodger Stadium Express


## Modify the Routes List

Start: 118 rows

1. Remove the temporary shuttles (Dodger Stadium Express, SoFi Stadium Express, and L Line (Gold)).
2. Add `route_short_name` for the Orange and Silver Lines.
3. Split the sister-routes.

In [None]:
# 1. Remove the temporary shuttles (Dodger Stadium Express, SoFi Stadium Express, and L Line (Gold)).

# 2x Dodger Stadium Express
# 1x SoFi Stadium Express
# 1x L Line (Gold) Shuttle
# 118 - 4 = 114 rows after this

lines_df = lines_df.loc[~lines_df.route_id.isin(['DSE', '854', 'SOFI'])]

lines_df

Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
109,733,733
110,754,754
111,761,761
113,901,


In [None]:
# 2. Add route_short_name values for the 901 (Orange Line) and 910/950 (Silver Line)
lines_df.loc[lines_df.route_id == '910', 'route_short_name'] = '910/950'
lines_df.loc[lines_df.route_id == '901', 'route_short_name'] = '901'

lines_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,route_id,route_short_name
0,2,2
1,4,4
2,10,10/48
3,14,14/37
4,16,16
...,...,...
109,733,733
110,754,754
111,761,761
113,901,901


In [None]:
lines_separated = lines_df.loc[lines_df.route_short_name.str.contains('/'), 'route_short_name'].values

# 8 rows have sister routes
lines_separated

<StringArray>
['10/48', '14/37', '35/38', '78/79', '211/215', '222/656', '242/243',
 '910/950']
Length: 8, dtype: string

In [None]:
# 3. Split the sister routes
# This will double the number of sister routes, adding 8 rows to the total.
# 114 + 8 = 122 rows
lines_separated = lines_df.loc[lines_df.route_short_name.str.contains('/'), 'route_short_name'].values

for i, l in enumerate(lines_separated):
    id = lines_df.loc[lines_df.route_short_name == l]['route_id'].values[0]
    slash = l.find('/')
    line1 = l[:slash]
    line2 = l[slash+1:]
    
    lines_df = lines_df.loc[~lines_df.route_id.isin([id])]
    newlines = pd.DataFrame([[id, line1], [id, line2]], columns=['route_id', 'route_short_name'])
    lines_df = lines_df.append(newlines, ignore_index=True)

# cast route_short_name to int32 so that we can sort by their integer value
lines_df = lines_df.astype({'route_short_name': 'int32'}).sort_values('route_short_name')

lines_df.tail

<bound method NDFrame.tail of     route_id  route_short_name
0          2                 2
1          4                 4
106       10                10
108       14                14
2         16                16
..       ...               ...
103      754               754
104      761               761
105      901               901
120      910               910
121      910               950

[122 rows x 2 columns]>

In [None]:
# cast route_short_name to string so that we can add in the line names for the G & J lines,
# which will display in the dropdown on the front end

lines_df = lines_df.astype({'route_short_name': 'str'})

lines_df.loc[lines_df.route_short_name == '910', 'route_short_name'] = '910 / J Line (Silver)'
lines_df.loc[lines_df.route_short_name == '950', 'route_short_name'] = '950 / J Line (Silver)'
lines_df.loc[lines_df.route_short_name == '901', 'route_short_name'] = '901 / G Line (Orange)'

lines_df.tail()

Unnamed: 0,route_id,route_short_name
103,754,754
104,761,761
105,901,901 / G Line (Orange)
120,910,910 / J Line (Silver)
121,910,950 / J Line (Silver)


In [None]:
lines_df.drop(columns=['route_id'], inplace=True)

lines_df

Unnamed: 0,route_short_name
0,2
1,4
106,10
108,14
2,16
...,...
103,754
104,761
105,901 / G Line (Orange)
120,910 / J Line (Silver)


In [None]:
# Finally, output this list of lines to a JSON file
# with one object for each line.

lines_df.to_json(OUTPUT_PATH + 'lines.json', orient='records')