# Overview

This Jupyter Notebook takes in data from a Google Sheet that contains line change details and their associated high-level categories, and outputs a JSON file for the MyBus tool.

The output file is used by the MyBus tool's results page and contains the Line-level changes that are displayed there.

Run all cells to generate `line-changes.json` in the `../data/` folder.

In [13]:
import pandas as pd
# Published-to-web CSV export of the Google Sheet that holds the line change details.
GOOGLE_SHEET_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQq0095iOV4dn5McH5IgL4tfjBGLRpCS4XIw-TsZKXubWLyycCfbmnyWdDJRr73ctUMjv32DvKmvVbj/pub?output=csv'

# Folder prefixes are joined to file names by plain string concatenation
# (PREFIX + 'file.ext'), so each one must end with a trailing slash.
DATA_INPUT_PATH = '../data/input/'
DATA_OUTPUT_PATH = '../data/'

In [20]:
# Maps each Google Sheet column header to the key used in the output JSON.
# Renaming by header (instead of assigning a list of names by position) keeps
# the labels correct even if the sheet's columns are reordered, because
# `usecols` selects columns but does not control the order they come back in.
# NOTE(review): the Service/Route/Schedule -> card-1/card-2/card-3 pairing
# assumes the sheet lists its columns in the order below, which is what the
# previous positional rename relied on -- confirm against the sheet.
SHEET_TO_JSON_COLUMNS = {
    'Line Number': 'line-number',
    'Line Label': 'line-label',
    'Line Description': 'line-description',
    'Lines Merged': 'lines-merged',
    'Line Discontinued': 'line-discontinued',
    'Service Restored': 'service-restored',
    'Service': 'card-1',
    'Route': 'card-2',
    'Schedule': 'card-3',
    'Current Schedule URL': 'current-schedule-url',
}

# Offline fallback: point read_csv at a saved CSV export instead of the live sheet, e.g.
#   pd.read_csv(DATA_INPUT_PATH.rstrip('/') + '/mybus-line-changes-data.csv',
#               usecols=list(SHEET_TO_JSON_COLUMNS))
# (the older local read also requested a 'Schedule URL' column, which is not used here).
line_changes = (
    pd.read_csv(GOOGLE_SHEET_URL, usecols=list(SHEET_TO_JSON_COLUMNS))
    .rename(columns=SHEET_TO_JSON_COLUMNS)
    # Fix the column order so the output does not depend on the sheet layout.
    .loc[:, list(SHEET_TO_JSON_COLUMNS.values())]
    # Blank cells become empty strings rather than NaN/null in the JSON.
    .fillna('')
)
line_changes.head()

Unnamed: 0,line-number,line-label,line-description,lines-merged,line-discontinued,service-restored,card-1,card-2,card-3,current-schedule-url
0,2,2,Downtown LA - Westwood via Sunset Bl,False,False,False,,,,
1,4,4,Downtown LA - Santa Monica via Santa Monica Bl,True,False,False,,,,
2,10,10,West Hollywood - Downtown LA via Melrose Av,False,False,False,,,,
3,14,14,Beverly Center - Downtown LA via Beverly Bl,False,False,False,,,,
4,16,16,Downtown LA - West Hollywood via West 3rd St,False,False,False,,,,


In [24]:
import os

# Folder (relative to the current working directory) that holds the timetable
# files, and the URL prefix written into the output for files in that folder.
SCHEDULES_DIR = "../files/schedules"
SCHEDULE_URL_PREFIX = "./files/schedules/"


def parse_line_numbers(filename):
    """Return the line numbers encoded in a timetable file name.

    The part of the name before "_TT" is a dash-separated list of line
    numbers, e.g. "487-489_TT_09-12-21.pdf" -> ["487", "489"]. Spaces are
    ignored and leading zeros are dropped ("002" -> "2").

    Returns an empty list when the name has no "_TT" marker or when any
    piece before it is not a whole number, so stray files (".DS_Store",
    notes, misnamed PDFs) are skipped instead of breaking the integer
    conversion performed later on the 'line-number' column.
    """
    compact = filename.replace(" ", "")
    if "_TT" not in compact:
        return []
    tokens = compact.split("_TT")[0].split("-")
    if not all(token.isdecimal() for token in tokens):
        return []
    return [str(int(token)) for token in tokens]


# Fail with a clear message instead of silently producing an empty table
# (os.walk yields nothing for a missing directory).
if not os.path.isdir(SCHEDULES_DIR):
    raise FileNotFoundError(
        "Schedule folder not found: " + repr(SCHEDULES_DIR)
        + " (the path is relative to the current working directory)"
    )

# One record per (line number, file); a combined file such as
# "910-950_TT_..." yields one record for each line it covers.
pdfs_list = []
unparsed_files = []

for root, dirs, files in os.walk(SCHEDULES_DIR):
    # Sorting makes the walk order (and therefore the row order) reproducible
    # across filesystems; os.walk honours in-place changes to `dirs`.
    dirs.sort()

    # Keep the subfolder in the URL so files below the top level still resolve.
    subfolder = os.path.relpath(root, SCHEDULES_DIR)
    if subfolder == os.curdir:
        url_base = SCHEDULE_URL_PREFIX
    else:
        url_base = SCHEDULE_URL_PREFIX + subfolder.replace(os.sep, "/") + "/"

    for filename in sorted(files):
        line_numbers = parse_line_numbers(filename)
        if not line_numbers:
            unparsed_files.append(os.path.join(root, filename))
            continue
        for line_number in line_numbers:
            pdfs_list.append({
                'line-number': line_number,
                'schedule-url': url_base + filename,
            })

if unparsed_files:
    print("Skipped files that do not look like timetables:", unparsed_files)

# Explicit columns keep the frame usable even when no timetable was found.
schedule_df = pd.DataFrame(pdfs_list, columns=['line-number', 'schedule-url'])
schedule_df.tail(10)

Unnamed: 0,line-number,schedule-url
68,489,./files/schedules/487-489_TT_09-12-21.pdf
69,534,./files/schedules/534_TT_09-12-21.pdf
70,550,./files/schedules/550_TT_09-12-21.pdf
71,602,./files/schedules/602_TT_09-12-21.pdf
72,617,./files/schedules/617_TT_09-12-21.pdf
73,690,./files/schedules/690_TT_09-12-21.pdf
74,754,./files/schedules/754_TT_09-12-21.pdf
75,901,./files/schedules/901_TT_09-12-21.pdf
76,910,./files/schedules/910-950_TT_09-12-21.pdf
77,950,./files/schedules/910-950_TT_09-12-21.pdf


In [26]:
# schedule_df's line numbers are strings parsed from file names, so cast the
# join key to int on both frames (in place) before merging.
for frame in (schedule_df, line_changes):
    frame['line-number'] = frame['line-number'].astype(int)

# Attach each line's schedule URL. how='outer' keeps rows that exist on only
# one side; fillna('') blanks the cells those unmatched rows leave empty.
# NOTE(review): with an outer join, a schedule file with no matching sheet row
# adds an extra, mostly empty record -- confirm that is intended.
merged_lines = pd.merge(line_changes, schedule_df, how='outer', on='line-number').fillna('')
merged_lines

Unnamed: 0,line-number,line-label,line-description,lines-merged,line-discontinued,service-restored,card-1,card-2,card-3,current-schedule-url,schedule-url
0,2,2,Downtown LA - Westwood via Sunset Bl,False,False,False,,,,,./files/schedules/002_TT_09-12-21.pdf
1,4,4,Downtown LA - Santa Monica via Santa Monica Bl,True,False,False,,,,,./files/schedules/004_TT_09-12-21.pdf
2,10,10,West Hollywood - Downtown LA via Melrose Av,False,False,False,,,,,./files/schedules/010_TT_09-12-21.pdf
3,14,14,Beverly Center - Downtown LA via Beverly Bl,False,False,False,,,,,./files/schedules/014_TT_09-12-21.pdf
4,16,16,Downtown LA - West Hollywood via West 3rd St,False,False,False,,,,,./files/schedules/016_TT_09-12-21.pdf
...,...,...,...,...,...,...,...,...,...,...,...
120,761,761,Sylmar Station - E Line (Expo) Line via Van Nu...,False,False,False,,,,,
121,854,854 / L Line (Gold) Shuttle,Union Station - Pico/Aliso Station,False,False,False,,,,,
122,901,901 / G Line (Orange),Chatsworth - Canoga Park - North Hollywood,False,False,False,,,,,./files/schedules/901_TT_09-12-21.pdf
123,910,910 / J Line (Silver),San Pedro - Harbor Gateway Transit Center - Do...,False,False,False,,,,,./files/schedules/910-950_TT_09-12-21.pdf


In [27]:
# Write one JSON object per row of the merged table.
merged_lines.to_json(path_or_buf=DATA_OUTPUT_PATH + 'line-changes.json', orient='records')

# Expected row count, newest entry first:
#   8/16/21 - total should be 125 lines.
#   7/14/21 - total should be 150 lines; 2 new lines (235 & 622) were added
#             in the 7/18/21 service update.
#   5/28/21 - should contain 148 lines total: 141 existing lines + 7 new lines.
print(f"{len(merged_lines)} lines")


125 lines
