# Overview

This Jupyter Notebook takes in data from a Google Sheet that contains line change details and their associated high level categories and outputs a JSON file for the MyBus tool.

The output file is used by the MyBus tool's results page and contains the Line-level changes that are displayed there.

Run all cells to generate: `lines-changes.json`

In [25]:
import pandas as pd
# GOOGLE_SHEET_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQq0095iOV4dn5McH5IgL4tfjBGLRpCS4XIw-TsZKXubWLyycCfbmnyWdDJRr73ctUMjv32DvKmvVbj/pub?output=csv'
# GOOGLE_SHEET_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQKADb-wnackdlDZwEF0mTpLPh7MpkI4YQV5gv1TYOzltjiGAXcj35GTb4ftP7yKN8mH74MWLPkSUlq/pub?output=csv'
GOOGLE_SHEET_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vSENm-oLTxuzcQUX_0tZ9X0Q2_HIudg1hi5p0MMauqWoHCuomsxb6H6AhqOkaeBY-X1ZKBTbFAzDKUM/pub?output=csv'
DATA_INPUT_PATH = '../data/input'

DATA_OUTPUT_PATH = '../data/'

In [26]:
line_changes = pd.read_csv(GOOGLE_SHEET_URL,
    usecols={'Line Number', 'Line Label', 'Line Description', 'Route changes','Other changes','Schedule Changes','Stop Cancellations', 'Lines Merged', 'Line Discontinued','Details', 'Service', 'Route', 'Schedule', 'Current Schedule URL'})
line_changes.columns = ["line-number","line-label","line-description",'route-changes','other-changes','schedule-changes','stop-cancellations',"lines-merged","line-discontinued","details","card-1","card-2","card-3","current-schedule-url"]

line_changes = line_changes.fillna('')
line_changes.head()

Unnamed: 0,line-number,line-label,line-description,route-changes,other-changes,schedule-changes,stop-cancellations,lines-merged,line-discontinued,details,card-1,card-2,card-3,current-schedule-url
0,2,2,Downtown LA - Westwood via Sunset Bl,True,False,False,False,True,False,Lines 2 and 200 merge into new Line 2 between ...,<ul><li>Lines 2 and 200 merge into new Line 2 ...,The bus stop eastbound on Sunset east of Bever...,,
1,4,4,Downtown LA - Santa Monica via Santa Monica Bl,True,False,False,True,False,False,Line 4 changes route at the north end of downt...,,<ul><li>Line 4 route changes at the north of d...,,
2,10,10,Pico Rimpau Transit Center - Downtown LA via M...,False,False,True,False,False,False,,,,More weekday trips.,
3,14,14,Beverly Center - Downtown LA via Beverly Bl,False,False,True,False,False,False,,,,More weekday trips.,
4,16,16,Downtown LA - West Hollywood via West 3rd St,False,False,True,False,False,False,,,,More weekday trips.,


In [27]:
# import shutil
import os

#define the folders to look through
folders = os.listdir("../files/schedules")

#set an array for the file types
pdfs_list = []

#create a list of file types
for root, dirs, files in os.walk("../files/schedules"):
    for filename in files:
        lines = filename.replace(" ","").split("_TT")[0].split("-")
        for line in lines:
            this_schedule = {}
            this_schedule['line-number'] = line.lstrip("0")
            this_schedule['schedule-url'] = "./files/schedules/"+filename
            pdfs_list.append(this_schedule)
            # print(line)
# print(pdfs_list)

schedule_df = pd.DataFrame(pdfs_list)
schedule_df.tail(10)

Unnamed: 0,line-number,schedule-url
62,177,./files/schedules/177_TT_12-19-21.pdf
63,55,./files/schedules/055_TT_12-19-21.pdf
64,803,./files/schedules/803_TT_12-19-21.pdf
65,37,./files/schedules/037_TT_12-19-21.pdf
66,154,./files/schedules/154_TT_12-19-21.pdf
67,207,./files/schedules/207_TT_ 12-19-21.pdf
68,761,./files/schedules/761_TT_12-19-21.pdf
69,167,./files/schedules/167_TT_12-19-21.pdf
70,4,./files/schedules/004_TT_12-19-21.pdf
71,51,./files/schedules/051_TT_12-19-21.pdf


In [28]:
schedule_df['line-number'] = schedule_df['line-number'].astype(int)
line_changes['line-number'] = line_changes['line-number'].astype(int)
merged_lines = line_changes.merge(schedule_df, on=['line-number'],how='outer').fillna('')
merged_lines

Unnamed: 0,line-number,line-label,line-description,route-changes,other-changes,schedule-changes,stop-cancellations,lines-merged,line-discontinued,details,card-1,card-2,card-3,current-schedule-url,schedule-url
0,2,2,Downtown LA - Westwood via Sunset Bl,True,False,False,False,True,False,Lines 2 and 200 merge into new Line 2 between ...,<ul><li>Lines 2 and 200 merge into new Line 2 ...,The bus stop eastbound on Sunset east of Bever...,,,./files/schedules/002_TT_12-19-21.pdf
1,4,4,Downtown LA - Santa Monica via Santa Monica Bl,True,False,False,True,False,False,Line 4 changes route at the north end of downt...,,<ul><li>Line 4 route changes at the north of d...,,,./files/schedules/004_TT_12-19-21.pdf
2,10,10,Pico Rimpau Transit Center - Downtown LA via M...,False,False,True,False,False,False,,,,More weekday trips.,,./files/schedules/010_TT_12-19-21.pdf
3,14,14,Beverly Center - Downtown LA via Beverly Bl,False,False,True,False,False,False,,,,More weekday trips.,,./files/schedules/014_TT_12-19-21.pdf
4,16,16,Downtown LA - West Hollywood via West 3rd St,False,False,True,False,False,False,,,,More weekday trips.,,./files/schedules/016_TT_12-19-21.pdf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,950,950 / J Line (Silver),San Pedro - Harbor Gateway Transit Center - Do...,False,False,False,False,False,False,,,,,,./files/schedules/910-950_TT_12-19-21.pdf
123,804,,,,,,,,,,,,,,./files/schedules/804_TT_12_19_21.pdf
124,801,,,,,,,,,,,,,,./files/schedules/801_TT_12-19-21.pdf
125,806,,,,,,,,,,,,,,./files/schedules/806_TT_12-19-21.pdf


In [29]:
merged_lines.to_json(DATA_OUTPUT_PATH + 'line-changes.json', orient='records')

# As of 8/16/21 - total should be 125 lines.

print(str(len(merged_lines)) + ' lines')


127 lines
