This file modifies the given GTFS file in xml format to be fit as input file into the MATSim program

In [None]:
# Import dependencies:
import datetime as time
import xml.etree.ElementTree as elem
import numpy as np
import collections

In [56]:
def schedule_modifier(xml_file):
    # Read the xml file we would like to modify:
    schedule_root = elem.parse(xml_file).getroot()
    # Obtain a copy of original schedule_root
    root = schedule_root
    # Initialize a shapeID array to examine if some shapeID exist:
    shapeID_dict = {}
    # Then we could iterate through all transitLine element 
    for transitLine_element in root.findall('transitLine'):
        # We do all the grouping and regrouping within the same transiLine_element
        # Different line should not interfer with each other even though they might have same shapeID:
        # Initialize a transitRoute dict to store the transitRoute_id so that we could do grouping
        for transitRoute_element in transitLine_element:
            # And we can extract the shapeID in the description tag
            shapeID = transitRoute_element.find('description').text[8:]
            # Find the correspoding departures:
            departures = transitRoute_element.find('departures')
            # If the shapeID is new to the shapeID_dict, we could put in the shapeID as key 
            if shapeID not in shapeID_dict:
                shapeID_dict[shapeID] = departures      
            # If the shapeID already exists in the shapeID_dict, we would like to extract each departure element
            # and attach them to the departures section, i.e master departures, where the same shapeID first shows up:
            else:
                master_departures = shapeID_dict.get(shapeID)
                # Iterate through each departure element and attach it to the section
                for departure in departures:
                    master_departures.append(departure)
                # After inserting all these departure into the master departures, we would like to
                # remove this transitRoute element so it would not discriminate itself as a seperate service:
                transitLine_element.remove(transitRoute_element)

The only module we would like to modify is the one describing the transitRoute and we would like to categorize everything according to shapeID

In [54]:
# We could check that the tree structure should be cleaned up right now, 
# and we can check that by see if the shapeIDs are unique now:
arr = []
for transitLine_element in root.findall('transitLine'):
    for transitRoute_element in transitLine_element:
        shapeID_check = transitRoute_element.find('description').text[8:]
        arr.append(shapeID_check)
        
# print('Are all shapeIDs unique? ')
print(np.unique(arr).size)
print(len(arr))

360
2749


Looks Good！

Next step is we are going to replace all departure id to get them consistent with the transitRoute id

In [43]:
for transitLine_element in root.findall('transitLine'):
    for transitRoute_element in transitLine_element:
        transitRoute_id = transitRoute_element.get('id')
        departures = transitRoute_element.find('departures')
        for departure in departures:
            # Get the departure time
            timestamp = departure.get('departureTime')
            # Reset all id with the first part same as the transitRoute_ID and second part as the departure time
            departure.set('id', transitRoute_id + '_' + timestamp)