In [1]:
import gtfs_kit as gk
import pandas as pd
import datetime
from copy import deepcopy



In [2]:
gtfs_path = '../data/raw/gtfs_brt'
tempo_ciclo_path = f'../data/raw/gtfs_brt/tempo_ciclo_brt.csv'
stop_times_path = '../data/raw/gtfs_brt/stop_times.txt'

In [3]:
# 1. Generate stop_times simplified

tables = ['trips', 'stop_sequences', 'frequencies']

gtfs = {table: pd.read_csv(f'../data/raw/gtfs_brt/{table}.txt') for table in tables}
all_trip_ids = gtfs['trips'][['route_id', 'direction_id', 'trip_id']]
(gtfs['stop_sequences']
 .merge(all_trip_ids, on=['route_id', 'direction_id'])
 [['trip_id', 'stop_id', 'stop_sequence']]
 .assign(departure_time=None)
 .assign(arrival_time=None) 
 .to_csv(stop_times_path, index=False))

In [4]:
# 2. Calculate stop distances

In [5]:
feed = gk.read_feed(gtfs_path, dist_units='km')
stop_times = feed.append_dist_to_stop_times().stop_times

In [6]:
# 3. Calculate time interval between stops

In [7]:
tempo_ciclo = pd.read_csv(tempo_ciclo_path)
tempo_ciclo['trip_id'] = tempo_ciclo.apply(lambda x: '_'.join([str(x['route_id']), str(int(x['direction_id'])), 'U']), 1)

stop_times = stop_times.merge(tempo_ciclo[['trip_id', 'tempo_ciclo']], on='trip_id')
stop_times['norm_dist'] = stop_times.groupby('trip_id').transform(lambda x: x/x.max())['shape_dist_traveled']
stop_times['time_btw_stops'] = stop_times['norm_dist'] * stop_times['tempo_ciclo']
stop_times = stop_times.dropna(subset=['time_btw_stops'])

In [8]:
# 4.Complete stop_times

In [15]:
def add_departure(df):
    
    departures = []
    start_time = df['start_time']

    for i, stops in stop_times[stop_times['trip_id'] == df['trip_id']].iterrows():

        if stops['stop_sequence'] == 1:

            stop_time = start_time

        else:

            stop_time = start_time + datetime.timedelta(minutes=stops['time_btw_stops'])

        departures.append(deepcopy({
            'departure_time': deepcopy(stop_time),
            'stop_sequence': stops['stop_sequence'],
            'stop_id': stops['stop_id']
        }))

        
    return (pd.DataFrame(departures)
            .assign(trip_id=df['trip_id'])
           )


In [22]:
gtfs['frequencies']['start_time'] = gtfs['frequencies']['start_time'].apply(lambda x: datetime.datetime.strptime(x,'%H:%M:%S'))
gtfs['frequencies']['end_time'] = gtfs['frequencies']['end_time'].apply(lambda x: datetime.datetime.strptime(x,'%H:%M:%S'))

TypeError: strptime() argument 1 must be str, not Timestamp

In [20]:

departures = pd.concat(
         gtfs['frequencies'].drop_duplicates(subset=['trip_id'])
         .apply(add_departure, 1)
         .to_list()
        ).reset_index(drop=True)
departures['departure_time'] = departures['departure_time'].apply(lambda x: x.strftime('%H:%M:%S'))
departures['arrival_time'] = departures['departure_time']
departures['stop_sequence'] = departures['stop_sequence'].astype(int)
departures[['trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence']
          ].to_csv(stop_times_path, index=False)

In [23]:
trips = set(gtfs['trips']['trip_id'].unique())
print('Missing trip_ids: ', trips.difference(set(departures['trip_id'].unique())))

Missing trip_ids:  {'38_1_U', '11_0_U', '11_1_U', '38_0_U', '42A_0_U'}


In [None]:
# Validate

In [24]:
feed = gk.read_feed(gtfs_path, dist_units='km')

In [25]:
feed.validate()

Unnamed: 0,type,message,table,rows
0,warning,Unrecognized column platform_code,stops,[]
1,warning,Unrecognized column stop_brt,stops,[]
2,warning,Unrecognized column active,stops,[]
3,warning,Stop has no stop times,stops,"[1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19,..."
4,warning,Trip has no stop times,trips,"[2, 3, 20, 21, 36]"


In [26]:
feed.write('../data/output/gtfs_brt.zip')