In [1]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Add the parent directory of this notebook to sys.path before importing
# gtfstk (presumably so that the local checkout is the one imported --
# TODO confirm). The append must stay ahead of the import below.
DIR = Path('..')
sys.path.append(str(DIR))

import gtfstk as gt

# IPython autoreload, mode 2: reload all modules before executing each cell,
# so edits to the library are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Directory from which the GTFS zip files in the next cell are read.
DATA_DIR = DIR/'data'

In [2]:
# Feed to analyse. The commented-out lines are alternative feeds kept
# for quick switching; only the last assignment is active.
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'gtfs.zip'
path = DATA_DIR/'cairns_gtfs.zip'

# Show the contents of the archive (file names and sizes, per the output below).
print(gt.list_gtfs(path))

# Load the feed, passing 'km' as its distance units.
feed = gt.read_gtfs(path, dist_units='km')

# Pick date: the first entry returned by feed.get_first_week()
# NOTE(review): indexing [0] assumes the result is non-empty -- confirm for other feeds.
date = feed.get_first_week()[0]
print('date', date)


            file_name  file_size
0  calendar_dates.txt        387
1          routes.txt       1478
2           trips.txt     143081
3           stops.txt      26183
4          agency.txt        199
5      stop_times.txt    2561019
6          shapes.txt     864694
7        calendar.txt        337
date 20140526


In [11]:
import copy


def proto1(feed, trip_stats, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    """
    Baseline prototype: compute route stats one date at a time by calling
    ``feed.compute_route_stats`` for every date in ``dates``, tag each
    result with a ``date`` column, and return the concatenation of the
    per-date results.

    Parameters
    ----------
    feed
        Object exposing ``compute_route_stats(trip_stats, date, ...)``
    trip_stats
        Passed through unchanged to ``feed.compute_route_stats``
    dates
        Iterable of dates; each one is passed to
        ``feed.compute_route_stats`` and stored in the ``date`` column
    split_directions, headway_start_time, headway_end_time
        Passed through unchanged to ``feed.compute_route_stats``

    Returns
    -------
    DataFrame
        The per-date stats stacked in the order of ``dates``, each row
        carrying its date in the ``date`` column.
        If ``dates`` is empty, then return an empty DataFrame instead of
        letting ``pd.concat`` raise ``ValueError`` on an empty list.
    """
    # ``assign`` returns a new frame, so whatever the feed returned
    # (possibly a cached object) is never modified in place.
    frames = [
        feed.compute_route_stats(trip_stats, date,
          split_directions=split_directions,
          headway_start_time=headway_start_time,
          headway_end_time=headway_end_time).assign(date=date)
        for date in dates
    ]

    # pd.concat refuses an empty list; no dates simply means no stats.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

def proto2(feed, trip_stats, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    """
    Compute stats for all routes for the given dates, and return
    the result as a DataFrame with the following columns.

    - date
    - route_id
    - route_short_name
    - route_type
    - direction_id (only when ``split_directions`` is true)
    - num_trips: number of trips
    - is_loop: 1 if at least one of the trips on the route has its ``is_loop`` field equal to 1; 0 otherwise
    - is_bidirectional: 1 if the route has trips in both directions; 0 otherwise
    - start_time: start time of the earliest trip on the route
    - end_time: end time of latest trip on the route
    - max_headway: maximum of the durations (in minutes) between trip starts on the route between ``headway_start_time`` and ``headway_end_time`` on the given dates
    - min_headway: minimum of the durations (in minutes) mentioned above
    - mean_headway: mean of the durations (in minutes) mentioned above
    - peak_num_trips: maximum number of simultaneous trips in service (for the given direction, or for both directions when ``split_directions==False``)
    - peak_start_time: start time of first longest period during which the peak number of trips occurs
    - peak_end_time: end time of first longest period during which the peak number of trips occurs
    - service_duration: total of the duration of each trip on the route in the given subset of trips; measured in hours
    - service_distance: total of the distance traveled by each trip on the route in the given subset of trips; measured in whatever distance units are present in ``trip_stats``; contains all ``np.nan`` entries if ``feed.shapes is None``
    - service_speed: service_distance/service_duration; measured in distance units per hour
    - mean_trip_distance: service_distance/num_trips
    - mean_trip_duration: service_duration/num_trips

    If ``split_directions == False``, then compute each route's stats
    using its trips in both directions.

    If ``dates`` is empty or there are no stats for the given dates,
    then return an empty DataFrame with the columns above.

    The result is sorted by date and route ID.
    The stats themselves come from ``gt.compute_route_stats_base`` and
    are computed only once per distinct set of active trips, so dates
    that share the same active trips share one computation.

    Assume the following feed attributes are not ``None``:

    - Those used in ``feed.compute_trip_activity``
    """
    cols = [
      'date',
      'route_id',
      'route_short_name',
      'route_type',
      'num_trips',
      'is_loop',
      'is_bidirectional',
      'start_time',
      'end_time',
      'max_headway',
      'min_headway',
      'mean_headway',
      'peak_num_trips',
      'peak_start_time',
      'peak_end_time',
      'service_duration',
      'service_distance',
      'service_speed',
      'mean_trip_distance',
      'mean_trip_duration',
      ]
    if split_directions:
        cols.append('direction_id')

    # Without dates there is nothing to compute, so return before
    # asking the feed for trip activity.
    if not dates:
        return pd.DataFrame([], columns=cols)

    activity = feed.compute_trip_activity(dates)

    # Collect stats for each date, memoizing stats by trip ID sequence
    # to avoid unnecessary recomputations.
    # Store in dictionary of the form
    # trip ID sequence ->
    # [stats DataFrame, date list that stats apply]
    stats_and_dates_by_ids = {}
    for date in dates:
        ids = tuple(activity.loc[activity[date] > 0, 'trip_id'])
        if ids in stats_and_dates_by_ids:
            # Append date to date list
            stats_and_dates_by_ids[ids][1].append(date)
        else:
            # Compute stats; boolean indexing plus copy() yields a fresh
            # frame, so the caller's trip_stats is never modified
            t = trip_stats[trip_stats['trip_id'].isin(ids)].copy()
            stats = gt.compute_route_stats_base(t,
              split_directions=split_directions,
              headway_start_time=headway_start_time,
              headway_end_time=headway_end_time)

            # Remember stats
            stats_and_dates_by_ids[ids] = [stats, [date]]

    # Assemble stats into DataFrame, one tagged copy per date.
    # The loop variable is deliberately not called ``dates`` so that
    # the parameter is not rebound.
    frames = []
    for stats, stats_dates in stats_and_dates_by_ids.values():
        if stats.empty:
            # No rows to contribute; an empty frame may even lack the
            # route_id column needed for sorting below
            continue
        for date in stats_dates:
            frames.append(stats.assign(date=date))

    if not frames:
        return pd.DataFrame([], columns=cols)

    return pd.concat(frames).sort_values(['date', 'route_id'])


In [9]:
# Trip stats are computed once here and passed to both prototypes
# in the timing cell.
ts = feed.compute_trip_stats()
# Dates reported by the feed (presumably every date it covers -- TODO confirm).
dates = feed.get_dates()
# Display how many dates there are.
len(dates)

217

In [14]:
# Time both prototypes on the same input: the first 14 dates, with the
# same split_directions setting for each.
sd = True
%time p1 = proto1(feed, ts, dates[:14], split_directions=sd)
%time p2 = proto2(feed, ts, dates[:14], split_directions=sd)
# Display the result of the memoizing prototype.
p2

CPU times: user 13 s, sys: 48 ms, total: 13 s
Wall time: 13 s
CPU times: user 3.86 s, sys: 52 ms, total: 3.92 s
Wall time: 3.92 s


Unnamed: 0,route_id,direction_id,route_short_name,route_type,num_trips,is_loop,start_time,end_time,max_headway,min_headway,...,peak_num_trips,peak_start_time,peak_end_time,service_distance,service_duration,is_bidirectional,service_speed,mean_trip_distance,mean_trip_duration,date
0,110-423,0,110,3,30,0,05:50:00,23:05:00,35.0,23.0,...,3,18:13:00,18:20:00,975.213639,29.916667,1,32.597670,32.507121,0.997222,20140526
1,110-423,1,110,3,29,0,07:10:00,24:02:00,30.0,30.0,...,2,07:40:00,08:08:00,919.009424,27.433333,1,33.499736,31.689980,0.945977,20140526
2,111-423,0,111,3,29,0,06:02:00,23:35:00,67.0,25.0,...,3,06:57:00,07:05:00,1005.368216,30.366667,1,33.107625,34.667870,1.047126,20140526
3,111-423,1,111,3,29,0,07:25:00,24:36:00,30.0,30.0,...,3,08:25:00,08:26:00,997.309407,28.983333,1,34.409755,34.389980,0.999425,20140526
4,112-423,0,112,3,15,1,07:55:00,22:31:00,60.0,60.0,...,1,07:55:00,08:31:00,317.421880,9.000000,0,35.269098,21.161459,0.600000,20140526
5,113-423,0,113,3,3,0,06:05:00,08:10:00,,,...,2,06:35:00,06:45:00,74.298371,2.083333,1,35.663218,24.766124,0.694444,20140526
6,113-423,1,113,3,3,0,16:05:00,18:42:00,60.0,60.0,...,1,16:05:00,16:42:00,73.303194,1.850000,1,39.623348,24.434398,0.616667,20140526
7,120-423,0,120,3,17,0,05:34:00,22:23:00,60.0,60.0,...,1,05:34:00,06:23:00,470.550755,13.883333,1,33.893212,27.679456,0.816667,20140526
8,120-423,1,120,3,15,0,07:00:00,21:51:00,60.0,60.0,...,1,07:00:00,07:51:00,428.504391,12.750000,1,33.608188,28.566959,0.850000,20140526
9,120N-423,1,120N,3,2,0,22:00:00,23:51:00,,,...,1,22:00:00,22:51:00,81.004241,1.700000,0,47.649553,40.502120,0.850000,20140526


In [None]:
# Feed stats from the trip stats above for the first date in `dates`
# (presumably feed-wide aggregates for that single date -- TODO confirm).
feed.compute_feed_stats(ts, dates[0])

In [None]:
# NOTE(review): presumably an overview of the feed as a whole -- confirm against gtfstk.
feed.describe()

In [None]:
# NOTE(review): presumably a per-table summary of the feed -- confirm against gtfstk.
feed.summarize()