In [124]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

DIR = Path('..')
sys.path.append(str(DIR))

import gtfstk as gt

%load_ext autoreload
%autoreload 2

DATA_DIR = DIR/'data'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [125]:
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'gtfs.zip'
path = DATA_DIR/'cairns_gtfs.zip'

print(gt.list_gtfs(path))

feed = gt.read_gtfs(path, dist_units='km')

# Pick date
date = feed.get_first_week()[0]
print('date', date)


            file_name  file_size
0  calendar_dates.txt        387
1          routes.txt       1478
2           trips.txt     143081
3           stops.txt      26183
4          agency.txt        199
5      stop_times.txt    2561019
6          shapes.txt     864694
7        calendar.txt        337
date 20140526


In [167]:
import copy


def proto1(feed, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    frames = []
    for date in dates:
        f = gt.compute_stop_stats_base(feed.stop_times, feed.get_trips(date),
          split_directions=split_directions,
          headway_start_time=headway_start_time,
          headway_end_time=headway_end_time)
        f['date'] = date
        frames.append(f)
    
    return pd.concat(frames)

def proto2(feed, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    """
    Call ``compute_stop_stats_base()`` with the subset of trips active on the given date and with the keyword arguments ``split_directions``,   ``headway_start_time``, and ``headway_end_time``.

    See ``compute_stop_stats_base()`` for a description of the output.

    Assume the following feed attributes are not ``None``:

    - ``feed.stop_timtes``
    - Those used in :func:`get_trips`

    NOTES:

    This is a more user-friendly version of ``compute_stop_stats_base()``.
    The latter function works without a feed, though.
    """
    cols = [
      'date',
      'stop_id',
      'num_routes',
      'num_trips',
      'max_headway',
      'min_headway',
      'mean_headway',
      'start_time',
      'end_time',
    ]
    activity = feed.compute_trip_activity(dates)

    # Compile stats for each date, but memoize stats by trip ID sequence
    # to avoid unnecessary computation
    stats_by_ids = {}
    frames = []
    for date in dates:
        stats = {}
        ids = tuple(activity.loc[activity[date] > 0, 'trip_id'])
        if ids in stats_by_ids:
            # Use stats previously computed
            stats = copy.copy(stats_by_ids[ids])
        elif not ids:
            stats  = {col: np.nan for col in cols}
        else:
            # Compute stats afresh
            t = feed.trips
            trips = t[t['trip_id'].isin(ids)].copy()
            stats = gt.compute_stop_stats_base(feed.stop_times, trips,
                  split_directions=split_directions,
                  headway_start_time=headway_start_time,
                  headway_end_time=headway_end_time)
            # Remember stats
            stats_by_ids[ids] = stats

        stats['date'] = date
        frames.append(stats)

    # Assemble stats into DataFrame
    return pd.concat(frames)[cols].copy()


In [165]:
dates = feed.get_dates()
len(dates)

217

In [169]:
%time p1 = proto1(feed, dates[:14])
%time p2 = proto2(feed, dates[:14])
p2

CPU times: user 3.6 s, sys: 0 ns, total: 3.6 s
Wall time: 3.6 s
CPU times: user 1.44 s, sys: 0 ns, total: 1.44 s
Wall time: 1.44 s


Unnamed: 0,date,stop_id,num_routes,num_trips,max_headway,min_headway,mean_headway,start_time,end_time
0,20140526,750000,1.0,30.0,34.0,23.0,29.863636,05:50:00,22:13:00
1,20140526,750001,1.0,30.0,35.0,23.0,29.909091,05:52:00,22:15:00
2,20140526,750002,1.0,30.0,35.0,23.0,29.909091,05:54:00,22:17:00
3,20140526,750003,1.0,30.0,35.0,23.0,29.909091,05:55:00,22:18:00
4,20140526,750004,1.0,30.0,34.0,23.0,29.863636,05:57:00,22:20:00
5,20140526,750005,1.0,30.0,34.0,23.0,29.863636,05:59:00,22:22:00
6,20140526,750006,1.0,30.0,34.0,23.0,29.695652,06:00:00,22:23:00
7,20140526,750007,1.0,30.0,34.0,23.0,29.695652,06:01:00,22:24:00
8,20140526,750008,1.0,30.0,34.0,23.0,29.695652,06:02:00,22:25:00
9,20140526,750009,1.0,30.0,33.0,23.0,29.695652,06:03:00,22:26:00


In [170]:
feed.compute_feed_stats(ts, dates[0])

ValueError: time data '2' does not match format '%Y%m%d'

In [10]:
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Department of Transport and Main Roads - Tran...
1,timezone,Australia/Brisbane
2,start_date,20140526
3,end_date,20141228
4,num_routes,22
5,num_trips,1339
6,num_stops,416
7,num_shapes,54
8,sample_date,20140529
9,num_routes_active_on_sample_date,20


In [11]:
feed.summarize()

Unnamed: 0,table,column,#values,#nonnull_values,#unique_values,min_value,max_value
0,agency,agency_name,1,1,1,Department of Transport and Main Roads - Trans...,Department of Transport and Main Roads - Trans...
1,agency,agency_url,1,1,1,http://www.sunbus.com.au,http://www.sunbus.com.au
2,agency,agency_timezone,1,1,1,Australia/Brisbane,Australia/Brisbane
3,agency,agency_lang,1,1,1,en,en
4,agency,agency_phone,1,1,1,(07)40576411,(07)40576411
0,calendar,service_id,4,4,4,CNS2014-CNS_MUL-Saturday-00,CNS2014-CNS_MUL-Weekday-00-0000100
1,calendar,monday,4,4,2,0,1
2,calendar,tuesday,4,4,2,0,1
3,calendar,wednesday,4,4,2,0,1
4,calendar,thursday,4,4,2,0,1
