In [55]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json

import shapely
from shapely.geometry import mapping, Point, MultiLineString
import pandas as pd
import numpy as np
import geojsonio

# Make the local gtfs-tk checkout importable; the same directory is
# reused further down to locate the sample feeds under ``data/``.
gtfs_tk_dir = '../../gtfs-tk/'
sys.path.append(gtfs_tk_dir)
import gtfs_tk.feed as gf
import gtfs_tk.utils as gu

%load_ext autoreload
%autoreload 2
%matplotlib inline  


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
# Load the Cairns sample feed from the gtfs-tk checkout.
# Alternative (Portland, distances in feet):
#   gf.Feed(gtfs_tk_dir + 'data/portland_gtfs.zip', original_units='ft')
feed = gf.Feed(''.join([gtfs_tk_dir, 'data/cairns_gtfs.zip']))

# Study date: the first of the dates reported by the feed
date = feed.get_dates()[0]


In [60]:
def clean_series(series, nan_prefix='n/a'):
    """
    Given a series of items, replace NaN entries
    with ``nan_prefix + '#0'``, ``nan_prefix + '#1'``,
    ``nan_prefix + '#2'``, etc., in order of appearance.
    Then keep the first occurrence of every repeated item as is
    and append ``'#0'``, ``'#1'``, ``'#2'``, etc. to the later
    occurrences; the counter runs over all such repeats in order
    of appearance, whatever their value.
    Return the resulting series.
    The given series is not modified and its index is preserved.

    Replacements are made by position, so the series may carry any
    index, not only the default integer range.
    Items that are not strings are formatted with ``str`` before the
    suffix is appended.
    No check is made that a generated label differs from an item
    already present in the series.

    I use this for cleaning route short names.
    """
    s = series.copy()

    # Replace NaNs
    nan_positions = s.isnull().values.nonzero()[0]
    if len(nan_positions):
        if s.dtype.kind != 'O':
            # String labels do not fit into a numeric series
            s = s.astype(object)
        s.iloc[nan_positions] = ['{!s}#{!s}'.format(nan_prefix, i)
          for i in range(len(nan_positions))]

    # Replace duplicates; ``duplicated`` marks every occurrence
    # after the first one
    dup_positions = s.duplicated().values.nonzero()[0]
    if len(dup_positions):
        if s.dtype.kind != 'O':
            s = s.astype(object)
        s.iloc[dup_positions] = ['{!s}#{!s}'.format(x, i)
          for i, x in enumerate(s.iloc[dup_positions].values)]
    return s


In [59]:
# Build a test series from the route short names: names ending in '1'
# all collapse to the same label and names ending in 'N' become missing,
# so that both duplicates and NaNs are present.
rsn = feed.routes['route_short_name'].copy()
rsn.loc[rsn.str.endswith('1')] = 'bing'
rsn.loc[rsn.str.endswith('N')] = np.nan

# Library version of the cleaning function
gu.clean_series(rsn)


0        110
1      n/a#0
2       bing
3        112
4        113
5        120
6      n/a#1
7     bing#0
8        122
9        123
10       130
11    bing#1
12     n/a#2
13       133
14       140
15     n/a#3
16    bing#2
17       142
18       143
19      143W
20       150
21      150E
Name: route_short_name, dtype: object

In [27]:
# Trip statistics for the whole feed; passed to the feed/route
# statistics calls in the cells below.
trips_stats = feed.get_trips_stats()

In [30]:
# Feed statistics for the study date, then for the date at position 6
# of ``feed.get_dates()`` for comparison.
# NOTE(review): presumably position 6 is picked to land on a different
# kind of service day; confirm against the feed calendar.
print(feed.get_feed_stats(trips_stats, date))
print(feed.get_feed_stats(trips_stats, feed.get_dates()[6]))

   num_trips  num_routes  num_stops  peak_num_trips peak_start_time  \
0        622          20        416              39        08:16:00   

  peak_end_time  service_distance  service_duration  service_speed  
0      08:18:00      13774.027234             472.6      29.145212  
   num_trips  num_routes  num_stops  peak_num_trips peak_start_time  \
0        266          14        411              17        14:31:00   

  peak_end_time  service_distance  service_duration  service_speed  
0      14:37:00       6390.846315        197.683333      32.328706  


In [36]:
def get_route_geojson(linestring_by_shape, point_by_stop, route_id):
    """
    Return a dictionary in the form of a GeoJSON FeatureCollection for
    the route with the given ID, read from the module-level ``feed``.

    The collection holds one feature per row of ``feed.routes`` matching
    ``route_id``, whose geometry is the MultiLineString of all distinct
    shapes used by the trips of the route, followed by one feature per
    stop visited by those trips.
    The properties of a route feature are the columns of its
    ``feed.routes`` row; the properties of a stop feature are the columns
    of its ``feed.stops`` row without ``stop_lon`` and ``stop_lat``.

    ``linestring_by_shape`` is indexed by shape ID and must yield
    line string geometries; ``point_by_stop`` is handed to
    ``Series.map`` to turn stop IDs into point geometries.

    Raise a ``KeyError`` if a shape ID used by the route is missing from
    ``linestring_by_shape``.
    """
    # Get shapes.
    # Trips without a shape ID are skipped rather than looked up.
    trips = feed.trips[feed.trips['route_id'] == route_id]
    shape_ids = trips['shape_id'].dropna().unique()
    shape = MultiLineString([linestring_by_shape[x] for x in shape_ids])
    routes = feed.routes
    routes = routes[routes['route_id'] == route_id]

    # Make route feature.
    # The geometry is used directly instead of being stored in a column
    # built from a fresh Series: such a Series is labelled 0 and column
    # assignment aligns on labels, which left NaN for every route not
    # sitting in row 0 of ``feed.routes``.
    route_features = [{
      'type': 'Feature',
      'geometry': mapping(shape),
      'properties': json.loads(row.to_json()),
      } for index, row in routes.iterrows()]

    # Get stops
    stop_times = pd.merge(feed.stop_times, trips)
    stop_ids = stop_times['stop_id'].unique()
    stops = feed.stops[feed.stops['stop_id'].isin(stop_ids)].copy()
    # Coordinates go into the geometry, not into the properties
    del stops['stop_lon']
    del stops['stop_lat']
    stop_cols = stops.columns
    stops['shape'] = stops['stop_id'].map(point_by_stop)

    # Make stop features
    stop_features = [{
      'type': 'Feature',
      'geometry': mapping(row['shape']),
      'properties': json.loads(row[stop_cols].to_json()),
      } for index, row in stops.iterrows()]

    # Make feature collection
    g = {'type': 'FeatureCollection', 'features': route_features + stop_features}
    return g

# Build the geometry lookups once (``use_utm=False`` is passed to both),
# then assemble the feature collection of one route and embed it
# through geojsonio.
lbs = feed.get_linestring_by_shape(use_utm=False)
pbs = feed.get_point_by_stop(use_utm=False)
route = '110-423'
g = get_route_geojson(lbs, pbs, route)

geojsonio.embed(json.dumps(g))


In [4]:
# Trip statistics for the whole feed.
# NOTE(review): the same call already appears in an earlier cell; one of
# the two is redundant when the notebook is run top to bottom.
trips_stats = feed.get_trips_stats()

In [11]:
# Trips of the study date joined with their statistics
trips = feed.get_trips(date)
f = pd.merge(trips, trips_stats)

# Convert the start and end time strings to seconds
f[['start_time', 'end_time']] = f[['start_time', 'end_time']].applymap(
  gu.timestr_to_seconds)

# Count the active trips at every distinct trip start or end time
d = {'time': np.unique(f[['start_time', 'end_time']].values)}
d['count'] = [gf.count_active_trips(f, t) for t in d['time']]
g = pd.DataFrame(d)

# Show the times at which the count attains its maximum.
# Still to do: derive the peak interval from these, i.e.
#   start, end = utils.get_peak_indices(times, counts)
#   peak_num_trips = counts[start]
#   peak_start_time = utils.timestr_to_seconds(times[start], inverse=True)
#   peak_end_time = utils.timestr_to_seconds(times[end], inverse=True)
m = g['count'].max()
g.loc[g['count'] == m]


Unnamed: 0,count,time
79,39,29760
99,39,31560
445,39,63960


In [13]:
# Show the statistics row of one route, transposed so that the
# indicators read top to bottom.
# NOTE(review): ``routes_stats`` is not assigned in any cell visible
# here; this cell relies on leftover kernel state and will fail on a
# fresh Restart & Run All unless it is computed first.
route = '110-423'
f = routes_stats
f[f['route_id'] == route].T

Unnamed: 0,0
route_id,110-423
num_trips,34
is_loop,0
is_bidirectional,1
start_time,06:16:00
end_time,25:04:00
max_headway,60
mean_headway,60
peak_num_trips,2
peak_start_time,08:16:00


In [75]:
# Number of trips of the chosen route over time.
# NOTE(review): ``routes_ts`` is not assigned in any cell visible here;
# it has to be computed before this cell can run on a fresh kernel.
f = routes_ts
f = f['num_trips'][route]
# Positional slice of rows 8*60 up to (not including) 9*60 + 5; the
# displayed output starts at 08:00 in one-minute steps, so this looks
# like the window 08:00 to 09:04 -- confirm the series frequency.
# ``.iloc`` replaces ``.ix``, which has been removed from pandas and
# which treated an integer slice on a datetime index as positional too.
f.iloc[8*60:(9*60 + 5)]


2014-06-07 08:00:00    1
2014-06-07 08:01:00    1
2014-06-07 08:02:00    1
2014-06-07 08:03:00    1
2014-06-07 08:04:00    1
2014-06-07 08:05:00    1
2014-06-07 08:06:00    1
2014-06-07 08:07:00    1
2014-06-07 08:08:00    2
2014-06-07 08:09:00    2
2014-06-07 08:10:00    1
2014-06-07 08:11:00    1
2014-06-07 08:12:00    1
2014-06-07 08:13:00    1
2014-06-07 08:14:00    1
2014-06-07 08:15:00    1
2014-06-07 08:16:00    2
2014-06-07 08:17:00    2
2014-06-07 08:18:00    2
2014-06-07 08:19:00    2
2014-06-07 08:20:00    2
2014-06-07 08:21:00    2
2014-06-07 08:22:00    2
2014-06-07 08:23:00    2
2014-06-07 08:24:00    2
2014-06-07 08:25:00    2
2014-06-07 08:26:00    2
2014-06-07 08:27:00    2
2014-06-07 08:28:00    2
2014-06-07 08:29:00    2
                      ..
2014-06-07 08:35:00    2
2014-06-07 08:36:00    2
2014-06-07 08:37:00    2
2014-06-07 08:38:00    2
2014-06-07 08:39:00    2
2014-06-07 08:40:00    2
2014-06-07 08:41:00    2
2014-06-07 08:42:00    2
2014-06-07 08:43:00    2


In [29]:
# Round-trip check: compute the routes time series at 5 minute
# frequency, write it to CSV, read it back and downsample the copy.
date = feed.get_dates()[0]
# ``gu.time_it`` wraps the call; the printed output shows start and
# finish timestamps for it
f = gu.time_it(feed.get_routes_time_series)(trips_stats, date, freq='5Min')
print(f.index)
# NOTE(review): writes ``temp.csv`` into the working directory and
# leaves it there
f.to_csv('temp.csv')
# Two header rows are read back as the column levels
g = pd.read_csv('temp.csv', header=[0, 1], parse_dates=True, index_col=0, infer_datetime_format=True)
# Infer the frequency from the parsed timestamps and set it on the
# index before downsampling
x = pd.tseries.frequencies.infer_freq(g.index)
g.index.freq = pd.tseries.frequencies.to_offset(x)
print(g.index)
gf.downsample(g, freq='15Min')

Timing get_routes_time_series
2015-03-27 16:36:32.265245 Began process
2015-03-27 16:36:32.457691 Finished in 0.00 min
<class 'pandas.tseries.index.DatetimeIndex'>
[2014-05-26 00:00:00, ..., 2014-05-26 23:55:00]
Length: 288, Freq: 5T, Timezone: None
<class 'pandas.tseries.index.DatetimeIndex'>
[2014-05-26 00:00:00, ..., 2014-05-26 23:55:00]
Length: 288, Freq: 5T, Timezone: None


indicator,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,...,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed
route_id,110-423,111-423,112-423,113-423,120-423,120N-423,121-423,122-423,123-423,130-423,...,131-423,131N-423,133-423,140-423,141-423,142-423,143-423,143W-423,150-423,150E-423
2014-05-26 00:00:00,0,0,0,0,0,0,0,0,0,0,...,,,29.675914,28.625587,,,,,,
2014-05-26 00:15:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 00:30:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 00:45:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 01:00:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 01:15:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 01:30:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 01:45:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 02:00:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
2014-05-26 02:15:00,0,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,
