In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import cPickle as pickle
from model.utilities import haversine, diff_timestamps, earliest_target

# Reading in static schedule data (GTFS text files under data/google_transit/)
stops_full = pd.read_csv('data/google_transit/stops.txt', index_col='stop_id')
routes = pd.read_csv('data/google_transit/routes.txt', index_col='route_id')
trips = pd.read_csv('data/google_transit/trips.txt', index_col='trip_id')
stop_times = pd.read_csv('data/google_transit/stop_times.txt')
shapes = pd.read_csv('data/google_transit/shapes.txt')

# Some of these stops are named "Not a public stop" but are still in trips.
# Luckily, in the few trips they appear in, they're only either at the
# beginning or the end, so we can remove them now and we'll still build
# a nice graph with the connections we expect.
# The ids are kept in one place so both filters below always agree.
NON_PUBLIC_STOP_IDS = [7520, 7530, 7531, 7540]
stops = stops_full[~stops_full.index.isin(NON_PUBLIC_STOP_IDS)]
stop_times = stop_times[~stop_times['stop_id'].isin(NON_PUBLIC_STOP_IDS)]

# Some stops are in stops.txt but not used in any trip; drop those too.
used_stops = set(stop_times['stop_id'].unique())
stops = stops[stops.index.isin(used_stops)]

# Build, for each stop_id, a sorted list of unique '<stop_id>_<arrival_time>'
# node names to make lookups faster later on.
# A single groupby pass replaces re-filtering the whole stop_times table
# (plus a row-wise apply) once per stop, which scaled as stops x rows.
all_stop_timepoints = {}
for stopid, arrivals in stop_times.groupby('stop_id')['arrival_time']:
    all_stop_timepoints[stopid] = sorted(
        set('{0}_{1}'.format(stopid, arrival) for arrival in arrivals))

ImportError: cannot import name earliest_target

In [2]:
# Load the previously saved graph from 'graph_x_7.gpkl'.
# NOTE(review): read_gpickle unpickles the file, which can execute arbitrary
# code -- only load graph files that come from a trusted source.
G_x = nx.read_gpickle('graph_x_7.gpkl')

In [3]:
def _edges_for_service(graph, service_id):
    """Return the (u, v, data) triples of `graph` whose data has this 'service_id'.

    Edges whose data dict has no 'service_id' entry are treated as service 0.
    """
    return [(u, v, d)
            for u, v, d in graph.edges_iter(data=True)
            if d.get('service_id', 0) == service_id]


# One edge list per service_id.
# NOTE(review): the variable names suggest 2 = Saturday, 3 = Sunday and
# 1 = weekday -- confirm against the feed's calendar data.
sat_edges = _edges_for_service(G_x, 2)
sun_edges = _edges_for_service(G_x, 3)
w_edges = _edges_for_service(G_x, 1)

In [4]:
# Spot-check: the entry at index 21 of the sorted timepoint list for stop 4203.
all_stop_timepoints[4203][21]

'4203_09:53:03'

In [5]:
# Remove the edges collected in sat_edges and sun_edges from the graph, in
# place, so later path searches no longer see them.
for weekend_edges in (sat_edges, sun_edges):
    G_x.remove_edges_from(weekend_edges)

In [7]:
# Search the graph for a trip from timepoint node '3335_07:42:08' to stop 4555.
# The result is unpacked into a travel time and the list of node names visited.
# NOTE(review): the meaning of the 3600 argument is not visible in this
# notebook (presumably a time limit in seconds) -- confirm in model.utilities.
path_time, path = earliest_target('3335_07:42:08', 4555, 3600, G_x, all_stop_timepoints)

In [27]:
# Express the travel time in minutes. The displayed 51.08 matches the
# 07:42:08 -> 08:33:13 span of `path`, so path_time is in seconds.
path_time/60.

51.083333333333336

In [15]:
# Show the sequence of '<stop_id>_<arrival_time>' nodes returned by earliest_target.
path

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '3341_07:49:56',
 '3345_07:51:00',
 '3323_07:52:13',
 '3331_07:53:17',
 '3334_07:54:23',
 '3349_07:55:27',
 '5553_07:57:00',
 '3292_07:58:40',
 '3299_08:00:13',
 '3289_08:01:32',
 '3282_08:03:00',
 '3296_08:04:07',
 '6029_08:05:00',
 '6031_08:06:10',
 '6033_08:07:20',
 '7809_08:09:05',
 '3454_08:15:52',
 '3454_08:16:07',
 '3456_08:16:46',
 '6197_08:20:00',
 '6198_08:20:20',
 '6199_08:20:47',
 '6200_08:21:16',
 '3517_08:21:38',
 '6962_08:22:12',
 '3513_08:22:34',
 '3505_08:23:02',
 '4130_08:23:26',
 '4132_08:24:00',
 '4549_08:24:40',
 '4557_08:25:25',
 '4562_08:26:24',
 '7285_08:27:47',
 '4560_08:28:33',
 '3123_08:30:00',
 '4555_08:33:13']

In [20]:
# Print a step-by-step itinerary for `path`: one line per hop, showing the
# arrival_time stored on the hop's first node, the route used for the hop
# (its short name, or 'walk') and the name of the stop the hop leads to.
#
# Set this to True to print a line only when the route differs from the
# previous hop's route. The default prints every hop; this toggle replaces
# the earlier `if True:` debugging override.
collapse_repeated_routes = False

simple_path = []   # the formatted itinerary lines, in travel order
last_step = None   # route label of the previous hop
for here, there in zip(path[:-1], path[1:]):
    # Edges that carry no 'route_id' are reported as walking.
    route = G_x[here][there].get('route_id', 'walk')
    if route != 'walk':
        route = routes.loc[route, 'route_short_name']
    if not collapse_repeated_routes or route != last_step:
        simple_path.append('{0}: {1} to {2}'.format(
            G_x.node[here]['arrival_time'],
            route,
            G_x.node[there]['stop_name']))
    last_step = route

for line in simple_path:
    print(line)

07:42:08: walk to 18th St & Hattie St
07:46:47: 33 to 18th St & Diamond St
07:48:01: 33 to 18th St & Castro St
07:49:00: 33 to 18th St & Noe St
07:49:56: 33 to 18th St & Sanchez St
07:51:00: 33 to 18th St & Church St
07:52:13: 33 to 18th St & Dolores St
07:53:17: 33 to 18th St & Guerrero St
07:54:23: 33 to 18th St & Valencia St
07:55:27: 33 to Mission St & 18th St
07:57:00: 33 to 16th St & Mission St
07:58:40: 33 to 16th St & Shotwell St
08:00:13: 33 to 16th St & Harrison St
08:01:32: 33 to 16th St & Bryant St
08:03:00: 33 to 16th St & Potrero Ave
08:04:07: 33 to Potrero Ave & 17th St
08:05:00: 33 to Potrero Ave & 18th St
08:06:10: 33 to Potrero Ave & 20th St
08:07:20: 33 to Sf General Hospital
08:09:05: walk to 23rd St & Utah St
08:15:52: walk to 23rd St & Utah St
08:16:07: 10 to 23rd St & Vermont St
08:16:46: walk to Rhode Island St & 23rd St
08:20:00: 19 to Rhode Island St & 24th St
08:20:20: 19 to Rhode Island St & 25th St
08:20:47: 19 to Rhode Island St & 26th St
08:21:16: 19 to 2

In [18]:
# Show the stop records for the search's start stop (3335) and target stop (4555).
stops.loc[3335], stops.loc[4555]

(stop_name    18th St & Hattie St
 stop_desc                       
 stop_lat                 37.7606
 stop_lon                -122.441
 zone_id                         
 stop_url                        
 Name: 3335, dtype: object, stop_name    Evans Ave & Newhall St
 stop_desc                          
 stop_lat                    37.7419
 stop_lon                   -122.386
 zone_id                            
 stop_url                           
 Name: 4555, dtype: object)

In [23]:
# Find the route row(s) whose short name is 'KT'.
routes[routes['route_short_name'] == 'KT']

Unnamed: 0_level_0,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
11397,SFMTA,KT,INGLESIDE/THIRD,,0,,,


In [26]:
# List the trips that belong to route_id 11397 (the 'KT' route found above).
trips[trips['route_id'] == 11397]

Unnamed: 0_level_0,route_id,service_id,trip_headsign,direction_id,block_id,shape_id
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7069264,11397,3,West Portal Station,1,9403,138563
7069263,11397,3,West Portal Station,1,9403,138563
7069262,11397,3,West Portal Station,1,9403,138563
7069261,11397,3,West Portal Station,1,9401,138563
7069260,11397,3,West Portal Station,1,9401,138563
7069259,11397,3,West Portal Station,1,9401,138563
7069258,11397,3,West Portal Station,1,9401,138563
7065914,11397,1,West Portal Station,1,9501,138563
7065913,11397,1,West Portal Station,1,9402,138563
7071692,11397,2,Embarcadero Station,1,9401,138565


In [28]:
# Cross-check with networkx: set up an iterator over the paths between the same
# start node and the arrival node found above that minimise the summed
# 'duration' edge attribute. Nothing is computed until the iterator is advanced.
foo = nx.all_shortest_paths(G_x, '3335_07:42:08','4555_08:33:13', weight='duration')

In [29]:
# Pull the next shortest path from the iterator.
next(foo)

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '4315_07:50:44',
 '4316_07:51:15',
 '4318_07:51:52',
 '3414_07:52:17',
 '3415_07:52:51',
 '4538_07:53:28',
 '4444_07:54:43',
 '4443_07:55:39',
 '3465_07:56:22',
 '4372_07:56:55',
 '4390_07:57:13',
 '4375_07:57:36',
 '7290_08:00:30',
 '3521_08:01:22',
 '5732_08:02:09',
 '5734_08:02:53',
 '5736_08:03:42',
 '5738_08:04:27',
 '3544_08:05:16',
 '3536_08:06:08',
 '3538_08:07:01',
 '3541_08:08:00',
 '4157_08:08:56',
 '4161_08:09:36',
 '4150_08:10:07',
 '4146_08:10:31',
 '4143_08:11:19',
 '4152_08:12:03',
 '4154_08:12:29',
 '4159_08:13:03',
 '7213_08:13:49',
 '4148_08:15:00',
 '5093_08:15:48',
 '5094_08:16:52',
 '5095_08:17:49',
 '5884_08:18:50',
 '5882_08:19:34',
 '5880_08:20:21',
 '5878_08:20:23',
 '3141_08:22:00',
 '7402_08:23:00',
 '7403_08:24:21',
 '7404_08:25:17',
 '7352_08:26:26',
 '4555_08:33:13']

In [32]:
# Hard-coded copy of the path that all_shortest_paths yielded above, kept so it
# can be compared with `path` without advancing the iterator again.
p1 = [
    '3335_07:42:08', '3336_07:46:47', '3329_07:48:01', '3326_07:49:00',
    '4315_07:50:44', '4316_07:51:15', '4318_07:51:52', '3414_07:52:17',
    '3415_07:52:51', '4538_07:53:28', '4444_07:54:43', '4443_07:55:39',
    '3465_07:56:22', '4372_07:56:55', '4390_07:57:13', '4375_07:57:36',
    '7290_08:00:30', '3521_08:01:22', '5732_08:02:09', '5734_08:02:53',
    '5736_08:03:42', '5738_08:04:27', '3544_08:05:16', '3536_08:06:08',
    '3538_08:07:01', '3541_08:08:00', '4157_08:08:56', '4161_08:09:36',
    '4150_08:10:07', '4146_08:10:31', '4143_08:11:19', '4152_08:12:03',
    '4154_08:12:29', '4159_08:13:03', '7213_08:13:49', '4148_08:15:00',
    '5093_08:15:48', '5094_08:16:52', '5095_08:17:49', '5884_08:18:50',
    '5882_08:19:34', '5880_08:20:21', '5878_08:20:23', '3141_08:22:00',
    '7402_08:23:00', '7403_08:24:21', '7404_08:25:17', '7352_08:26:26',
    '4555_08:33:13',
]

In [31]:
# Print the earliest_target path on one line so it can be compared with p1.
print(path)

['3335_07:42:08', '3336_07:46:47', '3329_07:48:01', '3326_07:49:00', '3341_07:49:56', '3345_07:51:00', '3323_07:52:13', '3331_07:53:17', '3334_07:54:23', '3349_07:55:27', '5553_07:57:00', '3292_07:58:40', '3299_08:00:13', '3289_08:01:32', '3282_08:03:00', '3296_08:04:07', '6029_08:05:00', '6031_08:06:10', '6033_08:07:20', '7809_08:09:05', '3454_08:15:52', '3454_08:16:07', '3456_08:16:46', '6197_08:20:00', '6198_08:20:20', '6199_08:20:47', '6200_08:21:16', '3517_08:21:38', '6962_08:22:12', '3513_08:22:34', '3505_08:23:02', '4130_08:23:26', '4132_08:24:00', '4549_08:24:40', '4557_08:25:25', '4562_08:26:24', '7285_08:27:47', '4560_08:28:33', '3123_08:30:00', '4555_08:33:13']


In [41]:
# Collect every stop name that contains the substring 'Castro'.
[name for name in stops['stop_name'] if 'Castro' in name]

['14th St & Castro St',
 '14th St & Castro St',
 '17th St & Castro St',
 '18th St & Castro St',
 '18th St & Castro St',
 '24th St & Castro St',
 '24th St & Castro St',
 '26th St & Castro St',
 'Chenery St & Castro St',
 'Castro St & 14th St',
 'Castro St & 14th St',
 'Castro St & 15th St',
 'Castro St & 15th St',
 'Castro St & 16th St',
 'Castro St & 16th St',
 'Castro St & 17th St',
 'Castro St & 18th St',
 'Castro St & 18th St',
 'Castro St & 19th St',
 'Castro St & 19th St',
 'Castro St & 20th St',
 'Castro St & 20th St',
 'Castro St & 21st St',
 'Castro St & 21st St',
 'Castro St & 22nd St',
 'Castro St & 22nd St',
 'Castro St & 23rd St',
 'Castro St & 23rd St',
 'Castro St & 24th St',
 'Castro St & 24th St',
 'Castro St & 25th St',
 'Castro St & 25th St',
 'Castro St & Duboce Ave',
 'Castro St & Duboce Ave',
 'Castro St & Elizabeth St',
 'Castro St & Elizabeth St',
 'Castro St & Market St',
 'Market St & Castro St',
 'Metro Castro Station/Downtown',
 'Metro Castro Station/Outbound

In [42]:
# Find the stop row(s) named exactly 'Metro Castro Station/Downtown'.
stops[stops['stop_name'] == 'Metro Castro Station/Downtown']

Unnamed: 0_level_0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5728,Metro Castro Station/Downtown,,37.76262,-122.435231,,


In [45]:
# Show the first ten trip_ids with a stop_times entry at stop 5728
# (the 'Metro Castro Station/Downtown' stop found above).
stop_times[stop_times['stop_id'] == 5728]['trip_id'].head(10)

1028342    7064267
1028372    7064268
1028402    7064269
1028432    7064270
1028462    7064271
1028492    7064272
1028522    7064273
1028552    7064274
1028582    7064275
1028612    7064276
Name: trip_id, dtype: int64

In [46]:
# Inspect one of those trips (7064276) to see which route and service it belongs to.
trips.loc[7064276]

route_id                       11398
service_id                         1
trip_headsign    Embarcadero Station
direction_id                       1
block_id                        9504
shape_id                      138573
Name: 7064276, dtype: object

In [47]:
# Look up route 11398, the route_id of the trip inspected above.
routes.loc[11398]

agency_id             SFMTA
route_short_name          L
route_long_name     TARAVAL
route_desc                 
route_type                0
route_url                  
route_color                
route_text_color           
Name: 11398, dtype: object