In [1]:
import sqlalchemy
# Shared engine and connection for the whole notebook: the helper functions
# defined in the next cell read `conn` as a module-level global.
# The URL targets the `iet` database on localhost and carries no credentials.
engine = sqlalchemy.create_engine('postgresql://localhost/iet')
conn = engine.connect()

import pandas

In [2]:
def rename_tables():
    """Print the SQL that moves ``public.gtfs_*`` tables to ``gtfs.*``.

    Looks up every table in the ``public`` schema whose name starts with
    ``gtfs_`` and, for each one, prints two statements:

    1. ``alter table public.gtfs_x rename to x;``
    2. ``alter table public.x set schema gtfs;``

    The statements are only printed, never executed; run them in a SQL
    session to apply the change. Returns ``None``.
    """
    prefix = "gtfs_"
    res = conn.execute(
        sqlalchemy.text(
            "select table_name from information_schema.tables "
            "where table_schema = 'public' and table_name like 'gtfs%'"
        )
    )
    # LIKE 'gtfs%' also matches names without the underscore (e.g. "gtfsfoo");
    # keep only real "gtfs_" prefixes so stripping the prefix is always valid.
    table_names = [r[0] for r in res.fetchall() if r[0].startswith(prefix)]
    for t in table_names:
        bare_name = t[len(prefix):]
        print("alter table public.{} rename to {};".format(t, bare_name))
        print("alter table public.{} set schema gtfs;".format(bare_name))
        
def drop_tables():
    """Drop every base table in the ``gtfs`` schema, with ``cascade``.

    Each ``drop table`` statement is printed and then executed on the
    module-level ``conn``. This is destructive and cannot be undone from
    this notebook. Returns ``None``.

    NOTE(review): no explicit commit is issued here; confirm that the
    installed SQLAlchemy version autocommits DDL on this connection.
    """
    res = conn.execute(
        sqlalchemy.text(
            # information_schema.tables also lists views and foreign tables,
            # on which "drop table" would fail, so restrict to base tables.
            "select table_name from information_schema.tables "
            "where table_schema = 'gtfs' and table_type = 'BASE TABLE'"
        )
    )
    table_names = [r[0] for r in res.fetchall()]
    for t in table_names:
        drop_q = "drop table gtfs.{} cascade".format(t)
        print(drop_q)
        # Wrap in text() like the lookup above: plain strings are not
        # accepted by Connection.execute() in SQLAlchemy 2.x.
        conn.execute(sqlalchemy.text(drop_q))

def set_timepoints(route, service, direction, seq_of_stop_ids):
    """Set ``timepoint = 1`` on a list of stops for one route/service/direction.

    Updates ``gtfs.stop_times`` rows whose trip belongs to the given
    route, service and direction and whose ``stop_id`` is in
    ``seq_of_stop_ids``.

    Args:
        route: ``route_id`` of the trips to update.
        service: ``service_id`` of the trips to update.
        direction: ``direction_id`` of the trips to update.
        seq_of_stop_ids: iterable of ``stop_id`` values to flag.

    Returns:
        str: the update statement with all values inlined as quoted
        literals, suitable for printing or logging.

    Raises:
        ValueError: if ``seq_of_stop_ids`` is empty (``in ()`` is not
            valid SQL).

    NOTE(review): no explicit commit is issued here; confirm that the
    installed SQLAlchemy version autocommits this update.
    """
    stop_ids = [str(s) for s in seq_of_stop_ids]
    if not stop_ids:
        raise ValueError("seq_of_stop_ids must contain at least one stop id")

    # One template, rendered twice: once with literals (returned for
    # display) and once with bind placeholders (actually executed).
    template = """
    update gtfs.stop_times 
      set timepoint = 1 
      where trip_id in 
          (select trip_id from gtfs.trips where 
              route_id = {route} 
              and service_id = {service} 
              and direction_id = {direction})
      and stop_id in 
    {stops}    
    """
    query = template.format(
        route="'{}'".format(route),
        service="'{}'".format(service),
        direction="'{}'".format(direction),
        stops="({})".format(",".join("'{}'".format(s) for s in stop_ids)),
    )

    # Bound parameters keep ids containing quotes from breaking the SQL.
    # The expanding bindparam renders the parenthesised IN list itself.
    statement = sqlalchemy.text(
        template.format(
            route=":route",
            service=":service",
            direction=":direction",
            stops=":stop_ids",
        )
    ).bindparams(sqlalchemy.bindparam("stop_ids", expanding=True))

    # Values are bound as strings to mirror the quoted literals that the
    # string-formatted statement used to send.
    conn.execute(
        statement,
        {
            "route": str(route),
            "service": str(service),
            "direction": str(direction),
            "stop_ids": stop_ids,
        },
    )
    return query

def get_stops(route):
    """
    Returns a DataFrame with every stop time of every trip on a given route ID.

    All services and directions of the route are included; filter the
    result on ``service_id`` / ``direction_id`` as needed. Rows are ordered
    by trip_id, then arrival_time, then stop_sequence.

    Columns: stop_sequence, arrival_time, route_id, service_id,
    direction_id, stop_name, stop_id, timepoint, trip_id.

    example:

    > d = get_stops(6614)
    > d.head()
    """
    query = sqlalchemy.text("""
    select
        times.stop_sequence, 
        times.arrival_time,
        trips.route_id, 
        trips.service_id,
        trips.direction_id,
        stops.stop_name,
        stops.stop_id,
        times.timepoint,
        trips.trip_id 
    from gtfs.stop_times times 
        inner join gtfs.trips trips on trips.trip_id = times.trip_id
        inner join gtfs.stops stops on stops.stop_id = times.stop_id
    where trips.trip_id in 
        (select trip_id from gtfs.trips 
            where route_id = :route)
    order by 
        trip_id asc,
        arrival_time asc,
        stop_sequence asc;
    """)
    # Bind the route instead of formatting it into the SQL; it is passed as a
    # string to mirror the quoted literal the formatted query used to send.
    df = pandas.read_sql(query, conn, params={"route": str(route)})
    return df

In [130]:
# Load all stop times for one route into `d`; the next cell filters this frame.
# 6627 is listed as "34 Gratiot" in the commented route list in the last cell.
route_id = 6627
d = get_stops(route_id)
# Optional exploration snippets, kept commented out:
# # show the likely timepoints for the route
# d[d.arrival_time.str.contains(":00$")].head(20)
# # show all trips with their first and last stop
# d.groupby(['trip_id', 'service_id', 'direction_id'])['stop_name'].agg(['first', 'last']).head(10)

In [135]:
# Filter by direction and service, and only show likely timepoints (arrival
# on a whole minute) at stops whose name contains 'ROSA'.
# The masks are combined into a single .loc selection: chaining d[...][...]
# with masks built from the unfiltered frame makes pandas reindex each mask
# and emit a "Boolean Series key will be reindexed" warning.
d.loc[
    (d.direction_id == 0)
    & (d.service_id == '1')
    & d.arrival_time.str.contains(":00$", na=False)  # na=False: missing times never match
    & d.stop_name.str.contains('ROSA', na=False)
].head(120)

  


Unnamed: 0,stop_sequence,arrival_time,route_id,service_id,direction_id,stop_name,stop_id,timepoint,trip_id
3057,3,20:02:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162921
3123,3,20:32:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162922
3189,3,21:02:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162923
3255,3,22:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162924
3321,3,23:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162925
3387,3,24:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162926
3453,3,04:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162927
3519,3,04:30:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162928
3585,3,05:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162929
3781,3,25:00:00,6627,1,0,ROSA PARKS TRANSIT 34 EB,8942,,1162932


In [44]:
# # # 7 - Cadillac-Harper
# rt_id = 6606
# stops = {
#     0: [8911, 32, 34, 35, 36, 38, 42, 41],
#     1: [41, 8963, 44, 46, 47, 49, 51, 8911]
# }

# # # 9 - Chalmers ***has weird schedule
# rt_id = 6607
# stops = {
#     0: [10178, 74, 75, 76, 77],
#     1: [77, 79, 80, 83, 10178]
# }

# # # 10 Chene
# rt_id = 6608
# stops = {
#     0: [8892, 87, 118, 89, 90, 91, 92],
#     1: [95, 96, 97, 98, 99, 100, 503, 8892]
# }

# # # 11 Clairmount
# rt_id = 6609
# stops = {
#     0: [103, 2491, 107, 800, 111, 112, 115],
#     1: [115, 1236, 118, 8248, 122, 2492, 103]
# }

# # # 12 Conant
# rt_id = 6610
# stops = {
#     0: [127, 7864, 129, 130, 9034, 5525, 132, 10176, 10167],
#     1: [10167, 135, 136, 5524, 9035, 138, 139, 140, 127]
# }

# # # 13 Conner *** has nonstandard schedule!
# rt_id = 6611
# stops = {
#     0: [93, 8552, 144, 145, 9033, 147, 148, 149, 150, 731],
#     1: [153, 154, 1989, 10031, 157, 8591, 93]
# }

# # # 14 Crosstown
# rt_id = 6612
# stops = {
#     0: [10187, 7141, 161, 162, 163, 164, 165, 169, 171, 172, 41],
#     1: [41, 175, 48, 177, 181, 182, 183, 184, 185, 7140, 10192]
# }

# # # 15 Chicago Davison
# rt_id = 6613
# stops = {
#     0: [188, 190, 191, 192, 193, 194, 195, 196, 197, 198],
#     1: [198, 200, 201, 202, 203, 204, 205, 206, 207, 188]
# }

# # # # 16 Dexter *** has nonstandard schedule!
# rt_id = 6614
# stops = {
#     0: [10340, 10325, 212, 213, 214, 215, 216, 218, 220, 221, 8913, 223],
#     1: [223, 8913, 224, 225, 227, 229, 230, 231, 6056, 233, 10326, 10340]
# }

# # # # 17 Eight Mile *** has nonstandard schedule
# rt_id = 6615
# stops = {
#     0: [236, 238, 10325, 240, 242, 243, 144, 62, 245, 40],
#     1: [41, 245, 63, 247, 248, 249, 251, 10326, 253, 236]
# }

# # # # 18 Fenkell
# rt_id = 6616
# stops = {
#     0: [256, 258, 260, 262, 263, 265, 266, 267, 8928],
#     1: [8928, 272, 273, 274, 3377, 276, 277, 279, 286, 288]
# }

# # # 19 Fort
# rt_id = 6617
# stops = {
#     0: [8891, 290, 291, 2558, 294, 295],
#     1: [298, 299, 2561, 302, 303, 8891]
# }

# # # 21 Grand River *** has non standard schedule
# rt_id = 6618
# stops = {
#     0: [324, 325, 328, 330, 331, 332, 334, 335, 337, 9972],
#     1: [9972, 344, 346, 347, 349, 350, 351, 353, 356, 324]
# }

# # # 22 Greenfield ***
# rt_id = 6619
# stops = {
#     0: [210, 10325, 358, 359, 361, 363, 600, 638],
#     1: [368, 372, 374, 376, 378, 379, 10326, 210]
# }

# # # 23 Hamilton
# rt_id = 6628
# stops = {
#     0: [8933, 9474, 401, 402, 403, 9658, 10145],
#     1: [1045, 9690, 386, 387, 10339, 9475, 8933]
# }

# # # 25 Jefferson
# rt_id = 6621
# stops = {
#     0: [9970, 9021, 420, 421, 422, 10178],
#     1: [10178, 425, 426, 427, 341, 9970]
# }

# # # 27 Joy
# rt_id = 6622
# stops = {
#     0: [429, 432, 434, 435, 437, 438, 439, 441, 443, 8910],
#     1: [8910, 444, 446, 737, 448, 449, 451, 452, 454, 429]
# }

# # # 29 Linwood
# rt_id = 6623
# stops = {
#     0: [2781, 465, 467, 469, 470, 8912],
#     1: [8912, 474, 475, 477, 479, 2781]
# }

# # # 30 Livernois *** non standard schedule
# rt_id = 6624
# stops = {
#     0: [127, 481, 8987, 482, 483, 484, 486, 487, 488],
#     1: [488, 490, 491, 493, 494, 495, 230, 496, 127]
# }

# # # 31 Mack
# rt_id = 6625
# stops = {
#     0: [41, 497, 498, 499, 6334, 502, 1616, 8918],
#     1: [8918, 21, 505, 6336, 508, 509, 510, 41]
# }

# # # 32 McNichols ***
# rt_id = 6626
# stops = {
#     0: [9860, 7230, 514, 515, 517, 519, 524, 510, 41],
#     1: [41, 527, 529, 534, 536, 538, 539, 10157, 9860]
# }

# # # 34 Gratiot ***
# rt_id = 6627
# stops = {
#     0: [9963, 8942, 167, 547, 549, 550, 552, 62],
#     1: [554, 555, 557, 558, 178, 179, 9963]
# }

# # # 49 Vernor
# rt_id = 6640
# stops = {
#     0: [6578, 10264, 765, 766, 767, 768, 8915],
#     1: [8915, 770, 8774, 772, 773, 10265, 6578]
# }

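# # NOTE: set_timepoints takes (route, service, direction, stops). In the loop
# # below `timepoint` is passed as the service argument and `service` as the
# # direction argument; `service` is also the key into the `stops` dict.
# for timepoint in [1,2,3]:
#     for service in [0, 1]:
#         print(set_timepoints(rt_id, timepoint, service, stops[service]))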