In [2]:
import pandas as pd
from ftplib import FTP
from datetime import datetime
import numpy as np
import pymongo
from pymongo import MongoClient

from geopy import distance

import get_recent_days as gtdys
import muni_etl

In [3]:
client = MongoClient('localhost', 27017)

db = client['testavl']
coll = db['clean']

Let's grab on record, get the date, and parse it.

Then, we can look up it's gtfs data, and figure out the service ID for the day.

In [4]:
turkey = coll.find_one()
wing = turkey['REPORT_TIME']
wing

'11/19/2016 00:00:51'

In [5]:
cln_date = datetime.strptime(wing, '%m/%d/%Y %H:%M:%S')

In [6]:
cln_date

datetime.datetime(2016, 11, 19, 0, 0, 51)

In [7]:
cln_date.strftime("%Y-%m-%d")

'2016-11-19'

Loading in the GTFS Data, finding our directory based on the date (function!)

In [8]:
gtfs_lookup_df = pd.read_csv('data/gtfs_lookup.csv')

In [9]:
gtfs_directory = gtdys.get_gtfs_file(cln_date, gtfs_lookup_df)

In [10]:
cal_txt = 'data/gtfs/{}/calendar.txt'.format(gtfs_directory)
cal = pd.read_csv(cal_txt)

In [11]:
# 0 is Monday, 6 is Sunday
wkdy_num = cln_date.weekday()
type(wkdy_num)

int

In [12]:
cal_col_mapping = {'monday':0, 'tuesday':1, 'wednesday':2, 'thursday':3, 
                   'friday':4, 'saturday':5, 'sunday':6}
cal_colnum = cal.rename(columns=cal_col_mapping)

In [13]:
cal_colnum

Unnamed: 0,service_id,0,1,2,3,4,5,6,start_date,end_date
0,1,1,1,1,1,1,0,0,20160813,20170210
1,2,0,0,0,0,0,1,0,20160813,20170210
2,3,0,0,0,0,0,0,1,20160813,20170210


In [14]:
service_id = cal_colnum[cal_colnum[wkdy_num] == 1]
service_id['service_id'].values[0]

2

Cool! I have service ID and can get an basic year-day-month string.
Let's practice updating all the documents?

In [43]:
coll.update_many({},{'$set' : {"new_field":1}})

<pymongo.results.UpdateResult at 0x11560acf0>

Let's clean it up...

In [71]:
coll.update_many({},{'$unset' : {"new_field":1}})

<pymongo.results.UpdateResult at 0x11624b900>

And now let's add our new fields:

In [None]:
cln_date.strftime("%Y-%m-%d")

In [72]:
coll.update_many({},{'$set' : {"new_field":1, }})

{'_id': ObjectId('5adfa72e3ad39e252ff384fc'),
 'REV': '1526',
 'REPORT_TIME': '12/04/2016 05:38:31',
 'VEHICLE_TAG': '5408',
 'LONGITUDE': '-122.40895',
 'LATITUDE': '37.76414',
 'SPEED': '0.0',
 'HEADING': '0.0',
 'TRAIN_ASSIGNMENT': '3302',
 'PREDICTABLE': '1'}

In [140]:
coll.find_one()

{'_id': ObjectId('5adfa72e3ad39e252ff384fc'),
 'REV': '1526',
 'REPORT_TIME': '12/04/2016 05:38:31',
 'VEHICLE_TAG': '5408',
 'LONGITUDE': '-122.40895',
 'LATITUDE': '37.76414',
 'SPEED': '0.0',
 'HEADING': '0.0',
 'TRAIN_ASSIGNMENT': '3302',
 'PREDICTABLE': '1'}

# Intersection time!

First, let's get the lat/lon of the starting stop?
Do I have to look at stop/time for this shit? Probs...
TRAIN_ASSIGNMENT = 3302...

Let's get all trips that have our block

In [85]:
trip_txt = 'data/gtfs/{}/trips.txt'.format(gtfs_directory)
trips = pd.read_csv(trip_txt)

In [187]:
spec_trips = trips[(trips['block_id'] == 3302) & (trips['service_id'] == 3) & (trips['direction_id'] == 0)]

In [94]:
trip_ids = spec_trips['trip_id'].unique()

In [95]:
stptm_txt = 'data/gtfs/{}/stop_times.txt'.format(gtfs_directory)
stp_tm = pd.read_csv(stptm_txt)

In [96]:
stp_tm_trips= stp_tm[stp_tm['trip_id'].isin(trip_ids)]

All starting times for trips (direction=0, service=3, block=3302)!

In [100]:
start_times = stp_tm_trips[stp_tm_trips['stop_sequence'] == 1]

In [101]:
start_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
487958,7253930,19:50:00,19:50:00,6293,1,,,,
488266,7253937,17:30:00,17:30:00,6293,1,,,,
488574,7253944,15:10:00,15:10:00,6293,1,,,,
488882,7253951,12:50:00,12:50:00,6293,1,,,,
489226,7253959,10:30:00,10:30:00,6293,1,,,,
489402,7253963,06:50:00,06:50:00,6293,1,,,,
489534,7253966,08:20:00,08:20:00,6293,1,,,,


Okay Cool! So the stop is 6293...

I'm going to want to compare AVL intersection times with the times in this dataframe

In [104]:
stop_txt = 'data/gtfs/{}/stops.txt'.format(gtfs_directory)
stops = pd.read_csv(stop_txt)

In [106]:
start_stop = stops[stops['stop_id'] == 6293]

In [107]:
start_stop

Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url
3280,6293,Sacramento St & Cherry St,,37.786908,-122.45656,,


In [111]:
srtstp_latlon = (start_stop['stop_lat'].values[0], start_stop['stop_lon'].values[0])

In [112]:
srtstp_latlon

(37.786908000000004, -122.45656000000001)

In [117]:
test = coll.find_one()
test_latlon = (test['LATITUDE'], test['LONGITUDE'])

In [119]:
print(distance.distance(srtstp_latlon, test_latlon).m)

4896.8591130184595


Cool, let's check for intersections within 10 meters...

In [131]:
all_intersections = []

for doc in coll.find():
    doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
    
    if distance.distance(srtstp_latlon, doc_latlon).m <= 10:
        
        all_intersections.append(doc)

In [132]:
len(all_intersections)

164

In [126]:
coll.count()

9515

Wait, I should only get buses with one train_assign...

In [127]:
intersections = []

for doc in coll.find():
    doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
    
    if distance.distance(srtstp_latlon, doc_latlon).m <= 10:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            intersections.append(doc)

In [129]:
len(intersections)

15

In [134]:
for item in intersections:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 10:08:27 5408
12/04/2016 10:09:57 5408
12/04/2016 10:11:27 5408
12/04/2016 12:32:26 5408
12/04/2016 12:33:56 5408
12/04/2016 12:35:26 5408
12/04/2016 12:36:56 5408
12/04/2016 12:38:26 5408
12/04/2016 12:39:56 5408
12/04/2016 12:41:26 5408
12/04/2016 12:42:56 5408
12/04/2016 12:44:26 5408
12/04/2016 12:45:56 5408
12/04/2016 12:47:26 5408
12/04/2016 12:48:56 5408


In [144]:
cheese = []

for item in all_intersections:
    cheese.append(item['REPORT_TIME'])
#     print(item['REPORT_TIME'], item['VEHICLE_TAG'])

cheese.sort()

cheese


['12/04/2016 00:15:23',
 '12/04/2016 00:16:53',
 '12/04/2016 00:18:23',
 '12/04/2016 00:19:53',
 '12/04/2016 00:20:17',
 '12/04/2016 00:21:23',
 '12/04/2016 00:22:53',
 '12/04/2016 00:24:23',
 '12/04/2016 00:25:53',
 '12/04/2016 00:27:23',
 '12/04/2016 00:28:53',
 '12/04/2016 00:30:23',
 '12/04/2016 00:44:31',
 '12/04/2016 05:48:41',
 '12/04/2016 05:50:11',
 '12/04/2016 07:09:43',
 '12/04/2016 07:11:13',
 '12/04/2016 07:12:43',
 '12/04/2016 07:14:13',
 '12/04/2016 07:15:43',
 '12/04/2016 07:17:13',
 '12/04/2016 07:18:43',
 '12/04/2016 07:20:13',
 '12/04/2016 09:17:15',
 '12/04/2016 09:18:46',
 '12/04/2016 09:20:16',
 '12/04/2016 09:21:46',
 '12/04/2016 09:23:16',
 '12/04/2016 10:08:27',
 '12/04/2016 10:09:57',
 '12/04/2016 10:11:27',
 '12/04/2016 11:09:48',
 '12/04/2016 11:11:18',
 '12/04/2016 11:36:09',
 '12/04/2016 11:37:39',
 '12/04/2016 11:37:47',
 '12/04/2016 11:39:09',
 '12/04/2016 11:40:39',
 '12/04/2016 11:42:09',
 '12/04/2016 11:43:39',
 '12/04/2016 11:45:09',
 '12/04/2016 11:

Fuck, how am I supposed to group these?

Each day, there are a few TRAIN_ASSIGNS.    
So that's one way...

I can test the time differnce between its arrival at the first stop at the second stop - if it's positive, then I know it's headed in the right direction?

I count 20 distinct intersections!

## Two Questions:

1. If Direction=1, is the last stop the first stop of direction=0?
2. What's the station ID of my 'last stop', folsom and 17th?

First, how many trips, given all blocks, with dir=0 and service=3?

In [279]:
chimmy = []

for item in all_intersections:
    chimmy.append(item['TRAIN_ASSIGNMENT'])
#     print(item['REPORT_TIME'], item['VEHICLE_TAG'])

challa = list(set(chimmy))
challa

['3306', '3307', '3302', '3305', '3303', '3301', '3304']

In [154]:
leaving_trips = trips[(trips['block_id'].isin(challa)) & (trips['service_id'] == 3) & (trips['direction_id'] == 0)]
leaving_trips.shape

(58, 7)

In [157]:
spec_trips_rev = trips[(trips['block_id'] == 3302) & (trips['service_id'] == 3) & (trips['direction_id'] == 1)]
rev_trip_ids = spec_trips_rev['trip_id'].unique()

In [164]:
stp_tm_trips_rev= stp_tm[stp_tm['trip_id'].isin(rev_trip_ids)]


single_rev_trip = stp_tm_trips_rev[stp_tm_trips_rev['trip_id'] == 7253992]

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
490464,7253992,18:40:00,18:40:00,3511,1,,,,
490465,7253992,18:40:55,18:40:55,4964,2,,,,
490466,7253992,18:41:26,18:41:26,6038,3,,,,
490467,7253992,18:42:28,18:42:28,6119,4,,,,
490468,7253992,18:43:19,18:43:19,6035,5,,,,
490469,7253992,18:44:29,18:44:29,6032,6,,,,
490470,7253992,18:45:33,18:45:33,6030,7,,,,
490471,7253992,18:47:00,18:47:00,6028,8,,,,
490472,7253992,18:47:48,18:47:48,3295,9,,,,
490473,7253992,18:49:00,18:49:00,3281,10,,,,


## Welp!

45 Stops, and the last is the first of the reverse trip...

Let's try to find the id of the 'last stop'

In [188]:
last_ten_seq = ['36', '37', '38', '39', '40', '41', '42', '43', '44', '45']
last_ten = [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]

last_ten_stop = stp_tm_trips[stp_tm_trips['stop_sequence'].isin([35,34,33])]

lst_stops = last_ten_stop[last_ten_stop['trip_id'] == 7253937]['stop_id'].values

In [189]:
last_33_stops = stops[stops['stop_id'].isin(lst_stops)]
last_33_stops

Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url
360,3289,16th St & Harrison St,,37.765377,-122.41329,,
363,3292,16th St & Mission St,,37.76502,-122.41928,,
369,3299,16th St & Shotwell St,,37.765218,-122.416048,,


## Stop at 16th and Harrison
stop_id = 3289

In [181]:
lst_stop = (37.765377, -122.413290)

## Potential problem - what if a returning bus 'intersects' with the stop as it passes by in the other direction?

In [196]:
beginnings = []
endings = []

for doc in coll.find():

    doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
    
    if distance.distance(srtstp_latlon, doc_latlon).m <= 10:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            beginnings.append(doc)
            
    if distance.distance(lst_stop, doc_latlon).m <= 20:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            endings.append(doc)

In [197]:
for item in beginnings:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 10:08:27 5408
12/04/2016 10:09:57 5408
12/04/2016 10:11:27 5408
12/04/2016 12:32:26 5408
12/04/2016 12:33:56 5408
12/04/2016 12:35:26 5408
12/04/2016 12:36:56 5408
12/04/2016 12:38:26 5408
12/04/2016 12:39:56 5408
12/04/2016 12:41:26 5408
12/04/2016 12:42:56 5408
12/04/2016 12:44:26 5408
12/04/2016 12:45:56 5408
12/04/2016 12:47:26 5408
12/04/2016 12:48:56 5408


In [198]:
for item in endings:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 07:18:57 5408
12/04/2016 11:51:53 5408
12/04/2016 15:54:57 5408
12/04/2016 18:49:57 5408
12/04/2016 20:29:27 5408
12/04/2016 20:30:05 5408


Let's find all blocks today, and check their vehicles:

In [200]:
blocks = []

for doc in coll.find():

    blocks.append(doc['TRAIN_ASSIGNMENT'])
    
blocks_today = list(set(blocks))

In [203]:
for block in blocks_today:
    
    vehicle = []
    
    for doc in coll.find({'TRAIN_ASSIGNMENT':block}):
        
        vehicle.append(doc['VEHICLE_TAG'])
        
    print (block, set(vehicle))
    

3306 {'5463', '5421'}
3307 {'5480', '5489'}
3302 {'5408'}
3305 {'5443'}
3303 {'5455', '5461'}
3301 {'5484'}
3304 {'5462', '5468'}


Next, let's check blocks for route_id's

In [209]:
routes_all = []

for block in blocks_today:
    
    # Get trips with this block
    block_trips = trips[(trips['block_id'] == int(block))]
    routes_all.extend(block_trips['route_id'].unique())

set(routes_all)

    

{11668}

Now, let's try to check intersections with 16th and Mission?

In [210]:
mission_16 = (37.765020, -122.419280)

In [216]:
beginnings = []
endings = []

for doc in coll.find():

    doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
    
    if distance.distance(srtstp_latlon, doc_latlon).m <= 10:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            beginnings.append(doc)
            
    if distance.distance(mission_16, doc_latlon).m <= 10:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            endings.append(doc)

In [217]:
for item in beginnings:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 10:08:27 5408
12/04/2016 10:09:57 5408
12/04/2016 10:11:27 5408
12/04/2016 12:32:26 5408
12/04/2016 12:33:56 5408
12/04/2016 12:35:26 5408
12/04/2016 12:36:56 5408
12/04/2016 12:38:26 5408
12/04/2016 12:39:56 5408
12/04/2016 12:41:26 5408
12/04/2016 12:42:56 5408
12/04/2016 12:44:26 5408
12/04/2016 12:45:56 5408
12/04/2016 12:47:26 5408
12/04/2016 12:48:56 5408


In [218]:
for item in endings:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 11:06:57 5408
12/04/2016 11:07:44 5408
12/04/2016 13:32:26 5408
12/04/2016 14:14:27 5408
12/04/2016 18:09:56 5408
12/04/2016 18:51:56 5408
12/04/2016 20:26:27 5408


Maybe a little better?
What's strange is that there are more endings that beginnings - way more...

In [222]:
output = []

for doc in coll.find():
    
        if doc['TRAIN_ASSIGNMENT'] == '3302':
            
            doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
            doc_time = doc['REPORT_TIME']
            
            output.append([doc_latlon, doc_time])

In [224]:
len(output)

1280

In [236]:
import folium

srtstp_latlon

map_2 = folium.Map(location=[37.770373, -122.436064],
                   tiles='Stamen Terrain',
                   zoom_start=13)

for data in output[0:600]:
        
    dist = distance.distance(srtstp_latlon, (float(data[0][0]), float(data[0][1]))).m
    
    folium.Marker([float(data[0][0]), float(data[0][1])], popup=data[1] + "\n" + str(dist)).add_to(map_2)



folium.Marker([srtstp_latlon[0], srtstp_latlon[1]], icon=folium.Icon(color='red')).add_to(map_2)
    
map_2

In [235]:
srtstp_latlon

(37.786908000000004, -122.45656000000001)

In [237]:
beginnings = []
endings = []

for doc in coll.find():

    doc_latlon = (doc['LATITUDE'], doc['LONGITUDE'])
    
    if distance.distance(srtstp_latlon, doc_latlon).m <= 20:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            beginnings.append(doc)
            
    if distance.distance(lst_stop, doc_latlon).m <= 20:
        
        if doc['TRAIN_ASSIGNMENT'] == '3302':
        
            endings.append(doc)

In [256]:
test_1 = beginnings[0]
test_1_time = datetime.strptime(test_1['REPORT_TIME'], '%m/%d/%Y %H:%M:%S')


test_2 = beginnings[1]
test_2_time = datetime.strptime(test_2['REPORT_TIME'], '%m/%d/%Y %H:%M:%S')

print(test_1_time)
print(test_2_time)


diff = test_2_time-test_1_time
diff.seconds

2016-12-04 06:36:57
2016-12-04 06:38:27


90

In [257]:
60*4

240

In [268]:
start_time_clusters = {}

print (len(start_time_clusters))

for idx, item in enumerate(beginnings):
    
    print (idx)
    
    cln_date = datetime.strptime(item['REPORT_TIME'], '%m/%d/%Y %H:%M:%S')
 
    if len(start_time_clusters) == 0:
        
        print ('no items in dict!')
        start_time_clusters[idx] = [item]
        
    else:  
        
        print ('items in dict!')
        
        matched = 0
        
        for key, value in start_time_clusters.items():
            
            print('checking each time in index {}'.format(key))
            
            for time in value:
                
                time_diff = cln_date - datetime.strptime(time['REPORT_TIME'], '%m/%d/%Y %H:%M:%S')
                
                print ('time difference in seconds: ', time_diff.seconds)
                
                if time_diff.seconds < 240:
                    
                    print ("Match found!")
                    
                    start_time_clusters[key].append(item)
                    matched +=1
                    
                    break
            
        if matched == 0:

            start_time_clusters[idx] = [item]
                
            print ("Did this break?")
            print ("\n")
                    
                
                

0
0
no items in dict!
1
items in dict!
checking each time in index 0
time difference in seconds:  90
Match found!
2
items in dict!
checking each time in index 0
time difference in seconds:  180
Match found!
3
items in dict!
checking each time in index 0
time difference in seconds:  270
time difference in seconds:  180
Match found!
4
items in dict!
checking each time in index 0
time difference in seconds:  360
time difference in seconds:  270
time difference in seconds:  180
Match found!
5
items in dict!
checking each time in index 0
time difference in seconds:  450
time difference in seconds:  360
time difference in seconds:  270
time difference in seconds:  180
Match found!
6
items in dict!
checking each time in index 0
time difference in seconds:  540
time difference in seconds:  450
time difference in seconds:  360
time difference in seconds:  270
time difference in seconds:  180
Match found!
7
items in dict!
checking each time in index 0
time difference in seconds:  630
time differ

time difference in seconds:  180
Match found!
44
items in dict!
checking each time in index 0
time difference in seconds:  22229
time difference in seconds:  22139
time difference in seconds:  22049
time difference in seconds:  21959
time difference in seconds:  21869
time difference in seconds:  21779
time difference in seconds:  21689
time difference in seconds:  21599
time difference in seconds:  21509
checking each time in index 9
time difference in seconds:  16650
time difference in seconds:  16560
time difference in seconds:  16470
time difference in seconds:  16449
time difference in seconds:  16380
time difference in seconds:  16290
time difference in seconds:  16276
time difference in seconds:  16200
time difference in seconds:  16110
checking each time in index 18
time difference in seconds:  9539
time difference in seconds:  9449
time difference in seconds:  9359
time difference in seconds:  9305
time difference in seconds:  9269
time difference in seconds:  9179
time differ

time difference in seconds:  180
Match found!
62
items in dict!
checking each time in index 0
time difference in seconds:  38519
time difference in seconds:  38429
time difference in seconds:  38339
time difference in seconds:  38249
time difference in seconds:  38159
time difference in seconds:  38069
time difference in seconds:  37979
time difference in seconds:  37889
time difference in seconds:  37799
checking each time in index 9
time difference in seconds:  32940
time difference in seconds:  32850
time difference in seconds:  32760
time difference in seconds:  32739
time difference in seconds:  32670
time difference in seconds:  32580
time difference in seconds:  32566
time difference in seconds:  32490
time difference in seconds:  32400
checking each time in index 18
time difference in seconds:  25829
time difference in seconds:  25739
time difference in seconds:  25649
time difference in seconds:  25595
time difference in seconds:  25559
time difference in seconds:  25469
time 

time difference in seconds:  8551
time difference in seconds:  8461
time difference in seconds:  8371
time difference in seconds:  8281
time difference in seconds:  8191
time difference in seconds:  8101
time difference in seconds:  8011
time difference in seconds:  7921
time difference in seconds:  7831
time difference in seconds:  7741
checking each time in index 70
time difference in seconds:  450
time difference in seconds:  360
time difference in seconds:  270
time difference in seconds:  180
Match found!
76
items in dict!
checking each time in index 0
time difference in seconds:  46980
time difference in seconds:  46890
time difference in seconds:  46800
time difference in seconds:  46710
time difference in seconds:  46620
time difference in seconds:  46530
time difference in seconds:  46440
time difference in seconds:  46350
time difference in seconds:  46260
checking each time in index 9
time difference in seconds:  41401
time difference in seconds:  41311
time difference in se

In [275]:
for key, value in start_time_clusters.items():
    
    times = []
    
    for item in value:
        
        times.append(datetime.strptime(item['REPORT_TIME'], '%m/%d/%Y %H:%M:%S'))
        
    print ("Range: {} to {}".format(min(times), max(times)))
        

Range: 2016-12-04 06:36:57 to 2016-12-04 06:48:57
Range: 2016-12-04 08:09:56 to 2016-12-04 08:18:56
Range: 2016-12-04 10:08:27 to 2016-12-04 10:29:27
Range: 2016-12-04 12:32:26 to 2016-12-04 12:48:56
Range: 2016-12-04 14:53:27 to 2016-12-04 15:09:57
Range: 2016-12-04 17:14:26 to 2016-12-04 17:29:26
Range: 2016-12-04 19:30:57 to 2016-12-04 19:48:57


In [278]:
start_times.sort_values('departure_time')

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
489402,7253963,06:50:00,06:50:00,6293,1,,,,
489534,7253966,08:20:00,08:20:00,6293,1,,,,
489226,7253959,10:30:00,10:30:00,6293,1,,,,
488882,7253951,12:50:00,12:50:00,6293,1,,,,
488574,7253944,15:10:00,15:10:00,6293,1,,,,
488266,7253937,17:30:00,17:30:00,6293,1,,,,
487958,7253930,19:50:00,19:50:00,6293,1,,,,


In [276]:
for item in beginnings:
    print(item['REPORT_TIME'], item['VEHICLE_TAG'])

12/04/2016 06:36:57 5408
12/04/2016 06:38:27 5408
12/04/2016 06:39:57 5408
12/04/2016 06:41:27 5408
12/04/2016 06:42:57 5408
12/04/2016 06:44:27 5408
12/04/2016 06:45:57 5408
12/04/2016 06:47:27 5408
12/04/2016 06:48:57 5408
12/04/2016 08:09:56 5408
12/04/2016 08:11:26 5408
12/04/2016 08:12:56 5408
12/04/2016 08:13:17 5408
12/04/2016 08:14:26 5408
12/04/2016 08:15:56 5408
12/04/2016 08:16:10 5408
12/04/2016 08:17:26 5408
12/04/2016 08:18:56 5408
12/04/2016 10:08:27 5408
12/04/2016 10:09:57 5408
12/04/2016 10:11:27 5408
12/04/2016 10:12:21 5408
12/04/2016 10:12:57 5408
12/04/2016 10:14:27 5408
12/04/2016 10:15:57 5408
12/04/2016 10:17:27 5408
12/04/2016 10:18:57 5408
12/04/2016 10:20:27 5408
12/04/2016 10:21:57 5408
12/04/2016 10:23:27 5408
12/04/2016 10:24:57 5408
12/04/2016 10:26:27 5408
12/04/2016 10:27:57 5408
12/04/2016 10:29:27 5408
12/04/2016 12:32:26 5408
12/04/2016 12:33:56 5408
12/04/2016 12:35:26 5408
12/04/2016 12:36:56 5408
12/04/2016 12:38:26 5408
12/04/2016 12:39:56 5408


In [4]:
gtfs_lookup_df = pd.read_csv('data/gtfs_lookup.csv')
gtfs_lookup_df

Unnamed: 0.1,Unnamed: 0,from_date,to_date,directory
0,0,2016-08-13,2017-02-10,sfmta_2017-02-10
1,1,2016-06-04,2016-08-12,sfmta_2016-08-12
2,2,2016-04-23,2016-06-03,sfmta_2016-06-03
3,3,2016-02-13,2016-04-22,sfmta_2016-04-22
4,4,2015-09-26,2016-02-12,sfmta_2016-02-12


In [7]:
sign_df = pd.read_csv('data/lookUpSignUpPeriods.csv')
sign_df

Unnamed: 0,SIGNID,SIGNUPNAME,FROMDATE,TODATE
0,105,2016 SUMMER,20160604,20160812
1,102,2016 GSU,20160423,20160603
2,100,2015 T-FALL,20150926,20160422
3,99,2015 SPRING,20150425,20150925
4,98,2015 JANUARY,20150131,20150424
5,97,2014 NOVEMBER,20141122,20150130
6,96,2014 FALL,20141025,20141121
7,95,2014 JUNE,20140621,20141024
8,93,2014 SUMMER,20140607,20140620
9,91,2014 SPRING,20140412,20140606
