In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster

data = pd.read_csv('cloud-data/bus_data_modified.csv')
data.head()

Unnamed: 0,id,name,type,calculatedCourse,routeName,created,longitude,latitude,paxLoad,totalCap,more,deviceId,routeBlockId,timestamp,load,stop
0,4855,4131,Transit Bus,212.735226,LX Route,02:23 PM,-74.433571,40.519749,0,67,101,402213,111417,2024-10-31 18:24:00,0.0,Quads
1,4859,4177,Transit Bus,128.232575,LX Route,02:23 PM,-74.436304,40.524327,27,67,102,402325,111417,2024-10-31 18:24:00,0.402985,Livingston Student Center
2,4873,4191,Transit Bus,240.802514,B Route,02:23 PM,-74.457712,40.523463,8,67,101,402329,113278,2024-10-31 18:24:00,0.119403,
3,4876,4194,Transit Bus,299.246496,B Route,02:23 PM,-74.438946,40.521624,4,67,102,402330,113250,2024-10-31 18:24:00,0.059701,
4,17624,4002,Heavy Duty Transit LF,226.694476,REXB Route,02:23 PM,-74.43883,40.483538,2,68,102,402643,112511,2024-10-31 18:24:00,0.029412,


In [5]:
# convert timestamp to datetime
data['timestamp'] = pd.to_datetime(data['timestamp'])
# shift all timestamps 6 hrs back
data['timestamp'] = data['timestamp'] - pd.Timedelta(hours=4)
# only use the latitude and longitude columns between certain values
data = data[(data['latitude'] > 40.4) & (data['latitude'] < 40.53)]
data = data[(data['longitude'] > -74.5) & (data['longitude'] < -74.3)]
data.head()

Unnamed: 0,id,name,type,calculatedCourse,routeName,created,longitude,latitude,paxLoad,totalCap,more,deviceId,routeBlockId,timestamp,load,stop
0,4855,4131,Transit Bus,212.735226,LX Route,02:23 PM,-74.433571,40.519749,0,67,101,402213,111417,2024-10-31 14:24:00,0.0,Quads
1,4859,4177,Transit Bus,128.232575,LX Route,02:23 PM,-74.436304,40.524327,27,67,102,402325,111417,2024-10-31 14:24:00,0.402985,Livingston Student Center
2,4873,4191,Transit Bus,240.802514,B Route,02:23 PM,-74.457712,40.523463,8,67,101,402329,113278,2024-10-31 14:24:00,0.119403,
3,4876,4194,Transit Bus,299.246496,B Route,02:23 PM,-74.438946,40.521624,4,67,102,402330,113250,2024-10-31 14:24:00,0.059701,
4,17624,4002,Heavy Duty Transit LF,226.694476,REXB Route,02:23 PM,-74.43883,40.483538,2,68,102,402643,112511,2024-10-31 14:24:00,0.029412,


In [6]:
route = data['routeName'].unique()[0]

timestamps = data['timestamp'].unique()
data_route = data[data['routeName'] == route]
data_route.head()

Unnamed: 0,id,name,type,calculatedCourse,routeName,created,longitude,latitude,paxLoad,totalCap,more,deviceId,routeBlockId,timestamp,load,stop
0,4855,4131,Transit Bus,212.735226,LX Route,02:23 PM,-74.433571,40.519749,0,67,101,402213,111417,2024-10-31 14:24:00,0.0,Quads
1,4859,4177,Transit Bus,128.232575,LX Route,02:23 PM,-74.436304,40.524327,27,67,102,402325,111417,2024-10-31 14:24:00,0.402985,Livingston Student Center
5,4863,4181,Transit Bus,303.184585,LX Route,02:23 PM,-74.453571,40.506689,27,67,102,402645,111413,2024-10-31 14:24:00,0.402985,
6,4878,4196,Transit Bus,120.902282,LX Route,02:23 PM,-74.43464,40.523514,11,67,102,402694,111418,2024-10-31 14:24:00,0.164179,
7,4875,4193,Transit Bus,149.812119,LX Route,02:24 PM,-74.452001,40.503323,11,67,101,402696,111416,2024-10-31 14:24:00,0.164179,College Avenue Student Center


In [7]:
data_route['stop'].unique()

array(['Quads', 'Livingston Student Center', nan,
       'College Avenue Student Center', 'Student Activities Center (NB)',
       'The Yard', 'Livingston Plaza'], dtype=object)

In [9]:
stop_record = data_route['stop'].unique()[1]
stop_record

'Livingston Student Center'

In [17]:
loop_times = {} # id of bus with [loop start, loop end, time spent]

for bus in data_route['id'].unique():
    bus_data = data_route[data_route['id'] == bus]

    loop_times[bus] = []
    
    # get the first time it reaches the stop_record
    in_transit = True
    start = None
    end = None
    for i in range(len(bus_data)):
        # get the first time it reaches the stop_record
        # only consider a loop if the bus travels to some other stop before it reaches the stop_record again
        stop = bus_data.iloc[i]['stop']
        if stop == None: continue

        if in_transit and stop == stop_record:
            start = bus_data.iloc[i]['timestamp']
            in_transit = False
        elif in_transit and stop == stop_record:
            end = bus_data.iloc[i]['timestamp']
            in_transit = False
        elif stop != stop_record:
            in_transit = True
            
        if start and end:
            loop_times[bus].append([start, end, end - start])
            start = None
            end = None
            in_transit = True

loop_times

{4855: [],
 4859: [],
 4863: [],
 4878: [],
 4875: [],
 7149: [],
 4843: [],
 11197: [],
 13211: [],
 13213: [],
 18018: [],
 4873: [],
 15186: [],
 13216: [],
 18014: [],
 4885: [],
 7151: [],
 13210: [],
 15189: [],
 13217: [],
 4872: [],
 18020: [],
 9418: [],
 4881: [],
 11204: [],
 9421: [],
 4860: [],
 4853: [],
 4864: [],
 17626: [],
 15187: [],
 4850: [],
 4862: [],
 4851: [],
 4877: [],
 7179: [],
 4891: []}

In [15]:
for key in loop_times:
    print(key)
    for loop in loop_times[key]:
        print(loop[0], loop[1], loop[2])

4855
2024-10-31 15:02:42 2024-10-31 15:42:58 0 days 00:40:16
2024-10-31 15:43:29 2024-10-31 16:21:45 0 days 00:38:16
2024-10-31 16:22:16 2024-10-31 17:03:39 0 days 00:41:23
2024-10-31 17:04:10 2024-10-31 17:43:32 0 days 00:39:22
2024-10-31 17:44:03 2024-10-31 17:46:06 0 days 00:02:03
2024-10-31 17:46:37 2024-10-31 18:15:16 0 days 00:28:39
2024-10-31 18:15:47 2024-10-31 18:17:50 0 days 00:02:03
2024-10-31 18:18:21 2024-10-31 18:20:24 0 days 00:02:03
2024-10-31 18:20:55 2024-10-31 18:22:58 0 days 00:02:03
2024-10-31 18:23:28 2024-10-31 18:25:31 0 days 00:02:03
2024-10-31 18:26:02 2024-10-31 18:56:47 0 days 00:30:45
2024-10-31 18:57:17 2024-10-31 19:25:30 0 days 00:28:13
2024-10-31 19:26:00 2024-10-31 19:54:44 0 days 00:28:44
2024-10-31 19:55:15 2024-10-31 19:57:18 0 days 00:02:03
2024-10-31 19:57:49 2024-10-31 19:59:52 0 days 00:02:03
2024-10-31 20:00:23 2024-10-31 20:02:26 0 days 00:02:03
2024-10-31 20:02:56 2024-10-31 20:04:59 0 days 00:02:03
2024-10-31 20:05:30 2024-10-31 20:32:43 0 d