In [22]:
import pandas as pd

# Read the trip CSV (expected in the working directory) into a DataFrame.
trip_csv = "bc_trip259172515_230215.csv"
df = pd.read_csv(trip_csv)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,EVENT_NO_TRIP,EVENT_NO_STOP,OPD_DATE,VEHICLE_ID,METERS,ACT_TIME,GPS_LONGITUDE,GPS_LATITUDE,GPS_SATELLITES,GPS_HDOP
0,259172515,259172517,15FEB2023:00:00:00,4223,40,20469,-122.648137,45.493082,12,0.7
1,259172515,259172517,15FEB2023:00:00:00,4223,48,20474,-122.64824,45.49307,12,0.8
2,259172515,259172517,15FEB2023:00:00:00,4223,57,20479,-122.648352,45.493123,12,0.8
3,259172515,259172517,15FEB2023:00:00:00,4223,73,20484,-122.648385,45.493262,12,0.7
4,259172515,259172517,15FEB2023:00:00:00,4223,112,20489,-122.648347,45.493582,12,0.8


In [12]:
# Drop the three columns listed below.
# The result is reassigned rather than mutated with inplace=True, and
# errors="ignore" skips labels that are already absent, so re-running this
# cell on an already-trimmed frame does not raise KeyError.
df = df.drop(
    columns=["EVENT_NO_STOP", "GPS_SATELLITES", "GPS_HDOP"],
    errors="ignore",
)
df.head()

Unnamed: 0,EVENT_NO_TRIP,OPD_DATE,VEHICLE_ID,METERS,ACT_TIME,GPS_LONGITUDE,GPS_LATITUDE
0,259172515,15FEB2023:00:00:00,4223,40,20469,-122.648137,45.493082
1,259172515,15FEB2023:00:00:00,4223,48,20474,-122.64824,45.49307
2,259172515,15FEB2023:00:00:00,4223,57,20479,-122.648352,45.493123
3,259172515,15FEB2023:00:00:00,4223,73,20484,-122.648385,45.493262
4,259172515,15FEB2023:00:00:00,4223,112,20489,-122.648347,45.493582


In [13]:
# Only these columns are parsed from the file; read_csv skips the rest.
columns_to_keep = [
    'EVENT_NO_TRIP',
    'OPD_DATE',
    'VEHICLE_ID',
    'METERS',
    'ACT_TIME',
    'GPS_LONGITUDE',
    'GPS_LATITUDE',
]

# Re-read the trip CSV, restricted to the selected columns.
trip_csv = "bc_trip259172515_230215.csv"
df = pd.read_csv(trip_csv, usecols=columns_to_keep)

df.head()

Unnamed: 0,EVENT_NO_TRIP,OPD_DATE,VEHICLE_ID,METERS,ACT_TIME,GPS_LONGITUDE,GPS_LATITUDE
0,259172515,15FEB2023:00:00:00,4223,40,20469,-122.648137,45.493082
1,259172515,15FEB2023:00:00:00,4223,48,20474,-122.64824,45.49307
2,259172515,15FEB2023:00:00:00,4223,57,20479,-122.648352,45.493123
3,259172515,15FEB2023:00:00:00,4223,73,20484,-122.648385,45.493262
4,259172515,15FEB2023:00:00:00,4223,112,20489,-122.648347,45.493582


In [14]:
def decode_timestamp(opd_date, act_time):
  """Combine a service date and a seconds offset into one timestamp.

  Args:
    opd_date: Date text such as '15FEB2023:00:00:00'. The time part must be
      exactly ':00:00:00' because the parse format matches it literally.
    act_time: Offset from that date, in seconds.

  Returns:
    The parsed date plus the offset.
  """
  return (
      pd.to_datetime(opd_date, format='%d%b%Y:00:00:00')
      + pd.to_timedelta(act_time, unit='s')
  )

# decode_timestamp is built on pd.to_datetime and pd.to_timedelta, both of
# which accept whole columns, so the two Series are passed directly instead of
# calling the function once per row through df.apply(axis=1).
df['TIMESTAMP'] = decode_timestamp(df['OPD_DATE'], df['ACT_TIME'])

df.head()

Unnamed: 0,EVENT_NO_TRIP,OPD_DATE,VEHICLE_ID,METERS,ACT_TIME,GPS_LONGITUDE,GPS_LATITUDE,TIMESTAMP
0,259172515,15FEB2023:00:00:00,4223,40,20469,-122.648137,45.493082,2023-02-15 05:41:09
1,259172515,15FEB2023:00:00:00,4223,48,20474,-122.64824,45.49307,2023-02-15 05:41:14
2,259172515,15FEB2023:00:00:00,4223,57,20479,-122.648352,45.493123,2023-02-15 05:41:19
3,259172515,15FEB2023:00:00:00,4223,73,20484,-122.648385,45.493262,2023-02-15 05:41:24
4,259172515,15FEB2023:00:00:00,4223,112,20489,-122.648347,45.493582,2023-02-15 05:41:29


In [15]:
# Drop the two source columns that TIMESTAMP was built from.
# The result is reassigned rather than mutated with inplace=True, and
# errors="ignore" skips labels that are already absent, so re-running this
# cell does not raise KeyError.
df = df.drop(columns=['OPD_DATE', 'ACT_TIME'], errors='ignore')

df.head()

Unnamed: 0,EVENT_NO_TRIP,VEHICLE_ID,METERS,GPS_LONGITUDE,GPS_LATITUDE,TIMESTAMP
0,259172515,4223,40,-122.648137,45.493082,2023-02-15 05:41:09
1,259172515,4223,48,-122.64824,45.49307,2023-02-15 05:41:14
2,259172515,4223,57,-122.648352,45.493123,2023-02-15 05:41:19
3,259172515,4223,73,-122.648385,45.493262,2023-02-15 05:41:24
4,259172515,4223,112,-122.648347,45.493582,2023-02-15 05:41:29


In [20]:
# Put the readings in chronological order so row-to-row differences are
# taken between consecutive points in time.
df = df.sort_values(by='TIMESTAMP')

# Change in METERS and elapsed seconds relative to the previous reading.
# These are kept as standalone Series, so no scratch columns have to be added
# to the frame and dropped again afterwards.
meters_delta = df['METERS'].diff()
seconds_delta = df['TIMESTAMP'].diff().dt.total_seconds()

# Column-wise division replaces the per-row df.apply. Intervals that are not
# strictly positive (e.g. two readings sharing a timestamp) are masked to NaN,
# so SPEED is missing there instead of infinite or a division error.
# The first row has no previous reading, so its SPEED is NaN as well.
df['SPEED'] = meters_delta / seconds_delta.where(seconds_delta > 0)

df.head()

Unnamed: 0,EVENT_NO_TRIP,VEHICLE_ID,METERS,GPS_LONGITUDE,GPS_LATITUDE,TIMESTAMP,SPEED
0,259172515,4223,40,-122.648137,45.493082,2023-02-15 05:41:09,
1,259172515,4223,48,-122.64824,45.49307,2023-02-15 05:41:14,1.6
2,259172515,4223,57,-122.648352,45.493123,2023-02-15 05:41:19,1.8
3,259172515,4223,73,-122.648385,45.493262,2023-02-15 05:41:24,3.2
4,259172515,4223,112,-122.648347,45.493582,2023-02-15 05:41:29,7.8


In [21]:
# Summary statistics for the SPEED column.
speed_stats = df['SPEED'].describe()

# Pick out the three figures that get reported.
minimum_speed, maximum_speed, average_speed = (
    speed_stats[stat] for stat in ('min', 'max', 'mean')
)

# One line per figure: caption, value, unit.
for caption, figure in (
    ("Minimum Speed:", minimum_speed),
    ("Maximum Speed:", maximum_speed),
    ("Average Speed:", average_speed),
):
  print(caption, figure, "m/s")

Minimum Speed: 0.0 m/s
Maximum Speed: 17.4 m/s
Average Speed: 7.227205815018314 m/s
