In [18]:
%matplotlib inline

import os
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import Parse, MessageToJson
import json
import psycopg2
import time
from django.contrib.gis.geos import Point, fromstr, GEOSGeometry
from datetime import datetime, timedelta
import geopandas as gpd
import rtree
import vaex
import vaex.ml
from joblib import delayed, Parallel, load, parallel_backend
from util import direction_angle, apply_dow, get_dt, get_conn, vaex_mjoin, find_trip_regex, get_conn, run_query

dir = Path.cwd()
dir = os.path.join(dir, '../')
outdir = os.path.join(dir, 'output')
gtfs_records_zip = os.path.join(dir, "data", "GtfsRRecords.zip")
gtfs_csv_zip = os.path.join(outdir, "gtfsr_csv.zip")
gtfs_final_csv_path = os.path.join(outdir, "gtfsr.csv")
gtfs_processed_path = os.path.join(outdir, "gtfsr_processed.hdf5")
scats_model_path = os.path.join(outdir, "scats_model.json")
gtfsr_processing_temp = os.path.join(outdir, "processing_temp.hdf5")
gtfsr_arrival_means = os.path.join(outdir, "gtfsr_historical_means.hdf5")
stop_time_data_path = os.path.join(outdir, 'stop_time_data.hdf5')
gtfs_final_hdf5_path = os.path.join(outdir, "gtfsr.csv.hdf5")

entity_cols = [
    "trip_id",
    "start_date",
    "start_time",
    "stop_sequence",
    "departure",
    "arrival",
    "timestamp",
    "stop_id",
]

In [85]:
live_df = vaex.open(os.path.join(outdir, 'deploy.hdf5'))
live_df

#,route_id,direction,stop_id,start_time,timestamp,arr_dow,stop_sequence,start_date,arrival,arrival_time,departure_time,shape_dist_traveled,lat,lon,direction_angle,shape_dist_between,trip_id,arr_hour
0,60-140-b12-1,0,8220DB000264,17:45:00,2021-03-10 17:03:18,2,26,20210300.0,0,18:14:07,18:14:07,10425.1,53.3535,-6.26226,171.484,518.59,20514.y1003.60-140-b12-1.75.O,18


In [92]:
st_df = vaex.open(os.path.join(outdir, 'stop_time_data.hdf5'))

st_df.dtypes

# st_df[
#     (st_df["route_id"] == live_df[["route_id"]][0][0])
#     & (st_df["stop_sequence"] == live_df[["stop_sequence"]][0][0])
#     & (st_df["stop_id"] == live_df[["stop_id"]][0][0])
#     & (st_df["start_time"] == live_df[["start_time"]][0][0])
#     & (st_df["direction"] == live_df[["direction"]][0][0])
# ].copy().dtypes


arrival_time           <class 'str'>
departure_time         <class 'str'>
stop_sequence                  int64
shape_dist_traveled          float64
stop_id                <class 'str'>
direction                      int64
route_id               <class 'str'>
service_days           <class 'str'>
lat                          float64
lon                          float64
direction_angle              float64
shape_dist_between           float64
trip_id                <class 'str'>
start_time             <class 'str'>
dtype: object

In [71]:
hm_df = vaex.open(gtfsr_arrival_means)

temp_df = hm_df[
    (hm_df['route_id'] == live_df[['route_id']][0][0])
    & (hm_df['direction'] == int(live_df[['direction']][0][0]))
    & (hm_df['stop_id'] == live_df[['stop_id']][0][0])
    & (hm_df["arr_dow"] == live_df[["arr_dow"]][0][0])
    & (hm_df["arr_hour"] == live_df[["arr_hour"]][0][0])
    & (hm_df["stop_sequence"] == live_df[["stop_sequence"]][0][0])
].copy()

temp_df

#,route_id,stop_id,arr_dow,arr_hour,direction,stop_sequence,arrival_mean,p_mean_vol
0,60-140-b12-1,8220DB000264,2,18,0,26,0.277778,49.0521


In [72]:
cols = ["route_id", "stop_id", "arr_dow", "arr_hour", "direction", "stop_sequence"]
vaex_mjoin(
    live_df,
    temp_df.copy(),
    cols,
    cols,
    how="inner",
)

#,route_id,direction,stop_id,start_time,timestamp,arr_dow,stop_sequence,start_date,arrival,arrival_time,departure_time,shape_dist_traveled,lat,lon,direction_angle,shape_dist_between,trip_id,arr_hour,arrival_mean,p_mean_vol
0,60-140-b12-1,0,8220DB000264,17:45:00,2021-03-10 17:03:18,2,26,20210300.0,0,18:14:07,18:14:07,10425.1,53.3535,-6.26226,171.484,518.59,20514.y1003.60-140-b12-1.75.O,18,0.277778,49.0521


In [12]:
'19389.1.60-155-d12-1.89.O'.split('.')[-1]

'O'

In [160]:
# concats cols together using a divider
def concat_cols(dt, cols, name="concat_col", divider="|"):
    # Create the join column
    for i, col in enumerate(cols):
        if not dt[col].dtype == str:
            dt[col] = dt[col].astype(str)

        if i == 0:
            dt[name] = dt[col].fillna("")
        else:
            dt[name] = dt[name] + divider + dt[col].fillna("")

    # Ensure it's a string; on rare occassions it's an object
    if not dt[name].dtype == str:
        dt[name] = dt[name].astype(str)

    return dt, name


# https://github.com/vaexio/vaex/issues/746
# de duplicate rows
def vx_dedupe(dt, columns=None, concat_first=True):
    # Get and join columns
    init_cols = dt.get_column_names()
    if columns is None:
        columns = init_cols
    if concat_first:
        dt, concat_col = concat_cols(dt, columns)
        col_names = [concat_col]
    else:
        col_names = columns

    # Add named sets
    sets = [dt._set(col_name) for col_name in col_names]
    counts = [set.count for set in sets]
    set_names = [dt.add_variable("set_{}".format(col_name), set, unique=True) for col_name, set in zip(col_names, sets)]

    # Create 'row_id' column that gives each unique row the same ID
    expression = dt["_ordinal_values({}, {})".format(col_names[0], set_names[0])].astype("int64")
    product_count = 1
    for col_name, set_name, count in zip(col_names[1:], set_names[1:], counts[:-1]):
        product_count *= count
        expression = (
            expression + dt["_ordinal_values({}, {})".format(col_name, set_name)].astype("int64") * product_count
        )
    dt["row_id"] = expression

    # This is not 'stable'; because it is multithreaded, we may get a different id each time
    index = dt._index("row_id")
    unique_row_ids = dt.row_id.unique()
    indices = index.map_index(unique_row_ids)

    # Dedupe
    deduped = dt.take(indices)
    deduped = deduped[init_cols]

    return deduped

In [161]:
def dir_f_trip(trip_id):
    tokens = trip_id.split(".")

    if not len(tokens) == 5:
        return 500

    if tokens[4] == 'I': 
        return 1
    else: 
        return 0

df = vaex.open(gtfs_final_hdf5_path)[:11]

df['direction'] = df['trip_id'].apply(lambda t: dir_f_trip(t))
df["dow"] = df["start_date"].apply(lambda t: get_dt(t, "%Y%m%d").weekday())

df.materialize('direction', inplace=True)
df.materialize('dow', inplace=True)

df = df[df['direction'] != 500]

df.drop('trip_id', inplace=True)

cols = ["route_id", "stop_sequence", "stop_id", "start_time", 'direction']
df = vaex_mjoin(df.shallow_copy(), vaex.open(stop_time_data_path), cols, cols, how="inner", allow_duplication=True)

df['keep_trip'] = df.apply(
    lambda sd, dow: sd.replace('[', '').replace(']', '').replace(' ', '').split(',')[dow], 
    ['service_days', 'dow']
)

df = df[df.keep_trip == 'True']
df.drop(['service_days', 'dow', 'keep_trip'], inplace=True)

# df = vaex.from_pandas(df.to_pandas_df().drop_duplicates())
# df = vx_dedupe(df.shallow_copy(), dedupe_cols=None, concat_first=False)
df = vx_dedupe(df, columns=[i for i in df.get_column_names() if i != 'trip_id'])
df

#,route_id,start_date,start_time,stop_sequence,departure,arrival,timestamp,stop_id,direction,arrival_time,departure_time,shape_dist_traveled,lat,lon,direction_angle,shape_dist_between,trip_id
0,60-155-b12-1,20210131,23:00:00,1,0,0,2021-02-01 00:00:53,8220DB007698,0,23:00:00,23:00:00,0.0,53.4063848668136,-6.2746347975114,139.31470635060316,0.0,14088.y1002.60-155-b12-1.89.O
1,60-155-b12-1,20210131,23:00:00,48,540,540,2021-02-01 00:00:53,8250DB007353,0,23:35:20,23:35:20,16345.6,53.2934957288645,-6.20172063261915,139.31470635060316,313.89999999999964,14088.y1002.60-155-b12-1.89.O
2,60-130-b12-1,20210131,23:30:00,1,0,0,2021-02-01 00:00:53,8220DB001772,1,23:30:00,23:30:00,0.0,53.3650107475168,-6.20502103771957,-107.23111263684194,0.0,16028.y1002.60-130-b12-1.74.I
3,60-130-b12-1,20210131,23:30:00,18,1200,1200,2021-02-01 00:00:53,8220DB001729,1,23:38:37,23:38:37,4808.93,53.3584969058401,-6.19031550974047,-107.23111263684194,304.8600000000006,16028.y1002.60-130-b12-1.74.I
4,60-130-b12-1,20210131,23:30:00,20,1140,1140,2021-02-01 00:00:53,8220DB001731,1,23:39:28,23:39:28,5252.7,53.3590767534218,-6.19655620427261,-107.23111263684194,187.39999999999964,16028.y1002.60-130-b12-1.74.I
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,60-40-b12-1,20210131,23:00:00,1,0,0,2021-02-01 00:00:53,8240DB007132,0,23:00:00,23:00:00,0.0,53.4030889703351,-6.30430656007651,-118.90642649102342,0.0,12590.y1002.60-40-b12-1.207.O
9,60-40-b12-1,20210131,23:00:00,79,120,120,2021-02-01 00:00:53,8230DB004690,0,23:56:58,23:56:58,28067.9,53.3495094392524,-6.40519639715847,-118.90642649102342,265.2900000000009,12590.y1002.60-40-b12-1.207.O
10,2-332-gad-1,20210201,00:00:00,1,0,0,2021-02-01 00:00:53,8240DB004330,0,00:00:00,00:00:00,0.0,53.4547858681153,-6.21835781834225,65.59761778706122,0.0,9927.10441.2-332-gad-1.270.O
11,60-41-b12-1,20210201,00:00:00,25,0,0,2021-02-01 00:00:53,8240DB007347,1,00:16:17,00:16:17,9064.77,53.428019654753,-6.24202072636356,-177.303210091669,687.460000000001,20634.y1003.60-41-b12-1.223.I


In [170]:
st_df = vaex.open(stop_time_data_path)
# st_df[
#     (st_df["route_id"] == '60-46A-b12-1')
#     & (st_df["stop_sequence"] == '13')
#     & (st_df["stop_id"] == '8220DB000264')
#     & (st_df["start_time"] == '15:10:00')
#     & (st_df["direction"] == '0')
# ].copy()

st_df.stop_sequence
# st_df = vx_dedupe(st_df, columns=[i for i in st_df.get_column_names() if i != 'trip_id'], concat_first=True)
# st_df

Expression = stop_sequence
Length: 1,802,036 dtype: str (column)
-------------------------------------
      0  44
      1  45
      2  46
      3  47
      4  48
    ...    
1802031  39
1802032  40
1802033  41
1802034  42
1802035  43

In [14]:
c = ["trip_id", "stop_sequence", "stop_id", "start_time"]
vaex_mjoin(vaex.open(gtfs_final_hdf5_path)[:10], vaex.open(stop_time_data_path), c, c, how="inner", allow_duplication=True, lsuffix='qwer')

#,trip_id,route_idqwer,start_date,start_time,stop_sequence,departure,arrival,timestamp,stop_id,arrival_time,departure_time,shape_dist_traveled,direction,route_id,service_days,lat,lon,direction_angle,shape_dist_between
0,9931.10447.2-332-gad-1.270.O,2-332-gad-1,20210200.0,00:00:00,1,0,0,2021-02-01 00:00:53,8240DB004330,00:00:00,00:00:00,0,0,2-332-gad-1,"[False, False, False, False, False, False, True]",53.4548,-6.21836,65.5976,0


In [140]:
df.shape_dist_between.astype('str')

Expression = astype(shape_dist_between, 'str')
Length: 13 dtype: str (expression)
----------------------------------
 0       0
 1  687.46
 2       0
 3       0
 4   313.9
   ...    
 8  304.86
 9   187.4
10       0
11  687.46
12  687.46

In [4]:
# 

In [88]:
# st_df


In [6]:
vaex.open(os.path.join(outdir, 'gtfsr_model.hdf5'))
# df = vaex.open(gtfs_final_hdf5_path)
# df['route_id'] = df.trip_id.apply(lambda s: s.split(".")[2])

# route_list = df["route_id"].unique().tolist()
# len(route_list)


#,start_date,start_time,stop_sequence,arrival,timestamp,stop_id,arrival_time,shape_dist_traveled,direction,route_id,lat,lon,direction_angle,shape_dist_between,arr_dow,arr_hour,arrival_mean,p_mean_vol
0,20210225,09:40:00,1,0,2021-02-25 09:47:30,8220DB002243,09:40:00,0.0,1,2-76-gad-1,53.3519743826832,-6.35569799515705,-165.0706674216439,0.0,3,9,0.0,197.40237024527516
1,20210213,12:00:00,21,0,2021-02-13 12:36:18,8220DB000251,12:24:00,7333.43,0,2-104-gad-1,53.3848322787477,-6.22321978633306,-53.53256488306763,201.30000000000018,5,12,-0.75,80.29722118896139
2,20210219,13:00:00,32,-2,2021-02-19 13:31:35,8220DB001698,13:27:09,13047.16,1,60-38-b12-1,53.3695772972038,-6.32371675132506,116.99296799410554,295.75,4,13,-2.0,82.78559070005892
3,20210216,13:30:00,7,1,2021-02-16 13:50:36,8250DB004396,13:39:00,4093.66,0,2-63-gad-1,53.2568126626389,-6.19327750999971,44.92264970129826,590.7799999999997,1,13,1.75,53.82577578185783
4,20210224,17:38:00,18,0,2021-02-24 17:43:47,8350DB004289,17:56:00,7615.63,1,2-184-gad-1,53.1462163499678,-6.07772339688384,179.74499775119085,448.21000000000004,2,17,0.75,6.021249221335117
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1037256,20210213,06:55:00,67,7,2021-02-13 07:57:09,8230DB002124,07:54:25,24723.48,0,60-40-b12-1,53.3469584164086,-6.39793187307249,-118.90642649102342,247.88000000000102,5,7,7.0,17.76438877638021
1037257,20210228,22:00:00,6,1,2021-02-28 22:10:00,8230DB004348,22:05:44,3720.27,1,60-54A-b12-1,53.2866720396025,-6.37505043118615,57.0259061053009,418.8000000000002,6,22,1.0,8.367191151990975
1037258,20210204,17:40:00,26,3,2021-02-04 18:24:56,8230DB003425,18:14:00,10747.7,1,2-76-gad-1,53.3139017328619,-6.39199620954264,-165.0706674216439,426.0700000000015,3,18,2.8,272.1106011308306
1037259,20210210,18:20:00,2,1,2021-02-10 18:29:42,8220DB002200,18:20:00,307.63,1,2-76-gad-1,53.3502827503688,-6.35295415067495,-165.0706674216439,307.63,2,18,2.7777777777777777,99.8292126491831


In [9]:
# live_df.state_load(os.path.join(outdir, 'gtfsr_model.json'))

In [2]:
str(datetime.now())

'2021-03-04 16:43:34.367100'

In [10]:
from pandas import array

model_df = vaex.from_dict(
    {
        "trip_id": array(["19389.1.60-155-d12-1.89.O"], dtype=object),
        "start_date": array([20210302]),
        "start_time": array(["19:20:00"], dtype=object),
        "stop_sequence": array([24]),
        "arrival": array([5.0]),
        "timestamp": array(["2021-03-02 19:51:26"], dtype=object),
        "stop_id": array(["8220DB000264"], dtype=object),
        "arrival_time": array(["19:38:38"], dtype=object),
        "shape_dist_traveled": array([7818.16]),
        "direction": array(["0"], dtype=object),
        "route_id": array(["60-155-d12-1"], dtype=object),
        "lat": array([53.3535353]),
        "lon": array([-6.26225863]),
        "direction_angle": array([139.31470635]),
        "shape_dist_between": array([518.6]),
        "arr_dow": array([1]),
        "arr_hour": array([19]),
        "arrival_mean": array([6.0]),
        "p_mean_vol": array([68.53864425]),
    }
)
# model_df.state_load(gtfsr_model_path)
# model_state = model_df.state_get()

In [11]:
import io

# path = os.path.join(outdir, "gtfsr_model.json")
# f = open(path, 'r')

# df.state_load(path)
# tmp = io.StringIO(f)
# tmp.read()
# df = vaex.open(tmp)
# df

In [13]:
import re

t_id = '20440.y1003.60-140-b12-1.75.O'
trip_list = ['4124.4.60-140-d12-1.75.O', '3775.4.60-46A-d12-1.258.O', '3005.4.60-155-d12-1.89.O', '3817.4.60-46A-d12-1.258.O', '4176.4.60-140-d12-1.75.O', '3809.4.60-46A-d12-1.258.O', '3866.4.60-46A-d12-1.258.O', '3020.4.60-155-d12-1.89.O', '4185.4.60-140-d12-1.75.O', '3826.4.60-46A-d12-1.258.O', '3095.4.60-46A-d12-1.258.O', '4197.4.60-140-d12-1.75.O', '3064.4.60-155-d12-1.89.O', '3877.4.60-46A-d12-1.258.O']

def find_trip_regex(trip_list, trip_id):
    if not type(trip_id) == str:
        return None

    tokens = trip_id.split(".")
    # print(tokens)
    if not len(tokens) == 5:
        return None

    route_id = tokens[2].split("-")

    if route_id[2] in ["ga2", "gad"]:
        route_id[2] = "ga[2|d]"
        tokens[2] = "-".join(route_id)
    elif route_id[2] in ["d12", "b12"]:
        route_id[2] = "[b|d]12"
        tokens[2] = "-".join(route_id)

    if 'y' in tokens[1]:
        tokens[0] = ''
        tokens[1] = '*'

    tokens[3] = "*"

    reg = ".".join(tokens)

    r = re.compile(reg)
    matched_list = list(filter(r.match, trip_list))

    if len(matched_list) > 0:
        print(trip_id)
        return matched_list
    else:
        return None

find_trip_regex(trip_list, t_id)

20440.y1003.60-140-b12-1.75.O


['4124.4.60-140-d12-1.75.O',
 '4176.4.60-140-d12-1.75.O',
 '4185.4.60-140-d12-1.75.O',
 '4197.4.60-140-d12-1.75.O']

In [102]:
st_df = vaex.open(stop_time_data_path)
hm_df = vaex.open(gtfsr_arrival_means)

temp_df = {'trip_id': array(['12426.3.60-151-d12-1.85.O'], dtype='<U25'), 'stop_sequence': array([9]), 'stop_id': array(['8220DB004522'], dtype='<U12'), 'start_time': array(['23:00:00'], dtype='<U8'), 'start_date': array([20210307]), 'timestamp': array(['2021-03-07 22:51:27'], dtype='<U19'), 'arrival': array([0])}

t_id = '12426.3.60-151-d12-1.85.O'

st_df[st_df.trip_id == t_id]
# temp_df['trip_id'][0]

#,arrival_time,departure_time,stop_sequence,shape_dist_traveled,stop_id,direction,route_id,lat,lon,direction_angle,shape_dist_between,trip_id,start_time
,,,,,,,,,,,,,


In [106]:
print(
    len(st_df.trip_id.unique()),
    len(hm_df.trip_id.unique()),
)

29048 29012


In [34]:
route_id = '60-53-d12-1'
route_list = ['60-53-b12-1']

# find the correct route
def find_route_regex(route_list, route_id):
    if not type(route_id) == str:
        return None

    tokens = route_id.split("-")
    if not len(tokens) == 4:
        return None

    if tokens[2] in ["ga2", "gad"]:
        tokens[2] = "ga[2|d]"
    elif tokens[2] in ["d12", "b12"]:
        tokens[2] = "[b|d]12"

    reg = "-".join(tokens)  # join tokens by using a dot between

    r = re.compile(reg)
    matched_list = list(filter(r.match, route_list))  # get a list of matched trip ids

    if len(matched_list) > 0:
        return matched_list[0]
    else:
        return None
find_route_regex(route_list, route_id)

['60', '53', '[b|d]12', '1']


'60-53-b12-1'

In [6]:
def get_stop_time_df(trip_id, conn):
    query = """
    select 
        stop_time.arrival_time, stop_time.departure_time,
        stop_time.stop_sequence, stop_time.shape_dist_traveled, 
        stop.stop_id, stop.point as geom,
        trip.direction, route.route_id,
        ARRAY[monday, tuesday, wednesday, thursday, friday, saturday, sunday] as service_days
    from stop_time
    join stop on stop.id = stop_time.stop_id
    join trip on trip.id = stop_time.trip_id
    join route on trip.route_id = route.id
    join service on trip.service_id = service.id
    where trip.trip_id = '{}'
    group by stop_time.id, stop.id, trip.id, route.id, service.id
    order by stop_sequence
    ;
    """.format(
        trip_id
    ).lstrip()

    gdf = gpd.read_postgis(query, conn())
    gdf["service_days"] = gdf["service_days"].astype('str')

    # convert the times to human readable format, !IMPORTANT! utcfromtimestamp returns the correct version
    gdf["arrival_time"] = gdf["arrival_time"].apply(lambda d: datetime.utcfromtimestamp(d).strftime("%H:%M:%S"))
    gdf["departure_time"] = gdf["departure_time"].apply(lambda d: datetime.utcfromtimestamp(d).strftime("%H:%M:%S"))

    # convert the geom to lat lon
    gdf["lat"] = gdf.apply(lambda row: row["geom"].y, axis=1)
    gdf["lon"] = gdf.apply(lambda row: row["geom"].x, axis=1)

    # find the direction angle of the trip
    gdf["direction_angle"] = direction_angle(gdf.iloc[0].lon, gdf.iloc[0].lat, gdf.iloc[-1].lon, gdf.iloc[-1].lat)

    # calculate the point distance between each stop and shape dist between them
    gdf["shape_dist_between"] = gdf.shape_dist_traveled - gdf.shape_dist_traveled.shift()

    gdf["trip_id"] = trip_id  # set the trip id, no need to fetch from db
    gdf["start_time"] = gdf["arrival_time"].iloc[0]  # set the start time to the first instance of arrival time
    gdf = gdf.fillna(0)  # first will always be NA, set to 0

    # return a new pandas df dropping the geom column
    return pd.DataFrame(gdf.drop(columns="geom"))
s_df = get_stop_time_df('11779.2.60-13-b12-1.22.I', get_conn)
# s_df

In [155]:


vaex.open('./temp.hdf5')[['service_days']][0][0].replace('[', '').replace(']', '').replace(' ', '').split(',')

['False', 'False', 'False', 'True', 'False', 'False', 'False']

In [12]:
from datetime import date

date.today().weekday()

2

In [27]:
df = vaex.open('./deploy.hdf5')
df['arr_dow'] = df.start_date.apply(lambda d: get_dt(d, '%Y%m%d').weekday())
df["keep_trip"] = df.apply(
    lambda sd, dow: sd.replace("[", "").replace("]", "").replace(" ", "").split(",")[dow],
    ["service_days", "arr_dow"],
)
df

#,route_id,direction,stop_id,start_time,timestamp,stop_sequence,start_date,arrival,arrival_time,departure_time,shape_dist_traveled,service_days,lat,lon,direction_angle,shape_dist_between,trip_id,arr_dow,keep_trip
0,60-155-b12-1,0,8220DB000264,11:00:00,2021-03-10 10:07:44,24,20210300.0,0,11:16:48,11:16:48,7818.16,"[False, False, False, False, False, False, True]",53.3535,-6.26226,139.315,518.6,14265.3.60-155-b12-1.89.O,2,False
1,60-155-b12-1,0,8220DB000264,11:00:00,2021-03-10 10:07:44,24,20210300.0,0,11:20:25,11:20:25,7818.16,"[False, False, False, False, True, False, False]",53.3535,-6.26226,139.315,518.6,19522.4.60-155-b12-1.89.O,2,False
2,60-155-b12-1,0,8220DB000264,11:00:00,2021-03-10 10:07:44,24,20210300.0,0,11:20:25,11:20:25,7818.16,"[False, False, False, False, False, True, False]",53.3535,-6.26226,139.315,518.6,9890.2.60-155-b12-1.89.O,2,False


In [26]:
df = df[df.keep_trip == "True"]

df

#,route_id,direction,stop_id,start_time,timestamp,stop_sequence,start_date,arrival,arrival_time,departure_time,shape_dist_traveled,service_days,lat,lon,direction_angle,shape_dist_between,trip_id,dow,keep_trip,arr_dow
,,,,,,,,,,,,,,,,,,,,
