In [1]:
import pandas as pd

import koda.koda_fetch as kf
import koda.koda_parsing as kp

In [2]:
# Download the GTFS archives (static + realtime) for one operator and one day.

# Query parameters; later cells reuse these names.
# `date` is YYYY-MM-DD and is split on "-" by the parsing cell.
operator, date, feed_type = "xt", "2023-01-05", "TripUpdates"

# Static feed: announce, fetch, and keep the result for the unzip step.
print(f"Fetching GTFS static data for {operator} on {date}")
static_zip_path = kf.fetch_gtfs_static(operator, date)

# Realtime feed, restricted to the selected feed type.
print(f"Fetching GTFS realtime data for {operator} on {date}")
rt_zip_path = kf.fetch_gtfs_realtime(operator, feed_type, date)

Fetching GTFS static data for xt on 2023-01-05
File already exists.
Fetching GTFS realtime data for xt on 2023-01-05
File already exists.


In [3]:
# Unzip both archives, parse one hour of realtime data, and save it as feather.

# NOTE: The static data is in GTFS txt files in a flat folder structure
static_folder_path = kp.unzip_data(static_zip_path)
print(f"Unzipped static data to {static_folder_path}")

# NOTE: The realtime data is in Protocol Buffer files in a nested folder structure
rt_folder_path = kp.unzip_data(rt_zip_path)
print(f"Unzipped realtime data to {rt_folder_path}")

# Slice to parse: the day parts come from `date`, the hour is chosen here.
# `hour` is a string, like the parts produced by the split.
year, month, day = date.split("-")
hour = "12"

df = kp.read_rt_folder_to_df(
    rt_folder_path, operator, feed_type, year, month, day, hour
)

# Quick look at the result: the first rows, then every field of the first row.
print(f"Data for {operator} on {date} at {hour}")
print(df.head())
print(df.iloc[0])

# Write the parsed frame inside the unzipped realtime folder (zstd, level 9).
rt_feather_path = f"{rt_folder_path}/{operator}-{feed_type.lower()}-{year}-{month}-{day}T{hour}.feather"
df.to_feather(
    rt_feather_path,
    compression="zstd",
    compression_level=9,
)

Unzipping ./dev_data/koda_download/xt-gtfs-static-2023-01-05.7z
File already unzipped.
Unzipped static data to ./dev_data/koda_data\xt-gtfs-static-2023-01-05
Unzipping ./dev_data/koda_download/xt-gtfs-realtime-2023-01-05.7z
File already unzipped.
Unzipped realtime data to ./dev_data/koda_data\xt-gtfs-realtime-2023-01-05
Data for xt on 2023-01-05 at 12
   index                  id             trip_id start_date  \
0      1  217990500824825347  217990000029118619   20230105   
1     28  217990500824817629  217990000029112087   20230105   
2     43  217990500824817629  217990000029112087   20230105   
3     91  217990500829647287  217990000029820018   20230105   
4     92  217990500829647287  217990000029820018   20230105   

  schedule_relationship   timestamp        vehicle_id  stop_sequence  \
0             SCHEDULED  1672916963  9031021000444431             87   
1             SCHEDULED  1672916953  9031021000444427             85   
2             SCHEDULED  1672916953  90310210004444

In [5]:
# Read the saved feather file back to check that it round-trips.
# Use `rt_feather_path` (the exact path written by the parsing cell) instead
# of a hard-coded literal, so this cell keeps pointing at the right file when
# the operator, date, feed type or hour is changed.
df = pd.read_feather(rt_feather_path)
df

Unnamed: 0,index,id,trip_id,start_date,schedule_relationship,timestamp,vehicle_id,stop_sequence,stop_id,arrival_delay,arrival_time,arrival_uncertainty,departure_delay,departure_time,departure_uncertainty
0,1,217990500824825347,217990000029118619,20230105,SCHEDULED,1672916963,9031021000444431,87,9022021421076002,175,1672916399,0.0,175,1672916399,0.0
1,28,217990500824817629,217990000029112087,20230105,SCHEDULED,1672916953,9031021000444427,85,9022021482079002,185,1672916419,0.0,185,1672916419,0.0
2,43,217990500824817629,217990000029112087,20230105,SCHEDULED,1672916953,9031021000444427,100,9022021482039002,329,1672917029,,332,1672917032,
3,91,217990500829647287,217990000029820018,20230105,SCHEDULED,1672916488,,1,9022021490014017,-475,1672912685,0.0,-1,1672913159,0.0
4,92,217990500829647287,217990000029820018,20230105,SCHEDULED,1672916488,,2,9022021484059044,-83,1672916317,0.0,12,1672916412,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39113,2858,217990500813249978,217990000029640917,20230105,SCHEDULED,1672919893,9031021000444342,23,9022021484377001,100,1672921484,,104,1672921488,
39114,2859,217990500813249978,217990000029640917,20230105,SCHEDULED,1672919893,9031021000444342,24,9022021484376001,99,1672921516,,99,1672921516,
39115,2860,217990500813249978,217990000029640917,20230105,SCHEDULED,1672919893,9031021000444342,25,9022021484374001,103,1672921541,,107,1672921545,
39116,2861,217990500813249978,217990000029640917,20230105,SCHEDULED,1672919893,9031021000444342,26,9022021484375023,25,1672921585,,25,1672921585,
