In [1]:
import json
import os
import geopandas as gpd
import pandas as pd
import numpy as np

import keplergl

from create_shapes import generate_lines


In [2]:
#Retrieve the entire timeline (as points). Used for the system and corridor maps
def retrieve_timeline(file_limit = 1):
    """Load historical speed observations as a projected GeoDataFrame of points.

    Reads CSV files from ``historical speed data/data`` in sorted filename
    order, concatenates them, builds point geometries from the ``x``/``y``
    columns (declared as EPSG:4326 and reprojected to EPSG:26910), and adds a
    timezone-aware ``Datetime`` column derived from the epoch-seconds ``Time``
    column.

    Parameters
    ----------
    file_limit : int or None, default 1
        Maximum number of CSV files to load. ``None`` loads every CSV file
        in the directory.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per observation, in EPSG:26910, with the CSV columns plus
        ``geometry`` and ``Datetime`` (America/Los_Angeles).

    Raises
    ------
    FileNotFoundError
        If no CSV file ends up selected (empty directory, no ``.csv``
        entries, or a ``file_limit`` below 1).
    """
    data_dir = "historical speed data/data"
    column_types = {
        "Time": np.int64,
        "Route": str,
        "Header": np.int64,
        "Trip ID": np.int64,
        "Speed": np.float64,
        "x": np.float64,
        "y": np.float64,
        "Occupancy Status": np.int64,
    }

    # Filter to CSVs *before* applying the limit so stray directory entries do
    # not use it up, and sort because os.listdir() returns entries in
    # arbitrary order (the default limit of 1 would otherwise pick a
    # different file from run to run / machine to machine).
    csv_names = sorted(name for name in os.listdir(data_dir) if name.endswith(".csv"))
    if file_limit is not None:
        # Slicing (unlike indexing) tolerates a limit larger than the list.
        csv_names = csv_names[:max(file_limit, 0)]
    if not csv_names:
        raise FileNotFoundError(
            "No CSV files to load from '{}' (file_limit={})".format(data_dir, file_limit)
        )

    # Read everything first and concatenate once; concatenating inside the
    # loop re-copies the accumulated frame on every iteration.
    frames = [pd.read_csv(os.path.join(data_dir, name), dtype=column_types) for name in csv_names]
    timeline = pd.concat(frames, ignore_index=True)

    #turn into geopandas dataframe based on x and y
    timeline = gpd.GeoDataFrame(timeline, geometry=gpd.points_from_xy(timeline.x, timeline.y))
    timeline = timeline.set_crs("EPSG:4326").to_crs("EPSG:26910")

    #turn 'Time' (epoch seconds) into a UTC Datetime column...
    timeline['Datetime'] = pd.to_datetime(timeline['Time'], unit='s', utc=True)
    #...then convert to Pacific time (the zone is DST-aware, so not always PST)
    timeline['Datetime'] = timeline['Datetime'].dt.tz_convert('America/Los_Angeles')

    return(timeline)

In [3]:
# Load the route segment lines and the raw speed observations (points).
route_segments = generate_lines()
timeline = retrieve_timeline()

# Hour of day of every observation, taken from its 'Datetime' value.
timeline['Hour'] = timeline.Datetime.dt.hour

# Remember each segment's line geometry and give it an id, then swap the
# active geometry for a flat-capped (cap_style=2) buffer of 20 CRS units
# around the line (metres if the segments are in EPSG:26910 - TODO confirm).
route_segments['line_geom'] = route_segments['geometry']
route_segments['buffer_id'] = route_segments.index
route_segments['geometry'] = route_segments.buffer(20, cap_style=2)

# Spatial join with the buffers on the left: one row per (buffer, point)
# pair that intersects; buffers without any point are kept by the left join.
segment_points = gpd.sjoin(route_segments, timeline, how="left", predicate="intersects")

# Put the line geometry back in place of the buffer polygon and keep only
# the columns the map cells use.
segment_points['geometry'] = segment_points['line_geom']
timeline = segment_points[["Hour", "Speed", "buffer_id", "geometry"]]



INFO:root:Getting segments...
INFO:root:Reading "stop_times.txt".
INFO:root:get trips in stop_times
INFO:root:accessing trips
INFO:root:Reading "routes.txt".
INFO:root:Start date is None. You should either specify a start date or set busiest_date to True.
INFO:root:Reading "trips.txt".
INFO:root:File "calendar.txt" not found.
INFO:root:Reading "calendar_dates.txt".
INFO:root:The busiest date/s of this feed or your selected date range is/are:  ['2025-05-23', '2025-06-20', '2025-06-13', '2025-06-06', '2025-06-27', '2025-05-30'] with 2997 trips.
INFO:root:In the case that more than one busiest date was found, the first one will be considered.
INFO:root:In this case is 2025-05-23.
INFO:root:Reading "stop_times.txt".
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:computing patterns
INFO:root:Reading "shapes.txt".
INFO:root:Projecting stops onto shape...
INFO:root:Interpolating stops onto shape...
INFO:root:Sorting shape points and stops...
INFO:root:segme

In [4]:
#system speed map
y_var = "Speed"

# Mean observed speed per route segment; every row of a segment carries the
# same line geometry, so "first" simply picks it.
gdf = timeline.groupby(["buffer_id"]).agg({y_var: "mean", "geometry": "first"}).reset_index()
gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs="EPSG:26910")

# Reproject to lon/lat for kepler.gl, using the same EPSG code as the rest of
# the notebook instead of a free-text CRS name.
gdf = gdf.to_crs("EPSG:4326")
gdf[y_var] = gdf[y_var].round(1)

# 'Speed Data' keeps the numeric value, 'colour' is the same value capped at
# 50 for the colour scale, and 'Speed' becomes a display string. The unit is
# written " km/h" to match the other map cells.
gdf['Speed Data'] = gdf[y_var]
gdf['colour'] = gdf['Speed Data'].clip(upper=50)
gdf['Speed'] = gdf['Speed Data'].astype(str) + " km/h"

In [5]:
# Load the saved kepler.gl configuration for the system speed map. The context
# manager closes the file handle, which json.load(open(...)) left open.
with open("kepler_configs/speed_map.json") as config_file:
    kepler_config = json.load(config_file)
map_1 = keplergl.KeplerGl(height=500, data={"Speed": gdf}, config=kepler_config)
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'nu79ugi', 'type': …

In [6]:
# Write the map's current configuration back to the file that the speed-map
# cell loads its configuration from.
map_config = map_1.config
with open("kepler_configs/speed_map.json", mode="w") as config_file:
    json.dump(map_config, config_file)

In [19]:
#system peak vs off-peak speed map

# Mean speed per segment (rows) and hour of day (columns).
hourly = timeline.pivot_table(index=["buffer_id"], columns="Hour", values="Speed", aggfunc="mean")
# Guarantee one column per hour 0-23. Hours with no observations would
# otherwise be missing from the pivot and make the window lookups below fail
# with a KeyError; as NaN columns they are simply skipped by mean().
hourly = hourly.reindex(columns=range(24))

gdf = hourly.reset_index()
gdf = gdf.merge(timeline[["buffer_id", "geometry"]].drop_duplicates(subset=["buffer_id"]), on="buffer_id", how="left")

#calculate a three-hour-window moving average to identify the peak and off-peak speeds
# NOTE(review): centres run 1..21, so the last window is 20-21-22 and the
# 21-22-23 window is never evaluated - confirm this is intentional.
window_labels = []
for centre in range(1, 22): #centres of the different windows
    label = "{}-{}-{}".format(centre - 1, centre, centre + 1)
    gdf[label] = gdf[[centre - 1, centre, centre + 1]].mean(axis=1)
    window_labels.append(label)

#for each row, identify the highest and lowest values of these windows
windows = gdf[window_labels]
# idxmin/idxmax on a row that is entirely NaN is deprecated in recent pandas;
# compute them only for rows with data and let the rest align to NaN.
has_data = windows.notna().any(axis=1)
# "Peak" is the slowest window, "Off-Peak" the fastest.
gdf['Peak'] = windows.min(axis=1).round(1)
gdf['Peak Hour'] = windows[has_data].idxmin(axis=1)
gdf['Off-Peak'] = windows.max(axis=1).round(1)
gdf['Off-Peak Hour'] = windows[has_data].idxmax(axis=1)

gdf['Speed Delta'] = gdf['Off-Peak'] - gdf['Peak']

gdf = gdf[["buffer_id", "geometry", "Peak", "Peak Hour", "Off-Peak", "Off-Peak Hour", "Speed Delta"]]
gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs="EPSG:26910")
# Reproject to lon/lat for kepler.gl, using the same EPSG code as the rest of
# the notebook.
gdf = gdf.to_crs("EPSG:4326")

# 'Speed Data' keeps the numeric delta, 'colour' is the delta capped at 50 for
# the colour scale, and 'Speed Delta' becomes a display string.
gdf['Speed Data'] = gdf['Speed Delta'].round(1)
gdf['colour'] = gdf['Speed Data'].clip(upper=50)
gdf['Speed Delta'] = gdf['Speed Data'].astype(str) + " km/h"

In [20]:
# Load the saved kepler.gl configuration for the peak/off-peak delta map. The
# context manager closes the file handle, which json.load(open(...)) left open.
with open("kepler_configs/delta_map.json") as config_file:
    kepler_config = json.load(config_file)
map_2 = keplergl.KeplerGl(height=500, data={"Speed Delta": gdf}, config=kepler_config)
map_2

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'a4jvfue', 'type': …

In [21]:
# Write the delta map's current configuration back to the file that the
# delta-map cell loads its configuration from.
map_config = map_2.config
with open("kepler_configs/delta_map.json", mode="w") as config_file:
    json.dump(map_config, config_file)

In [8]:
#dot map

# Reload the raw point observations: the spatial-join cell earlier in the
# notebook rebinds `timeline` to the segment-level join result, which only
# keeps the Hour / Speed / buffer_id / geometry columns.
timeline = retrieve_timeline()

In [9]:
#pick 150000 random points
# A fixed seed keeps the sample (and therefore the exported map) the same
# across "Restart & Run All".
gdf = timeline
if len(gdf) >= 150000:
    gdf = gdf.sample(n=150000, random_state=42)

# Drop the raw epoch 'Time' column. drop() returns a new frame, so `timeline`
# itself is not modified by the assignments below.
gdf = gdf.drop(columns=['Time'])

# 'Speed Data' keeps the numeric speed rounded to 1 decimal place, 'colour' is
# that value capped at 50 for the colour scale, and 'Speed' becomes a display
# string.
gdf['Speed Data'] = gdf['Speed'].round(1)
gdf['colour'] = gdf['Speed Data'].clip(upper=50)
gdf['Speed'] = gdf['Speed Data'].astype(str) + " km/h"

In [10]:
# Load the saved kepler.gl configuration for the dot map. The context manager
# closes the file handle, which json.load(open(...)) left open.
with open("kepler_configs/dot_map.json") as config_file:
    kepler_config = json.load(config_file)
map_1 = keplergl.KeplerGl(height=500, data={"Speed": gdf}, config=kepler_config)
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['Speed'], 'id': '5k68gnjoc',…

In [11]:
# Write the dot map's current configuration back to the file that the dot-map
# cell loads its configuration from.
map_config = map_1.config
with open("kepler_configs/dot_map.json", mode="w") as config_file:
    json.dump(map_config, config_file)

In [11]:
# Export the Kepler.gl map as an HTML file.
# `map_1` is reused by several map cells in this notebook, so this saves
# whichever map was assigned to it most recently; the target path is the dot
# map's, so run this right after the dot-map cell.
map_1.save_to_html(file_name="docs/plots/dot_map.html")

Map saved to docs/plots/dot_map.html!


In [21]:
#corridor map

# Corridor lines, declared as EPSG:4326 and reprojected to EPSG:26910 (the CRS
# retrieve_timeline() returns its points in).
corridors = gpd.read_file("roads/corridors.geojson").set_crs("EPSG:4326").to_crs("EPSG:26910")

# Name each corridor. Names are assigned by position, so this list must match
# the order (and number) of features in corridors.geojson.
corridors["corridor"] = ["Mckenzie", "Fort St West", "Fort St East", "Foul Bay", "Hillside", "Quadra","Douglas Core","Douglas North", "Pandora West", "Pandora East", "Shelbourne South", "Shelbourne North", "Johnson", "Oak Bay"]
# Placeholder for the per-corridor average. Float NaN rather than integer 0:
# the averages written later are floats (an int column would have to change
# dtype), and an unfilled corridor should read as "no data", not 0 km/h.
corridors['Average Speed'] = np.nan
timeline = retrieve_timeline()

In [22]:
# Routes whose observations are used to estimate the speed on each corridor.
selected_routes = {
    "Mckenzie": ["26"],
    "Fort St West": ["14", "15", "11"],
    "Fort St East": ["14", "15", "11"],
    "Foul Bay": ["7", "15"],
    "Hillside": ["4"],
    "Quadra": ["6"],
    "Douglas Core": ["95"],
    "Douglas North": ["95"],
    "Pandora West": ["2", "5", "27", "28"],
    "Pandora East": ["2", "5", "27", "28"],
    "Shelbourne South": ["27", "28"],
    "Shelbourne North": ["27", "28"],
    "Johnson": ["2", "5", "27", "28"],
    "Oak Bay": ["2", "5"]
}

# Always buffer in the projected CRS. `corridors` is reprojected to EPSG:4326
# at the end of this cell, so without this a re-run would buffer by 20 degrees
# and compare against points that are in EPSG:26910.
corridors_projected = corridors.to_crs("EPSG:26910")

corridor_speeds = {}
for corridor in corridors_projected.corridor:
    # Flat-capped 20-unit buffer around this corridor's line(s), merged into a
    # single geometry.
    # NOTE(review): `unary_union` is deprecated in newer geopandas in favour of
    # `union_all()`; switch once the minimum supported version allows it.
    corridor_lines = corridors_projected[corridors_projected.corridor == corridor]
    corridor_area = corridor_lines.buffer(20, cap_style=2).unary_union

    # Observations from the corridor's selected routes that fall inside the buffer.
    on_routes = timeline[timeline.Route.isin(selected_routes[corridor])]
    in_corridor = on_routes[on_routes.geometry.within(corridor_area)]

    # Mean speed rounded to 1 decimal place; NaN when no observation matched.
    corridor_speeds[corridor] = round(in_corridor.Speed.mean(), 1)

# Build the column in one go (always float) instead of writing floats row by
# row into a column that was initialised with integer 0.
corridors["Average Speed"] = corridors["corridor"].map(corridor_speeds)

corridors = corridors.to_crs("EPSG:4326")

# 'Speed Data' keeps the numeric value, 'colour' is that value capped at 50
# for the colour scale, and 'Speed' is a display string.
corridors['Speed Data'] = corridors['Average Speed'].round(1)
corridors['colour'] = corridors['Speed Data'].clip(upper=50)
corridors['Speed'] = corridors['Speed Data'].astype(str) + " km/h"

In [30]:
# Load the saved kepler.gl configuration for the corridor map. The context
# manager closes the file handle, which json.load(open(...)) left open.
with open("kepler_configs/corridor_map.json") as config_file:
    kepler_config = json.load(config_file)
map_1 = keplergl.KeplerGl(height=1000, data={"Speed": corridors}, config=kepler_config)
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': '7yfl0ij', 'type': …

In [31]:
# Write the corridor map's current configuration back to the file that the
# corridor-map cell loads its configuration from.
config = map_1.config
with open("kepler_configs/corridor_map.json", mode="w") as config_file:
    json.dump(config, config_file)