In [45]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS 
import numpy as np
from matplotlib import pyplot as plt

In [46]:
# The project root is taken to be the parent of the notebook's working directory.
notebook_dir = os.getcwd()
BASE_DIR = os.path.abspath(os.path.join(notebook_dir, ".."))

# Path pieces, relative to the project root, of the processed CSV to load.
csv_parts = ("Data", "Processed", "citibike_weather_2022.csv")
DATA_PATH = os.path.join(BASE_DIR, *csv_parts)

# Load the file and preview the first rows as the cell output.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,...,WDF5,WSF2,WSF5,WT01,WT02,WT03,WT04,WT06,WT08,WT09
0,CA5837152804D4B5,electric_bike,2022-01-26 18:50:39,2022-01-26 18:51:53,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,...,320.0,10.7,14.3,,,,,,,
1,BA06A5E45B6601D2,classic_bike,2022-01-28 13:14:07,2022-01-28 13:20:23,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,...,30.0,8.1,11.2,1.0,,,,,1.0,
2,7B6827D7B9508D93,classic_bike,2022-01-10 19:55:13,2022-01-10 20:00:37,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,...,320.0,11.6,15.7,,,,,,,
3,6E5864EA6FCEC90D,electric_bike,2022-01-26 07:54:57,2022-01-26 07:55:22,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,...,320.0,10.7,14.3,,,,,,,
4,E24954255BBDE32D,electric_bike,2022-01-13 18:44:46,2022-01-13 18:45:43,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,...,60.0,5.8,7.6,,,,,,,


In [47]:
# Add a constant column of 1s to every row (modifies `df` in place).
# NOTE(review): presumably a per-ride counter meant to be summed later; `trip_1`
# is not referenced by any other cell shown in this notebook — confirm it is still needed.
df["trip_1"] = 1

In [48]:
# Columns that identify a route, and the coordinates used to draw it.
route_keys = ["start_station_name", "end_station_name"]
coord_cols = ["start_lat", "start_lng", "end_lat", "end_lng"]

# Force the coordinate columns to numeric in place; unparseable values become NaN.
for col in coord_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Work on a copy holding only rows that have both station names and all four coordinates.
df_routes = df.dropna(subset=route_keys + coord_cols).copy()

# One row per (start, end) pair: number of rides plus the mean of each coordinate.
per_route = df_routes.groupby(route_keys, as_index=False).agg(
    trip_count=("ride_id", "count"),
    **{col: (col, "mean") for col in coord_cols},
)

# Keep the 300 routes with the highest trip counts, renumbered from 0.
ranked = per_route.sort_values("trip_count", ascending=False)
routes_agg = ranked.head(300).reset_index(drop=True)

routes_agg.head()

Unnamed: 0,start_station_name,end_station_name,trip_count,start_lat,start_lng,end_lat,end_lng
0,Hoboken Terminal - Hudson St & Hudson Pl,Hoboken Ave at Monmouth St,5565,40.735937,-74.030316,40.735208,-74.046964
1,South Waterfront Walkway - Sinatra Dr & 1 St,South Waterfront Walkway - Sinatra Dr & 1 St,5439,40.736989,-74.027779,40.736982,-74.027781
2,Marin Light Rail,Grove St PATH,4113,40.714584,-74.042816,40.719586,-74.043117
3,Hoboken Ave at Monmouth St,Hoboken Terminal - Hudson St & Hudson Pl,4083,40.735222,-74.046949,40.735938,-74.030305
4,Grove St PATH,Marin Light Rail,3973,40.719591,-74.043125,40.714584,-74.042817


In [49]:
# Swap any NaN in the aggregated routes for None; this is the frame passed to Kepler.gl below.
# NOTE(review): presumably so missing values serialise as null in the exported map —
# confirm it is still needed, since the rows were already filtered with dropna.
routes_kepler = routes_agg.replace({np.nan: None})

In [50]:
# Environment check: display the path of the Python interpreter running this kernel.
import sys
sys.executable

'C:\\Users\\User\\anaconda3\\python.exe'

In [51]:
# Export the top routes as a standalone Kepler.gl HTML page, with no custom config.
# Note: the configured-map cell further down saves to this same path, so on a
# full run this file is replaced by that later export.
html_path = "maps/kepler_top300.html"
os.makedirs("maps", exist_ok=True)

map_top300 = KeplerGl(height=600)
map_top300.add_data(name="Top 300 Routes", data=routes_kepler)
map_top300.save_to_html(file_name=html_path)

# Print the absolute location so the file is easy to find on disk.
print("Saved:", os.path.abspath(html_path))

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to maps/kepler_top300.html!
Saved: C:\Users\User\Downloads\Citibike-Weather-Dashboard\Notebooks\maps\kepler_top300.html


In [52]:
import os
from keplergl import KeplerGl
import numpy as np

# `routes_kepler` must already exist (it is built from `routes_agg` in an earlier cell).

# Name the dataset and the output file once so the layer's dataId, add_data()
# and the final print cannot drift apart.
dataset_name = "Top 300 Routes"
html_path = "maps/kepler_top300.html"
os.makedirs("maps", exist_ok=True)

# Initial camera: centred on (40.75, -73.98), no tilt and no rotation.
initial_view = {
    "latitude": 40.75,
    "longitude": -73.98,
    "zoom": 10.5,
    "pitch": 0,
    "bearing": 0,
}

# A single arc layer drawn from each route's start point to its end point;
# arc size is driven by trip_count on a square-root scale.
arc_layer = {
    "id": "top300_arcs",
    "type": "arc",
    "config": {
        "dataId": dataset_name,
        "label": "Top 300 Routes (Arcs)",
        "columns": {
            "lat0": "start_lat",
            "lng0": "start_lng",
            "lat1": "end_lat",
            "lng1": "end_lng",
        },
        "isVisible": True,
    },
    "visualChannels": {
        "sizeField": {"name": "trip_count", "type": "integer"},
        "sizeScale": "sqrt",
    },
}

kepler_config = {
    "version": "v1",
    "config": {
        "mapState": initial_view,
        "visState": {"layers": [arc_layer]},
    },
}

# Build the map with the config, attach the data, and write it to the same
# HTML path used by the unconfigured export above.
map_top300_cfg = KeplerGl(height=600, config=kepler_config)
map_top300_cfg.add_data(data=routes_kepler, name=dataset_name)
map_top300_cfg.save_to_html(file_name=html_path)

print("Saved:", os.path.abspath(html_path))

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to maps/kepler_top300.html!
Saved: C:\Users\User\Downloads\Citibike-Weather-Dashboard\Notebooks\maps\kepler_top300.html


### Customize map (points + arcs)

Point color: Start and end station points are colored using a dark purple theme to show intensity.

Arcs: Enabled to show the direction and volume of trips between locations.

Filter: I added a filter on trip_count to visualize only the most common trips (e.g., those above 1000).

This helps us visually identify the busiest areas and travel routes in the city—especially Midtown and downtown Manhattan, which seem highly connected.

In [56]:
from keplergl import KeplerGl

# Start a new, empty Kepler.gl map (height=600) for the points + arcs customization;
# data is attached in the next cell.
map_1 = KeplerGl(height=600)

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [59]:
# Attach the aggregated routes (`routes_agg`) to the map as the "trip_connections" dataset.
map_1.add_data(data=routes_agg, name="trip_connections")

In [58]:
# Grab the map's current settings, then write the map out as a standalone HTML file.
# `config` is only captured here; it is not referenced again in the cells shown.
html_name = "2.5_Advanced_Geospatial_Plotting.html"
config = map_1.config
map_1.save_to_html(file_name=html_name)

Map saved to 2.5_Advanced_Geospatial_Plotting.html!


#### 💡 Observations & Insights
After applying filters and reviewing the arcs on the map:

The most common bike trips occur between nearby stations in Hoboken and Jersey City, on the New Jersey side of the system (see the top routes in `routes_agg` above).

Certain hotspots like Hoboken Terminal, Grove St PATH, and Marin Light Rail dominate the usage.

The arc widths and filters clearly illustrate commuter behavior across dense business zones.

The map clearly helps us understand user traffic flows and peak usage areas within NYC's CitiBike system.