In [3]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt

In [4]:
# Load the dataset

df = pd.read_pickle("merged_bike_data.pkl")

In [3]:
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp'],
      dtype='object')

In [5]:
# Create a value column and group by start and end station

df['value'] = 1  
df_group = df.groupby(['start_station_name', 'end_station_name']).agg(
    value=('value', 'count'),
    start_lat=('start_lat', 'first'),
    start_lon=('start_lng', 'first'),
    end_lat=('end_lat', 'first'),
    end_lon=('end_lng', 'first')
).reset_index()

In [20]:
df_group.head()

Unnamed: 0,start_station_name,end_station_name,value,start_lat,start_lon,end_lat,end_lon
0,1 Ave & E 110 St,1 Ave & E 110 St,791,40.792327,-73.9383,40.792327,-73.9383
1,1 Ave & E 110 St,1 Ave & E 18 St,2,40.792327,-73.9383,40.733812,-73.980544
2,1 Ave & E 110 St,1 Ave & E 30 St,4,40.792327,-73.9383,40.741444,-73.975361
3,1 Ave & E 110 St,1 Ave & E 39 St,1,40.792327,-73.9383,40.74714,-73.97113
4,1 Ave & E 110 St,1 Ave & E 44 St,12,40.792327,-73.9383,40.75002,-73.969053


In [6]:
df_group.rename(columns = {'value': 'trips'}, inplace = True)

In [22]:
print(df_group['trips'].sum())
print(df.shape)

29768282
(29838166, 16)


In [23]:
df_group['trips'].describe()

count    1.013397e+06
mean     2.937475e+01
std      9.925180e+01
min      1.000000e+00
25%      1.000000e+00
50%      4.000000e+00
75%      1.700000e+01
max      1.204100e+04
Name: trips, dtype: float64

In [30]:
df_group.to_csv('df_final_locations_for_map.csv')

In [7]:
df_final = pd.read_csv("df_final_locations_for_map.csv")

In [11]:
# Create the KeplerGl map

m = KeplerGl(height=700, data={"data_1": df_final})


User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [12]:
m

KeplerGl(data={'data_1':          Unnamed: 0     start_station_name       end_station_name  trips  \
0        …

In [13]:
import json

# Get the current configuration
config = m.config

In [15]:
# Save configuration to a JSON file
with open("config.json", "w") as outfile:
    json.dump(config, outfile)

In [16]:
# Save interactive map as HTML
m.save_to_html(file_name='Bike_Trips_NYC.html', read_only=False, config=config, center_map=True)

Map saved to Bike_Trips_NYC.html!


On the Kepler.gl map, the bike-sharing trips are visualized using an Arc Layer, where start and end stations are represented in different colors, and the stations are connected by curved lines that show the flow of trips.

To highlight the most significant travel patterns, a filter is applied on the trips field, represented as a slider. By adjusting this slider to the right, trips with lower counts are filtered out, which reduces the visual clutter and reveals the busiest connections between stations.

the stations located around Central Park and those situated in the western part of the city exhibit the busiest connections.