# Making a Kepler map = python 3.11, numpy version 1.23.5

In [5]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt
import importlib.resources
from IPython.display import HTML, display
   
def _get_asset_str(filepath: str) -> bytes:
    pkg, sub = 'keplergl.keplergl', filepath
    return importlib.resources.read_binary(pkg, sub)

In [7]:
path = r"C:\Users\ryani\Desktop\JupyterLab\NY_Citibike_2022"

In [8]:
df = pd.read_csv(os.path.join(path, '02_Prepared_Data', 'reduced_df.csv'))
df

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),usertype
0,BFD29218AB271154,electric_bike,2022-01-21 13:13:43.392,2022-01-21 13:22:31.463,West End Ave & W 107 St,7650.05,Mt Morris Park W & W 120 St,7685.14,40.802117,-73.968181,40.804038,-73.945925,member,2022-01-21,0.0,-6.0,33598,528,9,member_electric_bike
1,7C953F2FD7BE1302,classic_bike,2022-01-10 11:30:54.162,2022-01-10 11:41:43.422,4 Ave & 3 St,4028.04,Boerum Pl\t& Pacific St,4488.09,40.673746,-73.985649,40.688489,-73.991160,member,2022-01-10,0.0,1.6,34727,649,11,member_classic_bike
2,95893ABD40CED4B8,electric_bike,2022-01-26 10:52:43.096,2022-01-26 11:06:35.227,1 Ave & E 62 St,6753.08,5 Ave & E 29 St,6248.06,40.761227,-73.960940,40.745168,-73.986831,member,2022-01-26,0.0,-2.3,42005,832,14,member_electric_bike
3,F853B50772137378,classic_bike,2022-01-03 08:35:48.247,2022-01-03 09:10:50.475,2 Ave & E 96 St,7338.02,5 Ave & E 29 St,6248.06,40.783964,-73.947167,40.745168,-73.986831,member,2022-01-03,0.0,1.4,33189,2102,35,member_classic_bike
4,7590ADF834797B4B,classic_bike,2022-01-22 14:14:23.043,2022-01-22 14:34:57.474,6 Ave & W 34 St,6364.10,5 Ave & E 29 St,6248.06,40.749640,-73.988050,40.745168,-73.986831,member,2022-01-22,0.0,-5.9,31969,1234,21,member_classic_bike
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29838801,1F223EDAFF420AE3,electric_bike,2022-12-01 20:26:45.847,2022-12-01 20:30:46.012,Avenue D & E 3 St,5436.09,Stanton St & Chrystie St,5523.02,40.720701,-73.977939,40.722293,-73.991475,member,2022-12-01,0.0,5.7,73173,240,4,member_electric_bike
29838802,CFA5C560ACB73B8E,classic_bike,2022-12-26 13:46:34.237,2022-12-26 13:52:43.900,43 Ave & 47 St,6209.05,39 Ave & 45 St,6401.03,40.744806,-73.917290,40.749478,-73.918265,member,2022-12-26,0.0,-4.7,19221,370,6,member_classic_bike
29838803,11C8C5E0DB947B07,classic_bike,2022-12-01 05:56:14.903,2022-12-01 06:06:10.357,Avenue D & E 3 St,5436.09,Bleecker St & Crosby St,5679.08,40.720828,-73.977932,40.726156,-73.995102,member,2022-12-01,0.0,5.7,73173,595,10,member_classic_bike
29838804,5B9B083C534A5964,classic_bike,2022-12-02 11:54:15.871,2022-12-02 12:01:00.747,Montague St & Clinton St,4677.06,Sands St & Jay St,4821.03,40.694271,-73.992327,40.700119,-73.986200,member,2022-12-02,0.0,3.4,74951,405,7,member_classic_bike


In [9]:
# Build a Series of counts for each trip pair
pair_counts = (df.value_counts(subset=['start_station_name', 'end_station_name']).rename('trip_count'))

# Map those counts back to df
df['trip_count'] = df.set_index(['start_station_name', 'end_station_name']) \
                     .index.map(pair_counts) \
                     .fillna(0) \
                     .astype(int)


In [10]:
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,...,end_lng,member_casual,date,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),usertype,trip_count
0,BFD29218AB271154,electric_bike,2022-01-21 13:13:43.392,2022-01-21 13:22:31.463,West End Ave & W 107 St,7650.05,Mt Morris Park W & W 120 St,7685.14,40.802117,-73.968181,...,-73.945925,member,2022-01-21,0.0,-6.0,33598,528,9,member_electric_bike,54
1,7C953F2FD7BE1302,classic_bike,2022-01-10 11:30:54.162,2022-01-10 11:41:43.422,4 Ave & 3 St,4028.04,Boerum Pl\t& Pacific St,4488.09,40.673746,-73.985649,...,-73.99116,member,2022-01-10,0.0,1.6,34727,649,11,member_classic_bike,44
2,95893ABD40CED4B8,electric_bike,2022-01-26 10:52:43.096,2022-01-26 11:06:35.227,1 Ave & E 62 St,6753.08,5 Ave & E 29 St,6248.06,40.761227,-73.96094,...,-73.986831,member,2022-01-26,0.0,-2.3,42005,832,14,member_electric_bike,55
3,F853B50772137378,classic_bike,2022-01-03 08:35:48.247,2022-01-03 09:10:50.475,2 Ave & E 96 St,7338.02,5 Ave & E 29 St,6248.06,40.783964,-73.947167,...,-73.986831,member,2022-01-03,0.0,1.4,33189,2102,35,member_classic_bike,14
4,7590ADF834797B4B,classic_bike,2022-01-22 14:14:23.043,2022-01-22 14:34:57.474,6 Ave & W 34 St,6364.1,5 Ave & E 29 St,6248.06,40.74964,-73.98805,...,-73.986831,member,2022-01-22,0.0,-5.9,31969,1234,21,member_classic_bike,270


In [11]:
df.describe()

Unnamed: 0,started_at,ended_at,start_lat,start_lng,end_lat,end_lng,date,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),trip_count
count,29838806,29838806,29838810.0,29838810.0,29801410.0,29801410.0,29838806,29838170.0,29838170.0,29838810.0,29838810.0,29838810.0,29838810.0
mean,2022-07-18 08:40:54.299818752,2022-07-18 08:59:31.239158272,40.74057,-73.97525,40.74035,-73.97535,2022-07-17 18:06:57.077650432,2.36408,17.12278,95189.36,1116.939,18.61491,364.06
min,2021-01-30 17:30:45.544000,2022-01-01 00:00:09.459000,40.62737,-74.02802,40.56,-74.28,2021-01-30 00:00:00,0.0,-11.7,1.0,-3437.0,-57.0,1.0
25%,2022-05-14 18:36:59.667000064,2022-05-14 19:01:05.662749952,40.71605,-73.99379,40.71602,-73.9938,2022-05-14 00:00:00,0.0,11.4,74873.0,348.0,6.0,57.0
50%,2022-07-23 00:39:40.275000064,2022-07-23 01:03:29.442500096,40.73936,-73.98166,40.73932,-73.98166,2022-07-23 00:00:00,0.0,17.9,103639.0,610.0,10.0,169.0
75%,2022-09-26 13:22:41.772250112,2022-09-26 13:41:00.492999936,40.76319,-73.95928,40.76313,-73.95943,2022-09-26 00:00:00,0.8,24.1,117321.0,1080.0,18.0,409.0
max,2022-12-31 23:58:19.206000,2022-12-31 23:59:55.708000,40.88398,-73.87859,41.06,-73.75,2022-12-31 00:00:00,45.0,31.3,135072.0,46911480.0,781858.0,12041.0
std,,,0.03891787,0.02580543,0.03885558,0.02582301,,6.063558,8.553446,27885.31,30613.71,510.2291,653.5627


In [12]:
df.shape

(29838806, 21)

In [13]:
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'Precipitation', 'avgtemp', 'trips_per_day',
       'tripduration', 'tripduration(mins)', 'usertype', 'trip_count'],
      dtype='object')

In [14]:
df_map = df[['start_station_name', 'start_lat', 'start_lng', 'end_station_name', 'end_lat', 'end_lng', 'trip_count']]
df_map.head()

Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
0,West End Ave & W 107 St,40.802117,-73.968181,Mt Morris Park W & W 120 St,40.804038,-73.945925,54
1,4 Ave & 3 St,40.673746,-73.985649,Boerum Pl\t& Pacific St,40.688489,-73.99116,44
2,1 Ave & E 62 St,40.761227,-73.96094,5 Ave & E 29 St,40.745168,-73.986831,55
3,2 Ave & E 96 St,40.783964,-73.947167,5 Ave & E 29 St,40.745168,-73.986831,14
4,6 Ave & W 34 St,40.74964,-73.98805,5 Ave & E 29 St,40.745168,-73.986831,270


In [15]:
df_map_drop = df_map.copy()
df_map_drop.drop_duplicates(inplace=True)

In [16]:
df_map_drop.dropna(inplace=True)

In [17]:
df_map_drop.shape

(5035608, 7)

In [36]:
df_map_drop.rename(columns={
    'Latitude_x': 'start_lat',
    'Longitude_x': 'start_lng',
    'Latitude_y': 'end_lat',
    'Longitude_y': 'end_lng'
}, inplace=True)

In [37]:
df_map_drop.head()

Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
0,West End Ave & W 107 St,40.802117,-73.968181,Mt Morris Park W & W 120 St,40.804038,-73.945925,54
1,4 Ave & 3 St,40.673746,-73.985649,Boerum Pl\t& Pacific St,40.688489,-73.99116,44
2,1 Ave & E 62 St,40.761227,-73.96094,5 Ave & E 29 St,40.745168,-73.986831,55
3,2 Ave & E 96 St,40.783964,-73.947167,5 Ave & E 29 St,40.745168,-73.986831,14
4,6 Ave & W 34 St,40.74964,-73.98805,5 Ave & E 29 St,40.745168,-73.986831,270


In [32]:
df_map_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5035608 entries, 0 to 29838801
Data columns (total 7 columns):
 #   Column              Dtype  
---  ------              -----  
 0   start_station_name  object 
 1   Latitude_x          float64
 2   Longitude_x         float64
 3   end_station_name    object 
 4   Latitude_y          float64
 5   Longitude_y         float64
 6   trip_count          int32  
dtypes: float64(4), int32(1), object(2)
memory usage: 288.1+ MB


In [38]:
df_map_drop.to_csv(os.path.join(path, '02_Prepared_Data', 'kepler_csv.csv'), index=False)

In [44]:
df_kepler = pd.read_csv(r"C:\Users\ryani\Desktop\JupyterLab\NY_Citibike_2022\02_Prepared_Data\kepler_csv.csv")

In [45]:
df_kepler.replace([np.inf, -np.inf], np.nan, inplace=True)

In [46]:
df_kepler.dropna(inplace=True)

In [47]:
df_kepler.replace(r'\t', ' ', regex=True, inplace=True)
df_kepler.to_csv(os.path.join(path, '02_Prepared_Data', 'kepler_clean_csv.csv'), index=False, encoding='utf-8')

In [48]:
map_1.add_data(data=df_kepler, name='data_2')

In [49]:
df_kepler.head()

Unnamed: 0.1,Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
0,0,West End Ave & W 107 St,40.802117,-73.968181,Mt Morris Park W & W 120 St,40.804038,-73.945925,54
1,1,4 Ave & 3 St,40.673746,-73.985649,Boerum Pl\t& Pacific St,40.688489,-73.99116,44
2,2,1 Ave & E 62 St,40.761227,-73.96094,5 Ave & E 29 St,40.745168,-73.986831,55
3,3,2 Ave & E 96 St,40.783964,-73.947167,5 Ave & E 29 St,40.745168,-73.986831,14
4,4,6 Ave & W 34 St,40.74964,-73.98805,5 Ave & E 29 St,40.745168,-73.986831,270


In [53]:
df_kepler.drop('Unnamed: 0', axis=1, inplace=True)

In [55]:
df_kepler.head()

Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
0,West End Ave & W 107 St,40.802117,-73.968181,Mt Morris Park W & W 120 St,40.804038,-73.945925,54
1,4 Ave & 3 St,40.673746,-73.985649,Boerum Pl\t& Pacific St,40.688489,-73.99116,44
2,1 Ave & E 62 St,40.761227,-73.96094,5 Ave & E 29 St,40.745168,-73.986831,55
3,2 Ave & E 96 St,40.783964,-73.947167,5 Ave & E 29 St,40.745168,-73.986831,14
4,6 Ave & W 34 St,40.74964,-73.98805,5 Ave & E 29 St,40.745168,-73.986831,270


In [54]:
df_test = pd.DataFrame({
    'lat': [40.7128, 40.7060],
    'lng': [-74.0060, -74.0086],
    'name': ['Point A', 'Point B']
})

map_2 = KeplerGl(height=500)
map_2.add_data(data=df_test, name='Test Points')
map_2

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'Test Points': {'index': [0, 1], 'columns': ['lat', 'lng', 'name'], 'data': [[40.7128, -74.006,…

In [67]:
for col in ['start_lat', 'start_lng', 'end_lat', 'end_lng']:
    df_kepler[col] = pd.to_numeric(df_kepler[col], errors='coerce')

In [70]:
df_kepler.replace([np.inf, -np.inf], np.nan, inplace=True)
df_kepler.dropna(subset=['start_lat', 'start_lng', 'end_lat', 'end_lng'], inplace=True)

In [71]:
df_kepler_final = df_kepler[['start_lat', 'start_lng', 'end_lat', 'end_lng', 'trip_count']].copy()

In [72]:
df_kepler_final.head()

Unnamed: 0,start_lat,start_lng,end_lat,end_lng,trip_count
0,40.802117,-73.968181,40.804038,-73.945925,54
1,40.673746,-73.985649,40.688489,-73.99116,44
2,40.761227,-73.96094,40.745168,-73.986831,55
3,40.783964,-73.947167,40.745168,-73.986831,14
4,40.74964,-73.98805,40.745168,-73.986831,270


In [73]:
df_kepler.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5004655 entries, 0 to 5035607
Data columns (total 7 columns):
 #   Column              Dtype  
---  ------              -----  
 0   start_station_name  object 
 1   start_lat           float64
 2   start_lng           float64
 3   end_station_name    object 
 4   end_lat             float64
 5   end_lng             float64
 6   trip_count          int64  
dtypes: float64(4), int64(1), object(2)
memory usage: 305.5+ MB


In [75]:
map_1 = KeplerGl(height=600)
map_1.add_data(data=df_kepler_final, name='Citibike NY 2022')
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'Citibike NY 2022': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…

## I changed the colour points to be green at start and red at end, this felt appropriate and colours match nicely. I reduced the size of the lines so that congregated lines show up better. The Arc begins light green and ends pinkinsh, following the colour style. Point size reduced as well.

## A large amount of people are cycling to or down Empire state trail - perhaps exercising? A large amoutn of trips end at 10th Ave and W 26th Street. This area has art galleries, a park, and a night club. A large amount of trips are to or around central park - again perhaps recreation. There are also a large amount of trips from residential areas south of Central Park to the nearby Theatre Area at 10th Ave and 52/53rd street.

In [76]:
config = map_1.config

In [77]:
config

{'version': 'v1',
 'config': {'visState': {'filters': [{'dataId': ['Citibike NY 2022'],
     'id': 'ygxx5iwu',
     'name': ['trip_count'],
     'type': 'range',
     'value': [990, 999],
     'plotType': 'histogram',
     'animationWindow': 'free',
     'yAxis': None,
     'view': 'side',
     'speed': 1,
     'enabled': True}],
   'layers': [{'id': 'sces4bl',
     'type': 'point',
     'config': {'dataId': 'Citibike NY 2022',
      'label': 'start',
      'color': [82, 119, 108],
      'highlightColor': [252, 242, 26, 255],
      'columns': {'lat': 'start_lat', 'lng': 'start_lng'},
      'isVisible': True,
      'visConfig': {'radius': 1,
       'fixedRadius': False,
       'opacity': 0.8,
       'outline': False,
       'thickness': 2,
       'strokeColor': None,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
       

In [78]:
map_1.save_to_html(file_name = 'NY_Citibike_2022.html', read_only = False, config = config)

Map saved to NY_Citibike_2022.html!
