# Making a Kepler map (Python 3.11, NumPy 1.23.5)

In [1]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt
import importlib.resources
from IPython.display import HTML, display
   
def _get_asset_str(filepath: str) -> bytes:
    """Return the raw bytes of a resource located under ``keplergl.keplergl``.

    Args:
        filepath: Name (or relative path) of the resource to read, resolved
            against the ``keplergl.keplergl`` anchor.

    Returns:
        The resource contents as ``bytes``; nothing is decoded here, despite
        the ``_str`` suffix in the function name.
    """
    # The Python 3.11 documentation marks ``importlib.resources.read_binary`` as
    # deprecated and names ``files(...).joinpath(...).read_bytes()`` as its
    # replacement; the traversable API also accepts names containing path
    # separators, which the legacy function rejects.
    anchor = importlib.resources.files('keplergl.keplergl')
    return anchor.joinpath(filepath).read_bytes()

In [2]:
# Project root that holds the '02_Prepared_Data' folder read and written below.
# Set the NY_CITIBIKE_DIR environment variable to run the notebook on another
# machine; when it is not set, the original local Windows location is used.
path = os.environ.get("NY_CITIBIKE_DIR", r"C:\Users\ryani\Desktop\JupyterLab\NY_Citibike_2022")

In [3]:
# Load the prepared ride table; the first CSV column is used as the row index
# and both station-name columns are read with the pandas "string" dtype.
df = pd.read_csv(
    os.path.join(path, '02_Prepared_Data', 'reduced_df.csv'),
    index_col=0,
    low_memory=False,
    dtype={"start_station_name": "string", "end_station_name": "string"},
)

# Last expression of the cell: show the (truncated) frame.
df

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,...,member_casual,date,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),usertype,month,value
23031359,51561596962D6D34,classic_bike,2022-10-19 19:05:17.329,2022-10-19 19:10:58.132,Broadway & E 21 St,6098.10,E 9 St & 5 Ave,5872.10,40.739888,-73.989586,...,member,2022-10-19,0.0,10.0,99852,341,6,member_classic_bike,10,1
4214833,8EC10B67BE1C76F4,classic_bike,2022-04-13 14:57:39.816,2022-04-13 15:43:47.279,W 22 St & 8 Ave,6224.03,S 5 Pl & S 5 St,5125.03,40.744751,-73.999154,...,casual,2022-04-13,0.0,15.4,98764,2767,46,casual_classic_bike,4,1
27380825,D8E629127787A5D5,classic_bike,2022-11-12 16:07:20.481,2022-11-12 16:18:02.598,Little West St & 1 Pl,5001.08,South St & Gouverneur Ln,4953.04,40.705693,-74.016777,...,member,2022-11-12,2.3,19.7,113802,642,11,member_classic_bike,11,1
26400613,4FF2A3FCAC3D21C6,electric_bike,2022-11-09 07:24:23.803,2022-11-09 07:32:16.316,Morton St & West St,5772.07,Spruce St & Nassau St,5137.10,40.731480,-74.010915,...,member,2022-11-09,0.0,8.7,95500,473,8,member_electric_bike,11,1
28333068,0FCCB939FC94D28C,electric_bike,2022-12-29 14:49:21.643,2022-12-29 14:54:01.034,Wadsworth Ave & W 179 St,8336.02,Broadway & W 165 St,8191.01,40.848467,-73.936064,...,member,2022-12-29,0.0,6.4,51674,279,5,member_electric_bike,12,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17567286,8686ABE8ACE7AA7A,classic_bike,2022-08-01 15:48:27.651,2022-08-01 15:55:53.935,Central Park West & W 72 St,7141.07,W 63 St & Broadway,7052.01,40.775794,-73.976206,...,member,2022-08-01,10.4,23.1,98167,446,7,member_classic_bike,8,1
15851894,BB5E7E58723263A5,classic_bike,2022-07-29 14:52:06.128,2022-07-29 14:56:29.969,E 20 St & Park Ave,6055.08,Lafayette St & E 8 St,5788.13,40.738274,-73.987520,...,member,2022-07-29,0.5,28.2,114611,264,4,member_classic_bike,7,1
13043140,67583EEF35D34EF0,classic_bike,2022-07-26 12:21:12.289,2022-07-26 12:53:40.960,Bushwick Ave & Stagg St,5140.06,St Johns Pl & Washington Ave,4001.09,40.709897,-73.940080,...,member,2022-07-26,0.0,25.9,122455,1949,32,member_classic_bike,7,1
12892275,BEF5594676FB8722,classic_bike,2022-07-24 13:07:01.807,2022-07-24 13:21:31.825,30 Ave & 41 St,6812.06,Shore Blvd & Astoria Park,7271.01,40.763422,-73.914142,...,member,2022-07-24,0.0,31.1,99639,870,15,member_classic_bike,7,1


In [4]:
# Station-pair key shared by the count table and the per-ride lookup.
pair_columns = ['start_station_name', 'end_station_name']

# One entry per (start, end) pair holding the number of rides on that pair.
pair_counts = df.value_counts(subset=pair_columns).rename('trip_count')

# Look every ride's pair up in that table; rides whose pair has no entry
# (presumably those with a missing station name) get 0 before the int cast.
df['trip_count'] = (
    df.set_index(pair_columns)
    .index.map(pair_counts)
    .fillna(0)
    .astype(int)
)


In [5]:
# Preview the first rows to confirm the new trip_count column is present.
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,...,date,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),usertype,month,value,trip_count
23031359,51561596962D6D34,classic_bike,2022-10-19 19:05:17.329,2022-10-19 19:10:58.132,Broadway & E 21 St,6098.1,E 9 St & 5 Ave,5872.1,40.739888,-73.989586,...,2022-10-19,0.0,10.0,99852,341,6,member_classic_bike,10,1,64
4214833,8EC10B67BE1C76F4,classic_bike,2022-04-13 14:57:39.816,2022-04-13 15:43:47.279,W 22 St & 8 Ave,6224.03,S 5 Pl & S 5 St,5125.03,40.744751,-73.999154,...,2022-04-13,0.0,15.4,98764,2767,46,casual_classic_bike,4,1,3
27380825,D8E629127787A5D5,classic_bike,2022-11-12 16:07:20.481,2022-11-12 16:18:02.598,Little West St & 1 Pl,5001.08,South St & Gouverneur Ln,4953.04,40.705693,-74.016777,...,2022-11-12,2.3,19.7,113802,642,11,member_classic_bike,11,1,67
26400613,4FF2A3FCAC3D21C6,electric_bike,2022-11-09 07:24:23.803,2022-11-09 07:32:16.316,Morton St & West St,5772.07,Spruce St & Nassau St,5137.1,40.73148,-74.010915,...,2022-11-09,0.0,8.7,95500,473,8,member_electric_bike,11,1,3
28333068,0FCCB939FC94D28C,electric_bike,2022-12-29 14:49:21.643,2022-12-29 14:54:01.034,Wadsworth Ave & W 179 St,8336.02,Broadway & W 165 St,8191.01,40.848467,-73.936064,...,2022-12-29,0.0,6.4,51674,279,5,member_electric_bike,12,1,10


In [6]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded output shows a negative minimum tripduration and a
# minimum trip_count of 0 — worth checking those rows before trusting the map.
df.describe()

Unnamed: 0,start_lat,start_lng,end_lat,end_lng,Precipitation,avgtemp,trips_per_day,tripduration,tripduration(mins),month,value,trip_count
count,2983817.0,2983817.0,2980142.0,2980142.0,2983817.0,2983817.0,2983817.0,2983817.0,2983817.0,2983817.0,2983817.0,2983817.0
mean,40.74055,-73.97525,40.74035,-73.97535,2.367569,17.12533,95188.82,1090.162,18.16862,7.047576,1.0,37.30713
std,0.03892089,0.02579353,0.03885638,0.02581259,6.067513,8.555726,27872.8,15314.18,255.2374,2.912009,0.0,65.50601
min,40.63332,-74.02747,40.56,-74.1,0.0,-11.7,2848.0,-3341.0,-56.0,1.0,1.0,0.0
25%,40.71602,-73.99379,40.71582,-73.9938,0.0,11.4,74873.0,348.0,6.0,5.0,1.0,6.0
50%,40.73936,-73.98163,40.73932,-73.98166,0.0,17.9,103639.0,610.0,10.0,7.0,1.0,18.0
75%,40.76319,-73.95928,40.76313,-73.95943,0.8,24.1,117321.0,1079.0,18.0,9.0,1.0,42.0
max,40.88238,-73.87859,40.96,-73.78,45.0,31.3,135072.0,13377800.0,222963.0,12.0,1.0,1165.0


In [7]:
# Row and column count of the full ride table.
df.shape

(2983817, 23)

In [8]:
# List the available column names before selecting the ones needed for the map.
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'Precipitation', 'avgtemp', 'trips_per_day',
       'tripduration', 'tripduration(mins)', 'usertype', 'month', 'value',
       'trip_count'],
      dtype='object')

In [9]:
# Narrow the ride table to the station names, their coordinates and the
# per-pair trip count.
df_map = df[
    [
        'start_station_name', 'start_lat', 'start_lng',
        'end_station_name', 'end_lat', 'end_lng',
        'trip_count',
    ]
]
df_map.head()

Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
23031359,Broadway & E 21 St,40.739888,-73.989586,E 9 St & 5 Ave,40.732759,-73.995817,64
4214833,W 22 St & 8 Ave,40.744751,-73.999154,S 5 Pl & S 5 St,40.710451,-73.960876,3
27380825,Little West St & 1 Pl,40.705693,-74.016777,South St & Gouverneur Ln,40.703554,-74.006702,67
26400613,Morton St & West St,40.73148,-74.010915,Spruce St & Nassau St,40.711464,-74.005524,3
28333068,Wadsworth Ave & W 179 St,40.848467,-73.936064,Broadway & W 165 St,40.839137,-73.941409,10


In [10]:
# Work on an independent copy so the in-place clean-up does not act on the
# column selection taken from df.
df_map_drop = df_map.copy()
# Keep one row per distinct combination of the seven selected columns.
df_map_drop.drop_duplicates(inplace=True)

In [11]:
# Remove, in place, every row that has a missing value in any column.
df_map_drop.dropna(inplace=True)

In [12]:
# Size of the map table after de-duplication and dropping missing values.
df_map_drop.shape

(833324, 7)

In [13]:
# df_map_drop is derived from df_map, which was selected with the columns
# 'start_lat', 'start_lng', 'end_lat' and 'end_lng' already in place. A rename
# of 'Latitude_x'/'Longitude_x'/'Latitude_y'/'Longitude_y' at this point matches
# no column and silently does nothing (DataFrame.rename ignores unknown labels
# by default), so this cell verifies the coordinate columns explicitly instead:
# a schema change now fails loudly rather than passing unnoticed.
_required_cols = {'start_lat', 'start_lng', 'end_lat', 'end_lng'}
assert _required_cols <= set(df_map_drop.columns), (
    "df_map_drop is missing coordinate columns: "
    f"{sorted(_required_cols - set(df_map_drop.columns))}"
)

In [14]:
# Preview the de-duplicated station-pair table.
df_map_drop.head()

Unnamed: 0,start_station_name,start_lat,start_lng,end_station_name,end_lat,end_lng,trip_count
23031359,Broadway & E 21 St,40.739888,-73.989586,E 9 St & 5 Ave,40.732759,-73.995817,64
4214833,W 22 St & 8 Ave,40.744751,-73.999154,S 5 Pl & S 5 St,40.710451,-73.960876,3
27380825,Little West St & 1 Pl,40.705693,-74.016777,South St & Gouverneur Ln,40.703554,-74.006702,67
26400613,Morton St & West St,40.73148,-74.010915,Spruce St & Nassau St,40.711464,-74.005524,3
28333068,Wadsworth Ave & W 179 St,40.848467,-73.936064,Broadway & W 165 St,40.839137,-73.941409,10


In [15]:
# Drop the two station-name columns, leaving the coordinates and trip_count.
df_kepler = df_map_drop.drop(columns=["start_station_name", "end_station_name"])

In [16]:
# Turn +/- infinity into NaN (in place) so a following dropna can remove such rows.
df_kepler.replace([np.inf, -np.inf], np.nan, inplace=True)

In [17]:
# Drop, in place, any row that contains NaN.
# NOTE(review): the row count reported before this step and by df_kepler.info()
# later is 833324 in both cases, so on this data nothing is removed here.
df_kepler.dropna(inplace=True)

In [18]:
# df_kepler holds only numeric columns at this point (the station-name columns
# were dropped when it was created), so a regex replacement of tab characters
# cannot match anything; that no-op pass over the frame is omitted here.
# Persist the cleaned frame as UTF-8 CSV, leaving out the row index.
df_kepler.to_csv(
    os.path.join(path, '02_Prepared_Data', 'kepler_clean_csv.csv'),
    index=False,
    encoding='utf-8',
)

In [19]:
# Preview the numeric table that was written to CSV.
df_kepler.head()

Unnamed: 0,start_lat,start_lng,end_lat,end_lng,trip_count
23031359,40.739888,-73.989586,40.732759,-73.995817,64
4214833,40.744751,-73.999154,40.710451,-73.960876,3
27380825,40.705693,-74.016777,40.703554,-74.006702,67
26400613,40.73148,-74.010915,40.711464,-74.005524,3
28333068,40.848467,-73.936064,40.839137,-73.941409,10


In [20]:
# Force each coordinate column to a numeric dtype; values that cannot be
# parsed become NaN because of errors='coerce'.
for coord in ('start_lat', 'start_lng', 'end_lat', 'end_lng'):
    df_kepler[coord] = pd.to_numeric(df_kepler[coord], errors='coerce')

In [21]:
# Repeat the infinity -> NaN conversion after the numeric coercion, then drop
# (in place) the rows that lack any of the four coordinates.
df_kepler.replace([np.inf, -np.inf], np.nan, inplace=True)
df_kepler.dropna(subset=['start_lat', 'start_lng', 'end_lat', 'end_lng'], inplace=True)

In [22]:
# Take an independent copy of the five map columns in their final order.
df_kepler_final = df_kepler.loc[
    :, ['start_lat', 'start_lng', 'end_lat', 'end_lng', 'trip_count']
].copy()

In [23]:
# Preview the frame that is handed to the Kepler map.
df_kepler_final.head()

Unnamed: 0,start_lat,start_lng,end_lat,end_lng,trip_count
23031359,40.739888,-73.989586,40.732759,-73.995817,64
4214833,40.744751,-73.999154,40.710451,-73.960876,3
27380825,40.705693,-74.016777,40.703554,-74.006702,67
26400613,40.73148,-74.010915,40.711464,-74.005524,3
28333068,40.848467,-73.936064,40.839137,-73.941409,10


In [24]:
# Check dtypes and non-null counts.
# NOTE(review): this inspects df_kepler, not the df_kepler_final copy that is
# passed to the map.
df_kepler.info()

<class 'pandas.core.frame.DataFrame'>
Index: 833324 entries, 23031359 to 13043140
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   start_lat   833324 non-null  float64
 1   start_lng   833324 non-null  float64
 2   end_lat     833324 non-null  float64
 3   end_lng     833324 non-null  float64
 4   trip_count  833324 non-null  int32  
dtypes: float64(4), int32(1)
memory usage: 35.0 MB


In [25]:
# Create the Kepler.gl widget with height=600.
map_1 = KeplerGl(height=600)
# Register the cleaned trips as a dataset named 'Citibike NY 2022'.
map_1.add_data(data=df_kepler_final, name='Citibike NY 2022')
# Last expression of the cell: display the widget.
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'Citibike NY 2022': {'index': [23031359, 4214833, 27380825, 26400613, 28333068, 4034350, 360836…

## I changed the point colours to green at the start and red at the end; this felt appropriate and the colours match nicely. I reduced the width of the lines so that congregated lines show up better. The arc begins light green and ends pinkish, following the same colour style. The point size was reduced as well.

## A large number of people are cycling to or along the Empire State Trail - perhaps exercising? A large number of trips end at 10th Ave and W 26th Street. This area has art galleries, a park, and a nightclub. A large number of trips are to or around Central Park - again, perhaps for recreation. There are also a large number of trips from residential areas south of Central Park to the nearby theatre area at 10th Ave and 52nd/53rd Street.

In [29]:
# Capture the widget's current configuration.
# NOTE(review): execution counts jump from 25 to 29 here, and no code in this
# notebook sets the layer/filter options seen in the recorded config output —
# presumably they were set by hand in the widget, so a fresh "Run All" would
# capture a different config. Consider persisting this dict and reloading it —
# TODO confirm.
config = map_1.config

In [30]:
# Show the captured configuration dict.
config

{'version': 'v1',
 'config': {'visState': {'filters': [{'dataId': ['Citibike NY 2022'],
     'id': 'o0pxc9lra',
     'name': ['trip_count'],
     'type': 'range',
     'value': [99, 99],
     'plotType': 'histogram',
     'animationWindow': 'free',
     'yAxis': None,
     'view': 'side',
     'speed': 1,
     'enabled': True}],
   'layers': [{'id': 'kzxkpyp',
     'type': 'point',
     'config': {'dataId': 'Citibike NY 2022',
      'label': 'start',
      'color': [61, 122, 62],
      'highlightColor': [252, 242, 26, 255],
      'columns': {'lat': 'start_lat', 'lng': 'start_lng'},
      'isVisible': True,
      'visConfig': {'radius': 10,
       'fixedRadius': False,
       'opacity': 0.8,
       'outline': False,
       'thickness': 2,
       'strokeColor': None,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
        

In [31]:
# Export the map together with the captured config to an HTML file.
# NOTE(review): the file name is relative, so the file presumably lands in the
# current working directory rather than under `path` — confirm this is intended.
map_1.save_to_html(file_name = 'NY_Citibike_2022.html', read_only = False, config = config)

Map saved to NY_Citibike_2022.html!
