## 2.5: Advanced Geospatial Plotting Part 1

##### This is the first part of the notebook and was executed in JupyterLab. Part 2 was run in Google Colab because the kepler.gl (KeplerGl) map widget would not display properly in JupyterLab.

### Import Libraries and Data

In [1]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Load the ride data; the first CSV column is used as the row index.
df = pd.read_csv(filepath_or_buffer='Data/nyc_data.csv', index_col=0)

In [3]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,start_time,date,end_time,avgTemp,_merge
0,CA5837152804D4B5,electric_bike,2022-01-26 18:50:39,2022-01-26 18:51:53,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26 18:50:39,2022-01-26,2022-01-26 18:51:53,-2.3,both
1,BA06A5E45B6601D2,classic_bike,2022-01-28 13:14:07,2022-01-28 13:20:23,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-28 13:14:07,2022-01-28,2022-01-28 13:20:23,0.1,both
2,7B6827D7B9508D93,classic_bike,2022-01-10 19:55:13,2022-01-10 20:00:37,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-10 19:55:13,2022-01-10,2022-01-10 20:00:37,1.6,both
3,6E5864EA6FCEC90D,electric_bike,2022-01-26 07:54:57,2022-01-26 07:55:22,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26 07:54:57,2022-01-26,2022-01-26 07:55:22,-2.3,both
4,E24954255BBDE32D,electric_bike,2022-01-13 18:44:46,2022-01-13 18:45:43,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-13 18:44:46,2022-01-13,2022-01-13 18:45:43,4.0,both


### Data Preprocessing

In [6]:
# Step 1: Count trips for each (start station, end station) pair.
#
# `size()` counts the rows of each group directly, so no helper column has to
# be written into `df` (the raw frame stays exactly as it was loaded).
# The count column is still named 'value'. With pandas' default groupby
# settings, rows with a missing start or end station name are left out.

df_group = (
    df.groupby(['start_station_name', 'end_station_name'])
    .size()
    .reset_index(name='value')
)

In [5]:
# Preview the first five station pairs with their trip counts.
df_group.head(n=5)

Unnamed: 0,start_station_name,end_station_name,value
0,11 St & Washington St,11 St & Washington St,1132
1,11 St & Washington St,12 Ave & W 40 St,1
2,11 St & Washington St,12 St & Sinatra Dr N,253
3,11 St & Washington St,14 St Ferry - 14 St & Shipyard Ln,395
4,11 St & Washington St,4 St & Grand St,350


In [7]:
# Step 2: Build one coordinate row per station before merging.
# Only the first row seen for each station name is kept, so if a station
# appears with differing coordinates, the first occurrence wins.

start_coords = (
    df.loc[:, ['start_station_name', 'start_lat', 'start_lng']]
    .drop_duplicates(subset='start_station_name', keep='first')
)
end_coords = (
    df.loc[:, ['end_station_name', 'end_lat', 'end_lng']]
    .drop_duplicates(subset='end_station_name', keep='first')
)

In [8]:
# Step 3: Merge with origin station details (start_lat, start_lng)
#
# The result is assigned back to `df_group`, so any origin coordinate columns
# left over from a previous run of this cell are dropped first; otherwise a
# re-run would produce suffixed duplicates (start_lat_x / start_lat_y).
# `validate` raises if `start_coords` holds more than one row per station,
# which would otherwise silently multiply the trip-count rows.

df_group = (
    df_group
    .drop(columns=['start_lat', 'start_lng'], errors='ignore')
    .merge(
        start_coords,
        on='start_station_name',
        how='left',
        validate='many_to_one',
    )
)

In [9]:
# Step 4: Attach the destination coordinates (end_lat, end_lng).
# A left join keeps every row of df_group.

df_final = pd.merge(
    df_group,
    end_coords,
    how='left',
    on='end_station_name',
)

In [10]:
# 'df_final' holds one row per start/end station pair, with the trip count
# and the coordinates merged in for both stations.

df_final.head(n=5)

Unnamed: 0,start_station_name,end_station_name,value,start_lat,start_lng,end_lat,end_lng
0,11 St & Washington St,11 St & Washington St,1132,40.749985,-74.02715,40.749985,-74.02715
1,11 St & Washington St,12 Ave & W 40 St,1,40.749985,-74.02715,40.760875,-74.002777
2,11 St & Washington St,12 St & Sinatra Dr N,253,40.749985,-74.02715,40.750604,-74.02402
3,11 St & Washington St,14 St Ferry - 14 St & Shipyard Ln,395,40.749985,-74.02715,40.752961,-74.024353
4,11 St & Washington St,4 St & Grand St,350,40.749985,-74.02715,40.742258,-74.035111


### Rename Columns

In [11]:
# Rename the station and count columns to origin / destination / trips.
# The renamed frame is rebound to `df_final` rather than using `inplace=True`,
# so the frame object that earlier cells displayed is not mutated.
# Re-running is safe: column names that are no longer present are ignored.
df_final = df_final.rename(
    columns={
        'start_station_name': 'origin',
        'end_station_name': 'destination',
        'value': 'trips',
    }
)

In [12]:
# Check the renamed columns on the first five rows.
df_final.head(n=5)

Unnamed: 0,origin,destination,trips,start_lat,start_lng,end_lat,end_lng
0,11 St & Washington St,11 St & Washington St,1132,40.749985,-74.02715,40.749985,-74.02715
1,11 St & Washington St,12 Ave & W 40 St,1,40.749985,-74.02715,40.760875,-74.002777
2,11 St & Washington St,12 St & Sinatra Dr N,253,40.749985,-74.02715,40.750604,-74.02402
3,11 St & Washington St,14 St Ferry - 14 St & Shipyard Ln,395,40.749985,-74.02715,40.752961,-74.024353
4,11 St & Washington St,4 St & Grand St,350,40.749985,-74.02715,40.742258,-74.035111


In [13]:
# Check the last five rows as well.
df_final.tail(n=5)

Unnamed: 0,origin,destination,trips,start_lat,start_lng,end_lat,end_lng
6948,York St & Marin Blvd,Van Vorst Park,18,40.716615,-74.042412,40.718489,-74.047727
6949,York St & Marin Blvd,Warren St,42,40.716615,-74.042412,40.721124,-74.038051
6950,York St & Marin Blvd,Washington St,16,40.716615,-74.042412,40.724294,-74.035483
6951,York St & Marin Blvd,Willow Ave & 12 St,1,40.716615,-74.042412,40.751867,-74.030377
6952,York St & Marin Blvd,York St & Marin Blvd,47,40.716615,-74.042412,40.716615,-74.042412


### Plot the Map

In [14]:
# Save the aggregated trips table to CSV (the row index is written as the
# first column).

df_final.to_csv(path_or_buf='Data/trip_locations.csv')

In [15]:
# Build the kepler.gl map with the trips table loaded as dataset "data_1",
# then display it as the cell output.

m = KeplerGl(
    data={"data_1": df_final},
    height=700,
)
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'data_1':                      origin                        destination  trips  \
0     11 St …