# 01. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import geojson

# 02. Import Data

In [2]:
# Folder Path
# Project root folder; the CSV imports and the final CSV export build their locations from it.
# NOTE(review): this is a machine-specific absolute path — adjust it before running on another machine.
path = r'C:\Users\jrper\OneDrive\Documents\Career Foundry Data Analytics Program\Achievement 6'

In [3]:
# Load the cleaned, merged trip records (trips_merged_cleaned.csv) from the prepared-data folder
trips_csv = os.path.join(path, '02 Data', 'Prepared Data', 'trips_merged_cleaned.csv')
df_trips = pd.read_csv(trips_csv, index_col=False)

In [4]:
# Load the per-station marker data (Choropleth2.csv) from the prepared-data folder
choropleth_csv = os.path.join(path, '02 Data', 'Prepared Data', 'Choropleth2.csv')
df_choropleth = pd.read_csv(choropleth_csv, index_col=False)

In [5]:
# Import geojson file for Citi Bike locations
# Built from the shared project folder so the absolute path is declared in one place only
station_geo = os.path.join(path, '02 Data', 'Original Data', 'citi_bike_locations.geojson')

In [6]:
# Display charts in notebook
%matplotlib inline

In [7]:
# View geojson file contents
# Open through a context manager so the file handle is closed as soon as parsing finishes,
# and reuse station_geo instead of repeating the absolute path.
# GeoJSON is UTF-8 by specification, so the encoding is stated explicitly rather than
# relying on the platform default.
with open(station_geo, encoding='utf-8') as f:
    # geojson object as a dictionary
    data = geojson.load(f)

# Iterating through the geojson list (one printed line per station feature)
for feature in data['features']:
    print(feature)

{"geometry": {"coordinates": [-74.037665, 40.715256, 0.0], "type": "Point"}, "properties": {"geo_point_2d": {"lat": 40.7152561007, "lon": -74.0376649783}, "location": "Grand St", "stationid": "JC102"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.040945, 40.716471, 0.0], "type": "Point"}, "properties": {"geo_point_2d": {"lat": 40.7164706135, "lon": -74.0409454012}, "location": "York St", "stationid": "JC096"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.043918, 40.717722, 0.0], "type": "Point"}, "properties": {"geo_point_2d": {"lat": 40.7177223148, "lon": -74.0439179821}, "location": "City Hall", "stationid": "JC003"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.050944, 40.719472, 0.0], "type": "Point"}, "properties": {"geo_point_2d": {"lat": 40.7194722155, "lon": -74.0509438064}, "location": "Montgomery St", "stationid": "JC099"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.050661, 40.724136, 0.0], "type": "Point"}, "properties": {"geo_point_2d": {"l

# 03. Wrangling Procedures

In [8]:
# View first 5 rows of df_trips to confirm the CSV loaded with the expected columns
df_trips.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender
0,226,2020-01-01 00:04:50.1920,2020-01-01 00:08:37.0370,3186,Grove St PATH,40.719586,-74.043117,3211,Newark Ave,40.721525,-74.046305,29444,Subscriber,1984,2
1,377,2020-01-01 00:16:01.6700,2020-01-01 00:22:19.0800,3186,Grove St PATH,40.719586,-74.043117,3269,Brunswick & 6th,40.726012,-74.050389,26305,Subscriber,1989,2
2,288,2020-01-01 00:17:33.8770,2020-01-01 00:22:22.4420,3186,Grove St PATH,40.719586,-74.043117,3269,Brunswick & 6th,40.726012,-74.050389,29268,Customer,1989,1
3,435,2020-01-01 00:32:05.9020,2020-01-01 00:39:21.0660,3195,Sip Ave,40.730897,-74.063913,3280,Astor Place,40.719282,-74.071262,29278,Customer,1969,0
4,231,2020-01-01 00:46:19.6780,2020-01-01 00:50:11.3440,3186,Grove St PATH,40.719586,-74.043117,3276,Marin Light Rail,40.714584,-74.042817,29276,Subscriber,1983,2


In [9]:
# Parse the trip start/stop timestamps from object (string) columns into datetime values
for timestamp_col in ('starttime', 'stoptime'):
    df_trips[timestamp_col] = pd.to_datetime(df_trips[timestamp_col])

In [10]:
# Ensure datatype successfully changed: 'starttime' and 'stoptime' should now be listed as datetime64[ns]
df_trips.dtypes

tripduration                        int64
starttime                  datetime64[ns]
stoptime                   datetime64[ns]
start station id                    int64
start station name                 object
start station latitude            float64
start station longitude           float64
end station id                      int64
end station name                   object
end station latitude              float64
end station longitude             float64
bikeid                              int64
usertype                           object
birth year                          int64
gender                              int64
dtype: object

#### Observations: Datatype successfully changed to datetime for 'starttime' and 'stoptime' columns.

In [11]:
# Derive day-of-month, month number and year columns for both trip timestamps.
# Columns are created start-time first, then stop-time, each in day/month/year order.
date_parts = (('day_of_month', 'day'), ('month', 'month'), ('year', 'year'))
for timestamp_col in ('starttime', 'stoptime'):
    for column_prefix, dt_attr in date_parts:
        df_trips[f'{column_prefix}_{timestamp_col}'] = getattr(df_trips[timestamp_col].dt, dt_attr)

In [12]:
# Add the weekday name and month name of both trip timestamps.
# Weekday columns are created first (start, stop), then the month-name columns (start, stop).
name_accessors = (('weekday', 'day_name'), ('month_name', 'month_name'))
for column_prefix, dt_method in name_accessors:
    for timestamp_col in ('starttime', 'stoptime'):
        df_trips[f'{column_prefix}_{timestamp_col}'] = getattr(df_trips[timestamp_col].dt, dt_method)()

In [13]:
# Record the hour of day at which each trip started and ended
start_hours = df_trips['starttime'].dt.hour
stop_hours = df_trips['stoptime'].dt.hour
df_trips['hour_starttime'] = start_hours
df_trips['hour_stoptime'] = stop_hours

In [14]:
# Create 'customer_age' column: year the trip started minus the rider's birth year
trip_year = df_trips['year_starttime']
df_trips['customer_age'] = trip_year.sub(df_trips['birth year'])

In [15]:
# Attach to every trip row the number of start times recorded for its start station
starts_by_station = df_trips.groupby('start station name')['starttime']
df_trips['starttime_counts'] = starts_by_station.transform('count')

In [16]:
# Inspect the distribution (count, mean, quartiles, min/max) of 'starttime_counts'
df_trips['starttime_counts'].describe()

count    379861.000000
mean      11579.704184
std        6657.892034
min           1.000000
25%        6035.000000
50%       10489.000000
75%       15767.000000
max       25629.000000
Name: starttime_counts, dtype: float64

In [17]:
# Label every trip row with a traffic bucket derived from its station's 'starttime_counts'.
# Each upper bound is inclusive: <=2000, 2001-4000, 4001-8000, 8001-10000, >10000.
station_counts = df_trips['starttime_counts']
bucket_rules = [
    (station_counts <= 2000, 'Lowest Traffic'),
    ((station_counts > 2000) & (station_counts <= 4000), 'Low Traffic'),
    ((station_counts > 4000) & (station_counts <= 8000), 'Medium Traffic'),
    ((station_counts > 8000) & (station_counts <= 10000), 'Medium High Traffic'),
    (station_counts > 10000, 'Highest Traffic'),
]
for in_bucket, bucket_label in bucket_rules:
    df_trips.loc[in_bucket, 'starttime_buckets'] = bucket_label

In [18]:
# Ensure all new columns were successfully created (preview of the first rows, derived columns on the right)
df_trips.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,...,year_stoptime,weekday_starttime,weekday_stoptime,month_name_starttime,month_name_stoptime,hour_starttime,hour_stoptime,customer_age,starttime_counts,starttime_buckets
0,226,2020-01-01 00:04:50.192,2020-01-01 00:08:37.037,3186,Grove St PATH,40.719586,-74.043117,3211,Newark Ave,40.721525,...,2020,Wednesday,Wednesday,January,January,0,0,36,25629,Highest Traffic
1,377,2020-01-01 00:16:01.670,2020-01-01 00:22:19.080,3186,Grove St PATH,40.719586,-74.043117,3269,Brunswick & 6th,40.726012,...,2020,Wednesday,Wednesday,January,January,0,0,31,25629,Highest Traffic
2,288,2020-01-01 00:17:33.877,2020-01-01 00:22:22.442,3186,Grove St PATH,40.719586,-74.043117,3269,Brunswick & 6th,40.726012,...,2020,Wednesday,Wednesday,January,January,0,0,31,25629,Highest Traffic
3,435,2020-01-01 00:32:05.902,2020-01-01 00:39:21.066,3195,Sip Ave,40.730897,-74.063913,3280,Astor Place,40.719282,...,2020,Wednesday,Wednesday,January,January,0,0,51,15767,Highest Traffic
4,231,2020-01-01 00:46:19.678,2020-01-01 00:50:11.344,3186,Grove St PATH,40.719586,-74.043117,3276,Marin Light Rail,40.714584,...,2020,Wednesday,Wednesday,January,January,0,0,37,25629,Highest Traffic


In [19]:
# List every column name to confirm that each derived column is present
df_trips.columns

Index(['tripduration', 'starttime', 'stoptime', 'start station id',
       'start station name', 'start station latitude',
       'start station longitude', 'end station id', 'end station name',
       'end station latitude', 'end station longitude', 'bikeid', 'usertype',
       'birth year', 'gender', 'day_of_month_starttime', 'month_starttime',
       'year_starttime', 'day_of_month_stoptime', 'month_stoptime',
       'year_stoptime', 'weekday_starttime', 'weekday_stoptime',
       'month_name_starttime', 'month_name_stoptime', 'hour_starttime',
       'hour_stoptime', 'customer_age', 'starttime_counts',
       'starttime_buckets'],
      dtype='object')

#### Observations: All new columns were successfully created.

# 04. Choropleth Map

In [20]:
# Create dataframe of data to plot: station name and its trip count, still one row per trip
plot_columns = ['start station name', 'starttime_counts']
data_to_plot = df_trips[plot_columns]

In [21]:
# View subset (first rows of data_to_plot; station rows repeat because there is one row per trip)
data_to_plot.head()

Unnamed: 0,start station name,starttime_counts
0,Grove St PATH,25629
1,Grove St PATH,25629
2,Grove St PATH,25629
3,Sip Ave,15767
4,Grove St PATH,25629


In [22]:
# Setup a folium map at a high-level zoom
# NOTE: the name `map` shadows Python's built-in map(); it is kept because the export
# cell saves the choropleth under this name.
map = folium.Map(location = [40.693943, -73.985880], zoom_start = 10.0)

# data_to_plot holds one row per trip, so collapse it to one row per station before
# binding it to the GeoJSON features (the count is identical on every row of a station).
station_totals = data_to_plot.drop_duplicates()

folium.Choropleth(
    geo_data = station_geo,
    data = station_totals,
    columns = ['start station name', 'starttime_counts'],
    key_on = 'properties.location',
    fill_color = 'YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    # The plotted value is the per-station trip count, not the trip duration
    legend_name = "Number of trips started at station").add_to(map)

folium.LayerControl().add_to(map)

map

#### Observations: Markers appear on the map to represent Citi Bike stations, but they are not colored by station popularity. The GeoJSON file contains only Point features, so there are no areas for the choropleth to shade.

### Assign marker color and popup based on 'start station name' and 'starttime_buckets' 

In [23]:
# Generate base map for New Jersey
def generateBaseMap(default_location=(40.693943, -73.985880), default_zoom_start=10, tiles='OpenStreetMap'):
    """Create an empty folium map to which station markers or layers can be added.

    Parameters
    ----------
    default_location : sequence of two floats
        Latitude and longitude on which the map is centred.
    default_zoom_start : int
        Initial zoom level.
    tiles : str
        Name of the tile set passed to ``folium.Map``. The default is folium's
        own default, so existing calls render exactly as before.

    Returns
    -------
    folium.Map
        A map with a scale bar, sized to 50% of the available width and height.
    """
    # The default is an immutable tuple (no shared mutable default); it is handed to
    # folium as a fresh list so the caller's sequence is never shared with the map.
    return folium.Map(
        location=list(default_location),
        control_scale=True,
        zoom_start=default_zoom_start,
        width='50%',
        height='50%',
        tiles=tiles,
    )

In [24]:
# Call base map function and display the (still empty) base map; markers are added to it in the next cell
new_jersey_map = generateBaseMap()
new_jersey_map

In [25]:
# Choose station icon colors based on 'starttime_buckets' column values
station_color = {'Lowest Traffic':'white', 'Low Traffic':'lightblue', 'Medium Traffic':'blue', 'Medium High Traffic':'darkblue', 'Highest Traffic':'black'}

# Loop through each row in dataframe (the row index itself is not needed)
for _row_index, row in df_choropleth.iterrows():
    # Define popup content. The IFrame body is rendered as HTML, where a bare "\n" is
    # treated as ordinary whitespace, so an explicit <br> puts the traffic level on its own line.
    iframe = folium.IFrame(f'Start Station Name: {row["start station name"]} <br> Traffic: {row["starttime_buckets"]}')

    # Initialize popup using iframe
    popup = folium.Popup(iframe, min_width=300, max_width=300)

    # Define icon color from the station's traffic bucket
    icon_color = station_color[row['starttime_buckets']]

    # Add each row to New Jersey base map
    folium.Marker(location=[row['start station latitude'], row['start station longitude']],
                  popup=popup,
                  icon=folium.Icon(color=icon_color, icon='')).add_to(new_jersey_map)

new_jersey_map

#### Observations: Colors and popups successfully created. 
#### Marker Colors: black: Highest Traffic, dark blue: Medium High Traffic, blue: Medium Traffic, light blue: Low Traffic, white: Lowest Traffic

# 05. Heat Map

In [26]:
# Generate base map
# NOTE: this redefines the generateBaseMap helper declared in the choropleth section; once this
# cell has run, calls without arguments use zoom level 12 and the "Stamen Toner" tile set.
def generateBaseMap(default_location=(40.693943, -73.985880), default_zoom_start=12, tiles="Stamen Toner"):
    """Create an empty folium map used as the background of the heat map.

    Parameters
    ----------
    default_location : sequence of two floats
        Latitude and longitude on which the map is centred.
    default_zoom_start : int
        Initial zoom level.
    tiles : str
        Name of the tile set passed to ``folium.Map``. Defaults to the tile set this
        helper previously hard-coded, so existing calls are unaffected.
        NOTE(review): recent folium releases appear to have dropped the built-in Stamen
        tile sets — confirm against the installed version and pass another name if needed.

    Returns
    -------
    folium.Map
        A map with a scale bar, sized to 50% of the available width and height.
    """
    # Immutable default; passed on as a fresh list so nothing is shared between calls.
    base_map2 = folium.Map(
        location=list(default_location),
        control_scale=True,
        zoom_start=default_zoom_start,
        width='50%',
        height='50%',
        tiles=tiles,
    )
    return base_map2

In [27]:
from folium.plugins import HeatMap
base_map2 = generateBaseMap()

# Weight every station location by the number of trips that started there.
# 'starttime_counts' already repeats the station total on every trip row, so summing it per
# location would multiply that total by the number of trips; counting the rows per location
# gives the actual trip count.
# NOTE(review): depending on the folium version, weights may need scaling to the 0-1 range — confirm.
station_coords = ['start station latitude', 'start station longitude']
heat_data = (
    df_trips.groupby(station_coords)
    .size()
    .reset_index(name='trip_count')
    .values.tolist()
)
HeatMap(data=heat_data, radius=8, max_zoom=13).add_to(base_map2)

base_map2

# 06. Results

#### Grove St PATH, Newport Pkwy, and Liberty Light Rail are the most popular stations.
#### JCBS Depot, Leonard Gordon Park, and Jackson Square are the least popular stations.
#### Many of the Citi Bike stations with the highest traffic are located near PATH (Port Authority Trans-Hudson), which is a rapid rail transit system that connects New Jersey with New York.

# 07. Export Data

In [28]:
# Write each map to its own standalone HTML file (relative names, so they land in the working directory)
map_exports = (
    (map, 'plot_data_choro.html'),
    (new_jersey_map, 'plot_data_choro2.html'),
    (base_map2, 'plot_data_heat.html'),
)
for folium_map, html_name in map_exports:
    folium_map.save(html_name)

In [29]:
# Save the enriched trips table as final_choropleth_data.csv in the prepared-data folder (row index omitted)
export_csv = os.path.join(path, '02 Data', 'Prepared Data', 'final_choropleth_data.csv')
df_trips.to_csv(export_csv, index=False)