### Importing libraries

In [48]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
import streamlit as st
from keplergl import KeplerGl
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

### Wrangle data

In [49]:
# Load the ride subset; the CSV's first column becomes the row index
df = pd.read_csv("data/data_subset.csv", index_col=0)

In [50]:
# Show each column's dtype right after loading
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id      float64
end_station_name       object
end_station_id        float64
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
avgTemp               float64
_merge                 object
bike_rides_daily        int64
dtype: object

In [51]:
# Move the CSV's row index into a regular 'index' column and renumber the rows.
# ('date' is already an ordinary column, so this step does not affect it.)
# Reassigning rather than using inplace=True keeps the rebinding of df explicit.
df = df.reset_index()

In [52]:
# Re-check the dtypes: the former row index now appears as an 'index' column
df.dtypes

index                   int64
ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id      float64
end_station_name       object
end_station_id        float64
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
avgTemp               float64
_merge                 object
bike_rides_daily        int64
dtype: object

In [53]:
# Parse 'date' into datetimes, then derive the calendar month number from it
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month.astype('int')

In [54]:
# Confirm 'date' is now a datetime column and 'month' has been added
df.dtypes

index                          int64
ride_id                       object
rideable_type                 object
started_at                    object
ended_at                      object
start_station_name            object
start_station_id             float64
end_station_name              object
end_station_id               float64
start_lat                    float64
start_lng                    float64
end_lat                      float64
end_lng                      float64
member_casual                 object
date                  datetime64[ns]
avgTemp                      float64
_merge                        object
bike_rides_daily               int64
month                          int32
dtype: object

In [55]:
# Label every row with a season based on its month number:
# Dec-Feb -> winter, Mar-May -> spring, Jun-Aug -> summer, anything else -> fall
season_by_month = {
    12: "winter", 1: "winter", 2: "winter",
    3: "spring", 4: "spring", 5: "spring",
    6: "summer", 7: "summer", 8: "summer",
}
df['season'] = [season_by_month.get(month, "fall") for month in df['month']]

In [56]:
# Row and column counts after adding the derived columns
df.shape

(1000, 20)

In [57]:
# List the column labels currently on the frame
df.columns

Index(['index', 'ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp', '_merge', 'bike_rides_daily',
       'month', 'season'],
      dtype='object')

### Create the plotly charts

In [58]:
## Count rows per start station and keep the 20 largest counts

# Summing a constant 1 per station yields the number of rows for that station
df['value'] = 1
df_groupby_bar = df.groupby('start_station_name', as_index=False)['value'].sum()
top20 = df_groupby_bar.nlargest(n=20, columns='value')

In [59]:
# Save the top 20 stations to 'top20.csv'; index=False leaves the row labels out of the file
top20.to_csv('top20.csv', index=False)

In [60]:
# Mean temperature and mean daily ride count, one row per calendar date
df_agg = (
    df.groupby(df['date'].dt.date)[['avgTemp', 'bike_rides_daily']]
    .mean()
    .reset_index()
)
# The group keys are plain date objects; convert them back to datetimes
df_agg['date'] = pd.to_datetime(df_agg['date'])

In [61]:
# Display the per-date aggregate to check the result
df_agg

Unnamed: 0,date,avgTemp,bike_rides_daily
0,2022-01-02,11.4,2.0
1,2022-01-03,1.4,1.0
2,2022-01-04,-2.7,1.0
3,2022-01-06,4.9,3.0
4,2022-01-07,0.7,2.0
...,...,...,...
319,2022-12-23,7.5,1.0
320,2022-12-25,-6.9,1.0
321,2022-12-26,-4.7,1.0
322,2022-12-30,9.3,2.0


In [62]:
# Write the per-date aggregate to 'df_agg.csv'; index=False leaves the row labels out of the file
df_agg.to_csv('df_agg.csv', index=False)

In [63]:
# Bar chart of the top-20 start stations; bars are shaded by their own value on the Blues scale
fig = go.Figure(
    go.Bar(
        x=top20['start_station_name'],
        y=top20['value'],
        marker={'color': top20['value'], 'colorscale': 'Blues'},
    )
)
fig.show()

In [64]:
## Bar chart: add the title, axis labels and a fixed size
fig.update_layout(
    title='Top 20 most popular bike stations in New York',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=900,
    height=600,
)

### Dual-Axis Line Charts in Plotly

In [65]:
# Dual-axis line chart: ride counts on the primary y-axis, temperature on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (column, legend label, colour, plotted on the secondary axis?)
for column, label, colour, on_secondary in (
    ('bike_rides_daily', 'Daily bike rides', 'blue', False),
    ('avgTemp', 'Daily temperature', 'red', True),
):
    fig.add_trace(
        go.Scatter(x=df_agg['date'], y=df_agg[column], name=label, marker={'color': colour}),
        secondary_y=on_secondary,
    )

fig.update_layout(title='Daily bike trips and temperatures in 2022', height=600)

In [66]:
# Column labels now include the 'value' helper column
df.columns

Index(['index', 'ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp', '_merge', 'bike_rides_daily',
       'month', 'season', 'value'],
      dtype='object')

In [67]:
# Keep only the station, date, temperature, count and season columns
df_1 = df[[
    'start_station_name',
    'end_station_name',
    'date',
    'avgTemp',
    'value',
    'bike_rides_daily',
    'season',
]]

In [68]:
# Confirm which columns the selection kept
df_1.columns

Index(['start_station_name', 'end_station_name', 'date', 'avgTemp', 'value',
       'bike_rides_daily', 'season'],
      dtype='object')

### Reduce the row and column count

In [69]:
# Build a narrower copy of df by dropping identifier, timestamp and helper columns.
# Note: this rebinds df_1, replacing whatever it referred to before.
df_1 = df.drop(
    columns=[
        'date',
        'started_at',
        'ended_at',
        'ride_id',
        'rideable_type',
        'start_station_id',
        'end_station_id',
        'member_casual',
        'month',
    ]
)

In [70]:
# Reload the raw subset from disk. This rebinds df, so columns derived earlier
# (e.g. 'month', 'season', 'value') are not present on it and 'date' is unparsed again.
df = pd.read_csv("data/data_subset.csv", index_col=0)

In [71]:
# Trip duration as a timedelta: parsed end timestamp minus parsed start timestamp
ride_end = pd.to_datetime(df["ended_at"])
ride_start = pd.to_datetime(df["started_at"])
df["trip_duration"] = ride_end - ride_start

In [72]:
# Trip length in minutes. total_seconds() accounts for days, seconds and any
# sub-second part in one step, whereas combining .dt.days and .dt.seconds by hand
# silently drops fractions of a second.
df["minutes"] = df["trip_duration"].dt.total_seconds() / 60

In [73]:
# Display the frame to check the new 'trip_duration' and 'minutes' columns
df

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,avgTemp,_merge,bike_rides_daily,trip_duration,minutes
0,AC76F5E4EFDCE317,electric_bike,2022-01-02 16:20:01,2022-01-02 16:22:34,Lexington Ave & E 111 St,7567.06,E 115 St & Madison Ave,7599.02,40.795412,-73.944123,40.798944,-73.944846,casual,2022-01-02,11.4,both,2,0 days 00:02:33,2.550000
1,E121DC402DB250F1,classic_bike,2022-01-02 14:28:05,2022-01-02 14:58:12,48 St & 5 Ave,3202.06,Bond St & Bergen St,4404.10,40.646351,-74.009271,40.684967,-73.986208,member,2022-01-02,11.4,both,2,0 days 00:30:07,30.116667
2,67E6F94A7B94F776,electric_bike,2022-01-03 20:11:00,2022-01-03 20:15:17,E 156 & Brook Ave,7896.11,E 161 St & Park Ave,7979.17,40.819823,-73.912411,40.824811,-73.916407,member,2022-01-03,1.4,both,1,0 days 00:04:17,4.283333
3,441B0C0506D400C2,classic_bike,2022-01-04 12:43:34,2022-01-04 12:46:59,E 33 St & 1 Ave,6197.08,E 39 St & 2 Ave,6345.08,40.743227,-73.974498,40.748033,-73.973828,casual,2022-01-04,-2.7,both,1,0 days 00:03:25,3.416667
4,99005ACE9002E034,electric_bike,2022-01-06 23:05:29,2022-01-06 23:12:23,W 37 St & 10 Ave,6611.02,W 52 St & 11 Ave,6926.01,40.756604,-73.997901,40.767272,-73.993929,member,2022-01-06,4.9,both,3,0 days 00:06:54,6.900000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1909EEBDB8561EF3,classic_bike,2022-12-25 16:51:22,2022-12-25 17:11:28,Sands St & Jay St,4821.03,Cliff St & Fulton St,5065.14,40.700119,-73.986200,40.708380,-74.004950,member,2022-12-25,-6.9,both,1,0 days 00:20:06,20.100000
996,835FC1877F5B64A2,classic_bike,2022-12-26 15:17:21,2022-12-26 15:37:04,Brooklyn Ave & Dean St,4131.03,Kingston Ave & Park Pl,4016.03,40.676786,-73.944239,40.673080,-73.941910,member,2022-12-26,-4.7,both,1,0 days 00:19:43,19.716667
997,4C5C1A46D62AA1BC,classic_bike,2022-12-30 12:44:40,2022-12-30 12:48:00,E 23 St & 1 Ave,5929.01,E 17 St & 2 Ave,5896.01,40.736502,-73.978095,40.734312,-73.983725,member,2022-12-30,9.3,both,2,0 days 00:03:20,3.333333
998,39BD3E36E8E24D86,classic_bike,2022-12-30 15:22:03,2022-12-30 15:27:33,46 St & 25 Ave,6907.03,Steinway St & Ditmars Blvd,7074.01,40.765992,-73.906829,40.772717,-73.906127,member,2022-12-30,9.3,both,2,0 days 00:05:30,5.500000


In [74]:
# Configure the page (wide layout), then add the dashboard title and introductory text.
# NOTE(review): the copy refers to Divvy while the chart titles mention New York — confirm the intended branding.
st.set_page_config(page_title = 'Divvy Bikes Strategy Dashboard', layout='wide')
st.title("Divvy Bikes Strategy Dashboard")
st.markdown("The dashboard will help with the expansion problems Divvy currently faces")
# Fixed the user-facing typo "avaibale" -> "available"
st.markdown("Right now, Divvy bikes runs into a situation where customers complain about bikes not being available at certain times. This analysis aims to look at the potential reasons behind this.")

DeltaGenerator()

In [75]:
# Bar chart of the top-20 start stations for the dashboard.
# The x values must be the station names: top20.index only holds the integer row
# labels carried over from the grouped frame, which would label the bars with
# meaningless numbers under an axis titled 'Start stations'.
fig = go.Figure(
    go.Bar(
        x=top20['start_station_name'],
        y=top20['value'],
        marker={'color': top20['value'], 'colorscale': 'Blues'},
    )
)
fig.update_layout(
    title='Top 20 most popular bike stations in New York',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=900, height=600
)
# Render in the Streamlit app, stretched to the container width
st.plotly_chart(fig, use_container_width=True)

DeltaGenerator()

In [76]:
# Display the top-20 table (station name and summed 'value')
top20

Unnamed: 0,start_station_name,value
135,Broadway & W 58 St,7
267,E 6 St & Avenue B,7
500,W 21 St & 6 Ave,7
555,Washington Pl & 6 Ave,7
160,Central Park S & 6 Ave,6
200,E 11 St & 1 Ave,6
246,E 40 St & Park Ave,6
476,Vesey St & Church St,6
74,8 Ave & W 16 St,5
99,Ave A & E 14 St,5


In [77]:
# Empty figure whose single subplot has a secondary y-axis enabled
fig_2 = make_subplots(specs = [[{"secondary_y": True}]])

In [79]:
# Ride counts on the primary y-axis.
# The marker dict used to list 'color' twice; in a dict literal only the last
# value ('blue') takes effect, so the discarded first entry is removed.
fig_2.add_trace(
    go.Scatter(x=df['date'], y=df['bike_rides_daily'], name='Daily bike rides', marker={'color': 'blue'}),
    secondary_y=False
)

In [81]:
# Temperature on the secondary y-axis.
# The marker dict used to list 'color' twice; in a dict literal only the last
# value ('red') takes effect, so the discarded first entry is removed.
fig_2.add_trace(
    go.Scatter(x=df['date'], y=df['avgTemp'], name='Daily temperature', marker={'color': 'red'}),
    secondary_y=True
)

In [82]:
# Title and fixed height for the dual-axis chart
fig_2.update_layout(title='Daily bike trips and temperatures in 2022', height=600)

In [83]:
# Render the dual-axis chart in the Streamlit app, stretched to the container width
st.plotly_chart(fig_2, use_container_width=True)

DeltaGenerator()

In [84]:
# Relative path of the saved HTML page that is embedded in the dashboard
path_to_html = "Divvy Bike Trips Aggregated.html" 

In [85]:
# Read the saved HTML page into memory.
# The explicit encoding avoids depending on the platform's default text encoding;
# assumes the file was written as UTF-8 — confirm against the tool that produced it.
with open(path_to_html, 'r', encoding='utf-8') as f:
    html_data = f.read()

In [86]:
# Section header, then embed the HTML held in html_data as a component 1000 units tall
st.header("Aggregated Bike Trips in New York")
st.components.v1.html(html_data,height=1000)

DeltaGenerator()