# Building a Dashboard with Streamlit

In [1]:
# Import Libraries

import os
from datetime import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots
from streamlit_keplergl import keplergl_static

## Wrangling Data

In [4]:
# Import the Dataframe
# The CSV location can be overridden with the CITIBIKE_DATA_PATH environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original local path is used as the fallback.
path = os.environ.get(
    "CITIBIKE_DATA_PATH",
    r"C:\Users\okumb\Downloads\CitiBike-New-York\.venv\Scripts\NewYork_data.csv",
)
# The first CSV column is used as the row index rather than as a data column.
df = pd.read_csv(path, index_col=0)

In [5]:
# Inspect the column dtypes of the freshly loaded frame.
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
DATE                   object
TAVG                  float64
_merge                 object
dtype: object

In [6]:
# Give the weather columns lowercase, descriptive names:
# 'DATE' becomes 'date' and 'TAVG' becomes 'temp_avg'.
column_renames = {'DATE': 'date', 'TAVG': 'temp_avg'}
df = df.rename(columns=column_renames)


In [10]:
# Re-check the dtypes to confirm the renamed 'date' and 'temp_avg' columns.
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
temp_avg              float64
_merge                 object
dtype: object

In [11]:
# Remove the '_merge' column from df; it is not used by the steps below.
df = df.drop(columns=['_merge'])


In [12]:
# Create a month column

# Parse the 'YYYY-MM-DD' strings once, store them back as datetimes, and
# derive the calendar month (1-12) from the parsed values.
parsed_dates = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['date'] = parsed_dates
df['month'] = parsed_dates.dt.month.astype('int')

In [13]:
# Create the season column

# Month -> season lookup. Months not listed here (10 and 11) fall back to "fall".
#   winter: December-April, spring: May, summer: June-September
# NOTE(review): these ranges differ from the usual meteorological seasons
# (e.g. April counts as winter, September as summer) - confirm this is intended.
season_by_month = {
    **dict.fromkeys((12, 1, 2, 3, 4), "winter"),
    5: "spring",
    **dict.fromkeys((6, 7, 8, 9), "summer"),
}
df['season'] = [
    season_by_month.get(month_number, "fall")
    for month_number in df['month']
]

In [14]:
# (rows, columns) of df after adding the 'month' and 'season' columns.
df.shape

(895485, 17)

In [15]:
# List the column names currently in df.
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'temp_avg', 'month', 'season'],
      dtype='object')

In [16]:
# Preview the first five rows of the wrangled frame.
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,temp_avg,month,season
0,CA5837152804D4B5,electric_bike,2022-01-26 18:50:39,2022-01-26 18:51:53,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26,-2.3,1,winter
1,BA06A5E45B6601D2,classic_bike,2022-01-28 13:14:07,2022-01-28 13:20:23,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-28,0.1,1,winter
2,7B6827D7B9508D93,classic_bike,2022-01-10 19:55:13,2022-01-10 20:00:37,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-10,1.6,1,winter
3,6E5864EA6FCEC90D,electric_bike,2022-01-26 07:54:57,2022-01-26 07:55:22,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26,-2.3,1,winter
4,E24954255BBDE32D,electric_bike,2022-01-13 18:44:46,2022-01-13 18:45:43,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-13,4.0,1,winter


## Create Plotly Charts

In [17]:
## Groupby

# Every row gets value 1, so summing 'value' per start station gives the
# number of rows (rides) that started there.
df['value'] = 1
station_groups = df.groupby('start_station_name', as_index=False)
df_groupby_bar = station_groups['value'].sum()
# Keep the 20 stations with the highest totals, largest first.
top20 = df_groupby_bar.nlargest(n=20, columns='value')

In [25]:
# Bar chart of the 20 busiest start stations (top20 holds one row per station).
station_names = top20['start_station_name']
trip_totals = top20['value']

# Bars are shaded by their own height using the 'Blues' colorscale.
bar_trace = go.Bar(
    x=station_names,
    y=trip_totals,
    marker=dict(color=trip_totals, colorscale='Blues'),
)
fig = go.Figure(data=[bar_trace])

# Title, axis labels, fixed size, and a larger bottom margin so the station
# names on the x-axis are not clipped.
layout_options = dict(
    title='Top 20 Popular Bike Stations in New York',
    xaxis_title='Station Name',
    yaxis_title='Sum of Trips',
    width=900,
    height=600,
    margin={'l': 40, 'r': 40, 't': 40, 'b': 100},
)
fig.update_layout(**layout_options)

fig.show()

## Dual axis line

In [27]:
# Preview df again; it now also carries the helper column 'value'.
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,temp_avg,month,season,value
0,CA5837152804D4B5,electric_bike,2022-01-26 18:50:39,2022-01-26 18:51:53,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26,-2.3,1,winter,1
1,BA06A5E45B6601D2,classic_bike,2022-01-28 13:14:07,2022-01-28 13:20:23,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-28,0.1,1,winter,1
2,7B6827D7B9508D93,classic_bike,2022-01-10 19:55:13,2022-01-10 20:00:37,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,2022-01-10,1.6,1,winter,1
3,6E5864EA6FCEC90D,electric_bike,2022-01-26 07:54:57,2022-01-26 07:55:22,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-26,-2.3,1,winter,1
4,E24954255BBDE32D,electric_bike,2022-01-13 18:44:46,2022-01-13 18:45:43,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,2022-01-13,4.0,1,winter,1


In [28]:
# Make sure 'date' is a datetime column before it is used as an index
# (this leaves the values unchanged when the column is already datetime).
df['date'] = pd.to_datetime(df['date'])

In [29]:
# Date-indexed copy of df; df itself keeps 'date' as a regular column.
df_temp = df.set_index('date')

In [30]:
# Order the rows chronologically. 'date' is the index label of df_temp here,
# and sort_values accepts an index level name in `by`.
df_temp = df_temp.sort_values(by='date', ascending=True)

In [31]:
# Groupby the count of the trips
# Count ride_ids per date and name the resulting column 'bike_trips_daily'
# right here, so df_group carries its final column name from this cell onward
# (a later rename of 'ride_id' simply finds nothing to rename).
df_group = (
    df_temp.groupby('date')['ride_id']
    .count()
    .rename('bike_trips_daily')
    .reset_index()
)

In [33]:
# Preview the per-day trip counts.
df_group.head()

Unnamed: 0,date,bike_trips_daily
0,2022-01-01,592
1,2022-01-02,1248
2,2022-01-03,832
3,2022-01-04,934
4,2022-01-05,914


In [32]:
# Make sure the per-day count column is called 'bike_trips_daily'
# (rename ignores the mapping if 'ride_id' is no longer present).
df_group = df_group.rename(columns={'ride_id': 'bike_trips_daily'})

# Drop columns left behind by a previous run of this cell so it can be
# re-executed: merge(indicator=True) raises if '_merge' already exists, and a
# second 'bike_trips_daily' would be suffixed _x/_y. On a first run neither
# column exists and this is a no-op.
df = df.drop(columns=['bike_trips_daily', '_merge'], errors='ignore')

# Attach the daily count to every ride of that day. validate='many_to_one'
# makes pandas raise if df_group ever holds the same date twice, which would
# otherwise silently duplicate rides.
df = df.merge(df_group, on="date", how='outer', indicator=True, validate='many_to_one')

# Sanity checks: every row should be 'both', and the number of rows for a
# given day should equal that day's bike_trips_daily value.
print(df['_merge'].value_counts(dropna = False))
print("Shape of January 1st is", df[df['date'] == '2022-01-01'].shape) # Check 
print("Shape of January 2nd is", df[df['date'] == '2022-01-02'].shape) # Second check 

# Rebuild the date-indexed view so it includes the newly merged columns.
df_temp = df.set_index('date')

print(df_temp.columns)

_merge
both          895485
left_only          0
right_only         0
Name: count, dtype: int64
Shape of January 1st is (592, 20)
Shape of January 2nd is (1248, 20)
Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'temp_avg', 'month', 'season', 'value',
       'bike_trips_daily', '_merge'],
      dtype='object')


In [36]:
# Preview the date-indexed frame, now including 'bike_trips_daily' and '_merge'.
df_temp.head()

Unnamed: 0_level_0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,temp_avg,month,season,value,bike_trips_daily,_merge
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-01-01,115C78C3039FFA89,electric_bike,2022-01-01 09:21:14,2022-01-01 09:35:46,Essex Light Rail,JC038,Essex Light Rail,JC038,40.712774,-74.036486,40.712774,-74.036486,member,11.6,1,winter,1,592,both
2022-01-01,7FFD810CAA7A919E,classic_bike,2022-01-01 02:43:56,2022-01-01 02:43:57,12 St & Sinatra Dr N,HB201,12 St & Sinatra Dr N,HB201,40.750604,-74.02402,40.750604,-74.02402,member,11.6,1,winter,1,592,both
2022-01-01,E715E8432031B72C,classic_bike,2022-01-01 02:13:33,2022-01-01 02:18:42,Essex Light Rail,JC038,Washington St,JC098,40.712774,-74.036486,40.724294,-74.035483,member,11.6,1,winter,1,592,both
2022-01-01,BF1B7B1E1961A87B,electric_bike,2022-01-01 17:18:46,2022-01-01 18:55:25,Grand St,JC102,W 27 St & 7 Ave,6247.06,40.715178,-74.037683,40.746647,-73.993915,casual,11.6,1,winter,1,592,both
2022-01-01,4A01F0E53C6F4386,electric_bike,2022-01-01 11:23:32,2022-01-01 11:29:27,Christ Hospital,JC034,Hoboken Terminal - Hudson St & Hudson Pl,HB101,40.734786,-74.050444,40.735938,-74.030305,member,11.6,1,winter,1,592,both


In [None]:
# Collapse the ride-level frame to one row per date before plotting.
# 'bike_trips_daily' is a per-day count repeated on every ride of that day, so
# plotting df directly sends one point per ride to the figure; the first value
# per date carries the same information in a far smaller figure.
# NOTE(review): assumes temp_avg is likewise constant within a date - confirm.
daily_summary = df.groupby('date', as_index=False)[['bike_trips_daily', 'temp_avg']].first()

# Create a figure with secondary y-axis configuration
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add bike rides daily data trace
fig.add_trace(
    go.Scatter(
        x=daily_summary['date'],
        y=daily_summary['bike_trips_daily'],
        name='Daily Bike Rides',
        marker_color='red',
    ),
    secondary_y=False,  # False indicates this trace uses the left y-axis
)
# Add average temperature data trace
fig.add_trace(
    go.Scatter(
        x=daily_summary['date'],
        y=daily_summary['temp_avg'],
        name='Daily Temperature',
        marker_color='blue',
    ),
    secondary_y=True,  # True indicates this trace uses the right y-axis
)
# Set x-axis title
fig.update_xaxes(title_text='Date')
# Set y-axes titles
fig.update_yaxes(title_text='Bike Rides Daily', secondary_y=False)
fig.update_yaxes(title_text='Average Temperature (°C)', secondary_y=True, tickfont_color='blue')
# Add a title to the plot
fig.update_layout(
    title='Daily Bike Rides and Temperature in 2022',
    showlegend=True
)
# Show the figure
fig.show()

## The plot output for the cell above was removed to reduce the notebook's file size

In [None]:
# Collapse the date-indexed, ride-level frame to one row per date before
# plotting. 'bike_trips_daily' is a per-day count repeated on every ride of
# that day, so the first value per date is enough and keeps the figure small.
# NOTE(review): assumes temp_avg is likewise constant within a date - confirm.
daily_by_date = df_temp.groupby(level=0)[['bike_trips_daily', 'temp_avg']].first()

# Create a subplot figure with 2 rows and 1 column
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.15)  # vertical_spacing adjusts space between subplots

# Add the daily bike rides data to the first subplot
fig.add_trace(
    go.Scatter(
        x=daily_by_date.index,
        y=daily_by_date['bike_trips_daily'],
        name='Daily Bike Trips',
        marker_color='navy',
    ),
    row=1, col=1  # Position this trace in the first row and first column
)

# Add the average temperature data to the second subplot
fig.add_trace(
    go.Scatter(
        x=daily_by_date.index,
        y=daily_by_date['temp_avg'],
        name='Average Temperature',
        marker_color='red',
    ),
    row=2, col=1
)

# Update y-axes labels, coloured to match their traces
fig.update_yaxes(title_text="Bike Trips Daily", row=1, col=1, title_font={"color": "navy", "size": 14})
fig.update_yaxes(title_text="Average Temperature (°C)", row=2, col=1, title_font={"color": "red", "size": 14})

# Add a title to the overall figure; the legend is hidden because each
# subplot is already identified by its y-axis label
fig.update_layout(
    title='Temperature and Bike Trips in 2022',
    title_font_size=18,
    showlegend=False
)

# Show the figure
fig.show()

## The plot output for the cell above was removed to reduce the notebook's file size

In [39]:
# Save the top 20 stations as a csv file 
# Written to the current working directory; the DataFrame index is included
# as the first column because to_csv writes it by default.

top20.to_csv('top20.csv')

In [40]:
# Save the df as a csv file 
# This is the full ride-level frame, including the helper columns 'value'
# and '_merge' and the row index (to_csv writes the index by default).

df.to_csv('NewYork_newdf.csv')

In [41]:
# List the columns of df to decide which ones to keep in the reduced copy.
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'temp_avg', 'month', 'season', 'value',
       'bike_trips_daily', '_merge'],
      dtype='object')

In [43]:
# Build a slimmer frame by removing identifiers, raw timestamps and helper
# columns; df itself is left untouched.
columns_to_remove = [
    'ride_id', 'rideable_type', 'started_at', 'ended_at',
    'start_station_id', 'end_station_id', 'member_casual',
    'month', 'value', '_merge',
]
df_1 = df.drop(columns=columns_to_remove)

In [44]:
# Preview the reduced frame.
df_1.head()

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,date,temp_avg,season,bike_trips_daily
0,Essex Light Rail,Essex Light Rail,40.712774,-74.036486,40.712774,-74.036486,2022-01-01,11.6,winter,592
1,12 St & Sinatra Dr N,12 St & Sinatra Dr N,40.750604,-74.02402,40.750604,-74.02402,2022-01-01,11.6,winter,592
2,Essex Light Rail,Washington St,40.712774,-74.036486,40.724294,-74.035483,2022-01-01,11.6,winter,592
3,Grand St,W 27 St & 7 Ave,40.715178,-74.037683,40.746647,-73.993915,2022-01-01,11.6,winter,592
4,Christ Hospital,Hoboken Terminal - Hudson St & Hudson Pl,40.734786,-74.050444,40.735938,-74.030305,2022-01-01,11.6,winter,592


# Create a random split

In [46]:
# Seed NumPy's global random generator so the same rows are picked on every run.
np.random.seed(32)
# One uniform draw in [0, 1) per row of df_1; True where the draw is <= 0.92,
# i.e. for roughly 92% of the rows.
row_count = len(df_1)
red = np.random.rand(row_count) <= 0.92

In [47]:
# Keep only the rows where the mask is False (the remaining ~8% of df_1).
small = df_1.loc[~red]

In [48]:
# (rows, columns) of the sampled subset.
small.shape

(71391, 10)

In [49]:
# saving the small dataframe to csv 
# index=False leaves the row index out of the file.
small.to_csv('reduced_data_to_plot_7.csv',index = False)

In [50]:
# saving the reduced dataframe to csv 
# No index argument is passed, so the row index is written as the first
# column (unlike the sampled file, which is saved with index=False).
df_1.to_csv('reduced_data_to_plot.csv')