# Building dashboards with Streamlit for CitiBike 2022 (New York)

## Importing Libraries

In [2]:
import streamlit as st
import pandas as pd 
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pydeck as pdk
import ipywidgets as widgets

## Top 20 Starting Stations Bar Plot

In [4]:
# Load the Top-20 start stations table from CSV.
Top20_start_stations = pd.read_csv("Top20_start_stations.csv")

In [5]:
# Rename 'ride_id' to 'ride_count' so the column name matches what it is
# used for in the chart below (the y-values of the bar plot).
Top20_start_stations = Top20_start_stations.rename(
    columns={"ride_id": "ride_count"}
)
Top20_start_stations.head()

Unnamed: 0,start_station_name,ride_count
0,West St & Chambers St,83747
1,W 21 St & 6 Ave,81113
2,Broadway & W 58 St,72908
3,6 Ave & W 33 St,70250
4,1 Ave & E 68 St,68446


In [None]:
# Bar chart of ride counts per start station; each bar is coloured by its
# own ride_count using the 'Blues' colorscale.
bar1 = go.Figure(
    go.Bar(
        x=Top20_start_stations['start_station_name'],
        y=Top20_start_stations['ride_count'],
        marker=dict(
            color=Top20_start_stations['ride_count'],
            colorscale='Blues',
        ),
    )
)
bar1.update_layout(
    title='Top 20 most popular bike stations in NewYork 2022',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=900,
    height=600,
)
bar1.show()

## CitiBike Monthly Ride Volume vs. Temperature Line Plot

In [54]:
# Read only the three columns used in this section: the date, the 'tavg'
# temperature column and the ride identifier.
ride_temp = pd.read_csv(
    "../Data/citybike2022_weather.csv",
    usecols=["date", "tavg", "ride_id"],
)
ride_temp.head()

Unnamed: 0,date,tavg,ride_id
0,2022-01-01,11.6,4A091640AEC83471
1,2022-01-01,11.6,7A560D6287C00126
2,2022-01-01,11.6,4F6F726B809C15E4
3,2022-01-01,11.6,3BC10DC94FD79955
4,2022-01-01,11.6,FE4CE7F1CFFD7952


In [55]:
# First and last value of the 'date' column as loaded from the CSV.
(ride_temp["date"].min(), ride_temp["date"].max())

('2022-01-01', '2022-08-29')

In [56]:
# Parse the 'date' column into pandas datetimes so it can be used for
# time-based resampling.
ride_temp["date"] = pd.to_datetime(ride_temp["date"])

In [57]:
# Inspect the column dtypes of ride_temp.
ride_temp.dtypes

date       datetime64[ns]
tavg              float64
ride_id            object
dtype: object

In [58]:
# Earliest and latest entry in the 'date' column.
(ride_temp["date"].min(), ride_temp["date"].max())

(Timestamp('2022-01-01 00:00:00'), Timestamp('2022-08-29 00:00:00'))

In [59]:
# Bucket the rows by calendar month ('ME' = month-end labels). For each month,
# count the 'ride_id' entries and take the mean of 'tavg' over the rows that
# fall in it, then turn the date index back into a regular column.
monthly = (
    ride_temp
    .resample("ME", on="date")
    .agg(
        ride_count=pd.NamedAgg(column="ride_id", aggfunc="count"),
        tavg=pd.NamedAgg(column="tavg", aggfunc="mean"),
    )
    .reset_index()
)

In [60]:
# Show up to twelve monthly rows (one per month present in the data).
monthly.head(n=12)

Unnamed: 0,date,ride_count,tavg
0,2022-01-31,1024055,0.764144
1,2022-02-28,1197359,3.983508
2,2022-03-31,1846035,8.069757
3,2022-04-30,2261339,11.476519
4,2022-05-31,2865301,17.898715
5,2022-06-30,3344145,22.182325
6,2022-07-31,3397722,26.920687
7,2022-08-31,3312397,26.517238


In [64]:
# Persist the monthly aggregate to CSV.
# NOTE(review): the row index is written as an unnamed first column; pass
# index=False if downstream readers do not expect it — confirm before changing.
monthly.to_csv("monthly_rides_temp.csv", index=True)

In [None]:
# Dual-axis line chart: monthly ride counts on the primary (left) y-axis and
# the monthly mean of 'tavg' on the secondary (right) y-axis, both plotted
# against the month on a shared x-axis.

line1 = make_subplots(specs=[[{"secondary_y": True}]])

line1.add_trace(
    go.Scatter(
        x=monthly['date'],
        y=monthly['ride_count'],
        name='Monthly bike rides',
        marker={'color': 'blue'}
    ),
    secondary_y=False
)

line1.add_trace(
    go.Scatter(
        x=monthly['date'],
        y=monthly['tavg'],
        name='Monthly temperature',
        marker={'color': 'red'}
    ),
    secondary_y=True
)

line1.update_layout(
    title='CitiBike monthly rides vs. average temperature (2022)',
    xaxis_title='Month',
    width=900,
    height=600
)

# The two traces use very different scales, so label each y-axis explicitly;
# without titles the reader cannot tell which axis belongs to which line.
line1.update_yaxes(title_text='Number of rides', secondary_y=False)
line1.update_yaxes(title_text='Average temperature', secondary_y=True)

line1.show()

## CitiBike Ride Volume of the Top 20 Stations by Weekday Heatmap

In [6]:
# Load the per-station ride counts with one column per weekday.
weekday_top20 = pd.read_csv(filepath_or_buffer="weekday_top20_stations.csv")
weekday_top20.head()

Unnamed: 0,start_station_name,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
0,1 Ave & E 110 St,1849.0,2023.0,2160.0,2036.0,2200.0,1872.0,1719.0
1,1 Ave & E 16 St,5935.0,6533.0,6586.0,6782.0,6678.0,6038.0,5402.0
2,1 Ave & E 18 St,6169.0,6758.0,7184.0,6943.0,7307.0,6664.0,5789.0
3,1 Ave & E 30 St,4289.0,5015.0,5175.0,4821.0,4486.0,2976.0,2769.0
4,1 Ave & E 39 St,4624.0,5617.0,5718.0,5564.0,5134.0,4198.0,4069.0


In [7]:
# Collect the top-20 start station names into a plain Python list
top20_names = Top20_start_stations["start_station_name"].to_list()

In [8]:
# Keep only the rows whose station name is one of the top-20 start stations
weekday_top20 = weekday_top20.loc[
    weekday_top20["start_station_name"].isin(top20_names)
]

In [9]:
# Collapse to one row per station by summing the remaining columns.
# sort=False keeps the stations in their order of first appearance, and the
# station name becomes the index of the result.
weekday_top20 = weekday_top20.groupby("start_station_name", sort=False).sum()

In [10]:
# Sanity check on the grouped table's (rows, columns).
weekday_top20.shape

(20, 7)

In [62]:
# Save the grouped top-20 table, including its index (the station names).
# NOTE(review): this writes to the same path the table was loaded from, so the
# source CSV is overwritten with the filtered result — confirm this is intended.
weekday_top20.to_csv("weekday_top20_stations.csv", index=True)


In [None]:
# Heatmap of ride counts: one row per top-20 start station, one column per
# weekday, cell colour taken from the table values.

heatmap1 = go.Figure(
    go.Heatmap(
        x=weekday_top20.columns,
        y=weekday_top20.index,
        z=weekday_top20.values,
        colorscale="PuBuGn",
        showscale=True,
    )
)

heatmap1.update_layout(
    title="Ride Counts by Weekday for Top 20 Start Stations",
    xaxis_title="Weekday",
    yaxis_title="Top20 Station Name (descending)",
    width=1000,
    height=750,
    margin={"l": 200},  # wide left margin leaves room for the station names
)
# Pin the y-axis categories to the table's row order rather than letting
# Plotly pick one, and push the axis title away from the tick labels.
# NOTE(review): the rows follow the frame's order, which may not be descending
# by ride count as the axis title states — confirm.
heatmap1.update_yaxes(
    title_standoff=20,
    categoryorder="array",
    categoryarray=weekday_top20.index,
)

heatmap1.show()


## CitiBike Ride Volume by Weekday and Hour of Day Heatmap

In [33]:
# Load the ride counts per hour and weekday; the first CSV column is used as
# the row index.
weekday_hour = pd.read_csv("weekday_hour_rides.csv", index_col=0)
weekday_hour.head()

Unnamed: 0_level_0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
start_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,27298,24979,28675,32959,39828,61776,56292
1,14711,12640,14461,16683,22606,44480,42173
2,8890,7690,8644,9732,13407,32377,29781
3,5858,5024,5509,5938,8381,20439,20015
4,6961,6415,6612,6587,8243,13480,13571


In [None]:
# Heatmap of ride volume over the weekdays around the clock: one row per
# entry of the table's index, one column per weekday.

heatmap2 = go.Figure(
    data=go.Heatmap(
        z=weekday_hour.values,
        x=weekday_hour.columns,
        y=weekday_hour.index,
        colorscale="PuBuGn",
        showscale=True
    )
)

heatmap2.update_layout(
    # Title spelling fixed ("aroud" -> "around"); it is shown on the chart.
    title="Rides volume over the week days around the clock",
    xaxis_title="Weekday",
    yaxis_title="hour of the day",
    width=900,
    height=750
)
heatmap2.show()

## CitiBike Most Popular Routes