## Import Libraries and Files

In [3]:
# Import Libraries 
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import streamlit as st
from datetime import datetime as dt

In [4]:
# Load the prepared bike DataFrame from its pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load pickles from a trusted source.
bike = pd.read_pickle(
    r'../Data/Prepared/bike_flag.pkl'
)

## Data Preparation

In [5]:
# Parse the 'date' column into datetime values using the explicit
# '%Y-%m-%d' layout, so the .dt accessor can be used on it afterwards
bike['date'] = pd.to_datetime(
    bike['date'],
    format='%Y-%m-%d',
)

In [6]:
# Create month col.: month number taken from the datetime 'date' column
bike['month'] = bike['date'].dt.month

In [7]:
# Cast the month column to the default integer dtype
bike['month'] = bike['month'].astype(int)

In [8]:
# Create season col.
# Label every row with a season derived from its month number:
#   1-3 -> winter, 4-6 -> spring, 7-9 -> summer, everything else (10-12) -> fall.
# The winter test used to be `12 < month <= 3`, a chained comparison that can
# never be true, so no row was ever labelled 'winter' and months 1-3 silently
# fell through to 'fall'.
bike['season'] = [
    'winter' if (1 <= month <= 3)
    else 'spring' if (3 < month <= 6)
    else 'summer' if (6 < month <= 9)
    else 'fall'
    for month in bike['month']
]

## Visualisations 

#### Bar Chart

In [10]:
# Count trips per start station: give every row a weight of 1,
# then add the weights up within each station
bike['value'] = 1
bike_grouped = bike.groupby('start_station', as_index=False)['value'].sum()

In [11]:
# Keep the 20 stations with the highest trip counts
top20 = bike_grouped.nlargest(n=20, columns='value')

In [None]:
# Bar chart of the top 20 start stations; each bar is coloured by its own trip count
station_bars = go.Bar(
    x=top20['start_station'],
    y=top20['value'],
    marker=dict(color=top20['value'], colorscale='oranges'),
)
fig = go.Figure(station_bars)

# Kept as the cell's last expression so the resulting figure is displayed
fig.update_layout(
    title='20 Most Popular Bike Stations in NY',
    xaxis_title='Start Stations',
    yaxis_title='Trips',
    width=900,
    height=600,
)

#### Line Chart

In [31]:
# Order rows chronologically so the line traces are drawn left to right in time
bike = bike.sort_values('date')

In [None]:
# Dual-axis line chart: bike rides on the primary y-axis, temperature on the secondary one.
# NOTE(review): bike appears to hold one row per trip, so each trace may carry
# many repeated points per date — confirm whether plotting one row per date is wanted.
fig = make_subplots(specs=[[{'secondary_y': True}]])

# Trace 1: daily bike rides, drawn in blue
rides_trace = go.Scatter(
    x=bike['date'],
    y=bike['trips_per_day'],
    name='Daily Bike Rides',
    marker=dict(color='blue'),
    line={'color': 'blue'},
)

# Trace 2: daily temperature, drawn in red
temperature_trace = go.Scatter(
    x=bike['date'],
    y=bike['avg_temp'],
    name='Daily Temperature',
    marker=dict(color='red'),
    line={'color': 'red'},
)

# Attach each trace to its axis (rides -> primary, temperature -> secondary)
fig.add_trace(rides_trace, secondary_y=False)
fig.add_trace(temperature_trace, secondary_y=True)

# Title and size; kept as the cell's last expression so the figure is displayed
fig.update_layout(
    title='Daily Bike Trips and Temperature in NY (2023)',
    height=800,
)

## Save Files

In [12]:
# Write the top-20 station table to CSV (the DataFrame index is written as well)
top20.to_csv(
    r'../Data/Prepared/top20.csv'
)

In [18]:
# Keep only the columns the dashboard needs
dash = bike.loc[:, ['date', 'start_station', 'trips_per_day', 'avg_temp', 'season', 'value']]

# Draw a reproducible random subset: with a fixed seed, 'red' flags the rows
# whose uniform draw is <= 0.92, and 'small' keeps the rows that are NOT flagged
np.random.seed(32)
red = np.random.rand(dash.shape[0]) <= 0.92
small = dash.loc[~red]

# Show (rows, columns) of the sample
small.shape

 

(79019, 6)

In [19]:
# Write the sampled dashboard data to CSV (the DataFrame index is written as well)
small.to_csv(
    r'../Data/Prepared/dashdata.csv'
)