In [1]:
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output
import altair as alt
from IPython.display import IFrame
import pandas as pd
import numpy as np
import os
import glob
import zipfile
import shutil
from haversine import haversine, Unit
import matplotlib.pyplot as plt
import dash_bootstrap_components as dbc
from datetime import datetime
import plotly.express as px

Process Data

In [2]:
def calculate_distance(row):
    start_coords = (row['start_lat'], row['start_lng'])
    end_coords = (row['end_lat'], row['end_lng'])
    return round(haversine(start_coords, end_coords, unit=Unit.KILOMETERS)*1000,2)

zip_path = '../data/raw/*.zip'
files = glob.glob(zip_path)
for zip_file in files:
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        temp_dir = 'temp_extracted'
        zip_ref.extractall(temp_dir)
path = os.path.join(temp_dir, '*.csv')
csv_files = glob.glob(path)
data = pd.DataFrame()
for file in csv_files:
    df = pd.read_csv(file, parse_dates=['started_at','ended_at'])
    df.head()
    data = pd.concat([data, df], ignore_index=True)
shutil.rmtree(temp_dir, ignore_errors=True)

data.dropna(subset=['end_lat', 'end_lng'], inplace=True)
data['ride_duration'] = round((data['ended_at'] - data['started_at']).dt.total_seconds()/60, 2) # in minutes
data['ride_distance'] = data.apply(calculate_distance, axis=1) # in meters
data.drop(data[data['ride_duration'] < 0].index, inplace=True)

In [3]:
data.isna().sum()

ride_id                    0
rideable_type              0
started_at                 0
ended_at                   0
start_station_name    517513
start_station_id      517645
end_station_name      545621
end_station_id        545762
start_lat                  0
start_lng                  0
end_lat                    0
end_lng                    0
member_casual              0
ride_duration              0
ride_distance              0
dtype: int64

In [4]:
data.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,ride_duration,ride_distance
0,F96D5A74A3E41399,electric_bike,2023-01-21 20:05:42,2023-01-21 20:16:33,Lincoln Ave & Fullerton Ave,TA1309000058,Hampden Ct & Diversey Ave,202480.0,41.924074,-87.646278,41.93,-87.64,member,10.85,839.04
1,13CB7EB698CEDB88,classic_bike,2023-01-10 15:37:36,2023-01-10 15:46:05,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,8.48,1204.58
2,BD88A2E670661CE5,electric_bike,2023-01-02 07:51:57,2023-01-02 08:05:11,Western Ave & Lunt Ave,RP-005,Valli Produce - Evanston Plaza,599,42.008571,-87.690483,42.039742,-87.699413,casual,13.23,3543.69
3,C90792D034FED968,classic_bike,2023-01-22 10:52:58,2023-01-22 11:01:44,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,8.77,1204.58
4,3397017529188E8A,classic_bike,2023-01-12 13:58:01,2023-01-12 14:13:20,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,15.32,1204.58


In [5]:
len(data[(data['started_at'] > '2023-02-01 00:00:00') & (data['started_at'] < '2023-02-28 00:00:00')])

180088

In [6]:
hourly_data = pd.DataFrame(data['started_at'].dt.hour.value_counts().sort_index(), columns=['count']).reset_index()

alt.data_transformers.enable('vegafusion')
data['started_at'] = pd.to_datetime(data['started_at'])
data['ended_at'] = pd.to_datetime(data['ended_at'])
df = data[(data['started_at'] > '2023-01-01 00:00:00') & (data['started_at'] < '2023-01-31 00:00:00')]

start_date = df['started_at'].min()
end_date = df['ended_at'].max()

filtered_df = df[(df['started_at']>=str(start_date)) & (df['ended_at']<=str(end_date))]

by_hour = alt.Chart(filtered_df).mark_bar().encode(
    x=alt.X('hours(started_at):O', title=None),
    y=alt.Y('count():Q', axis=alt.Axis(ticks=False), title=None),
    color=alt.Color('rideable_type:N', title=None, scale=alt.Scale(scheme='greenblue')),
    tooltip=['rideable_type:N', 'count():Q']).transform_filter(
        alt.FieldOneOfPredicate(field='rideable_type', oneOf=['electric_bike', 'classic_bike']))

by_day = alt.Chart(filtered_df).mark_bar().encode(
    x=alt.X('day(started_at):O', title=None),
    y=alt.Y('count():Q', axis=alt.Axis(ticks=False), title=None),
    color=alt.Color('rideable_type:N')
    )

alt.concat(by_hour | by_day).configure_view(stroke=None)



# by_month = alt.Chart(data).mark_bar().encode(
#     x=alt.X('month(started_at):O'),
#     y=alt.Y('mean(ride_duration):Q'),
#     color=alt.Color('rideable_type:N')
#     )

# by_duration = alt.Chart(data).mark_bar(clip=True).encode(
#     x=alt.X('ride_duration:Q', bin=alt.Bin(maxbins=50,extent=[0,140])),
#     y=alt.Y('count():Q'),
#     color=alt.Color('rideable_type:N'),
#     tooltip=['ride_duration:Q', 'count()']
# ).interactive()


In [12]:
# Set up the app with the Bootstrap theme
alt.data_transformers.enable('vegafusion')
df = data[(data['started_at'] > '2023-01-01 00:00:00') & (data['started_at'] < '2023-01-31 00:00:00')]
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.MINTY])

# Define the layout with Dash Bootstrap Components
app.layout = html.Div(
    dbc.Container(
        [
            html.H1("Bike Sharing Analysis", className="text-primary", style={'text-align': 'center'}),
            dbc.Row(
                [
                    dbc.Col(
                        [ 
                            html.Div(
                                [
                                    dcc.DatePickerRange(
                                        id='date-picker-range',
                                        start_date=df['started_at'].min(),
                                        end_date=df['ended_at'].max(),
                                        display_format='YYYY-MM-DD',
                                        className="mt-3",
                                        min_date_allowed=df['started_at'].min(), 
                                        max_date_allowed=df['ended_at'].max()
                                    )
                                ]
                            )
                        ],
                        style={'display': 'flex', 'flexDirection': 'column',  'alignItems': 'center', 'border': '1px solid black'}
                    )
                ]
            ),
            dbc.Row(
                [
                    html.Div(
                        [
                            dbc.Table(
                                # Table header
                                children=[
                                    html.Tbody(
                                        [
                                            html.Tr(
                                                [
                                                    html.Th(scope='row', className='table-dark', children="Rider Trends"),
                                                ]
                                            ),
                                        ]
                                    )
                                ], 
                                style={'text-align':'center', 'padding':'0vh', 'margin':'0vh'}
                            )
                        ]
                    )
                ]
            ),
            dbc.Row(
                [
                    dbc.Col(
                        [
                            html.Iframe(
                                id='rider-trend-bar',
                                srcDoc='',
                                style={'width': '100%', 'height':'100%', 'margin': 'auto', 'border': '1px solid black'}
                            )
                        ], 
                        width=10, 
                        style={'height':'45vh','margin-left': '0px', 'margin-right': '0px', 'paddingRight': '0px'}
                    ),
                    dbc.Col(
                        [
                            dcc.Checklist(
                                id='rider-trend-box',
                                options=[
                                    {'label': "Classic Bike", 'value': 'classic_bike'},
                                    {'label': "Docked Bike", 'value': 'docked_bike'},
                                    {'label': "Electric Bike", 'value': 'electric_bike'}
                                ],
                                value=['classic_bike', 'docked_bike', 'electric_bike'],
                                labelStyle={'display': 'block', 'margin-bottom': '5px'},
                                inputClassName="form-check-input",
                                labelClassName="form-check-label",
                                className="form-check",
                                style={'text-align': 'left', 'width': 'fit-content', 'border': '1px solid black'}
                            )
                        ], 
                        width=2, 
                        style={'margin': 'auto', 'paddingLeft': '0px', 'text-align': 'left'}
                    )
                ]
            ),
            dbc.Row(
                [
                    dcc.RadioItems(
                        id='rider-trend-radio',
                        options=[
                            {'label': "Number of Rides", 'value': 'count()'},
                            {'label': "Average Duration", 'value': 'mean(ride_duration)'}
                        ],
                        value='count()',
                        labelStyle={'display': 'inline-block', 'margin-left': '20px', 'margin-right': '20px'},
                        inputClassName="form-check-input",
                        labelClassName="form-check-label",
                        className="form-check",
                        style={'text-align': 'center', 'margin': 'auto', 'width': 'fit-content', 'border': '1px solid black'}
                    )
                ], 
                style={'text-align': 'center'}
            )
        ]
    )
)

@app.callback(
    Output('rider-trend-bar', 'srcDoc'),
    [Input('rider-trend-radio', 'value'),
    Input('rider-trend-box', 'value'),
    Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date')]
)
def plot_rider_trend(func, cat, start_date, end_date):
    filtered_df = df[(df['started_at'] >= str(start_date)) & (df['ended_at']<= str(end_date))]

    by_hour = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X('hours(started_at):O', title=None),
        y=alt.Y(f'{func}:Q', axis=alt.Axis(ticks=False), title=None),
        color=alt.Color('rideable_type:N', title=None, scale=alt.Scale(scheme='viridis')),
        tooltip=[
            alt.Tooltip('rideable_type:N', title='Ride Type'),
            alt.Tooltip(f'{func}:Q', title=None)
        ]
    ).transform_filter(alt.FieldOneOfPredicate(field='rideable_type', oneOf=cat))
    
    by_day = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X('day(started_at):O', title=None),
        y=alt.Y(f'{func}:Q', axis=alt.Axis(ticks=False), title=None),
        color=alt.Color('rideable_type:N', scale=alt.Scale(scheme='viridis')),
        tooltip=[
            alt.Tooltip('rideable_type:N', title='Ride Type'),
            alt.Tooltip(f'{func}:Q', title=None)
        ]
    ).transform_filter(alt.FieldOneOfPredicate(field='rideable_type', oneOf=cat))
    
    chart_1 = alt.concat(by_hour | by_day).configure_view(stroke=None)
    vega_lite_json = chart_1.to_json(format='vega')

    chart_obj = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
            <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
            <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
        </head>
        <body>
        <center>
            <div id="altair-chart-container"></div>
            <script>
                var spec = {vega_lite_json};
                vegaEmbed('#altair-chart-container', spec);
            </script>
        </center>
        </body>
        </html>
    """
    return chart_obj


if __name__ == '__main__':
    app.run_server(debug=True)
