In [147]:
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output
import altair as alt
from IPython.display import IFrame
import pandas as pd
import numpy as np
import os
import glob
import zipfile
import shutil
from haversine import haversine, Unit
import matplotlib.pyplot as plt
import dash_bootstrap_components as dbc
from datetime import datetime

Process Data

In [55]:
def calculate_distance(row):
    """Return the haversine distance of a single ride in metres.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing the keys
            'start_lat', 'start_lng', 'end_lat' and 'end_lng'.

    Returns:
        The distance between the start and end coordinates, computed in
        kilometres, scaled to metres and rounded to two decimal places.
    """
    origin = (row['start_lat'], row['start_lng'])
    destination = (row['end_lat'], row['end_lng'])
    distance_km = haversine(origin, destination, unit=Unit.KILOMETERS)
    return round(distance_km*1000, 2)

# Load every raw trip archive into a single DataFrame and derive per-ride
# duration and distance columns.
zip_path = '../data/raw/*.zip'
files = glob.glob(zip_path)

# Scratch directory for the extracted CSVs. It is named before the loop so the
# CSV glob and the cleanup below still work when no archive matches.
temp_dir = 'temp_extracted'
frames = []
try:
    for zip_file in files:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)
    path = os.path.join(temp_dir, '*.csv')
    # Sorted so the row order of `data` does not depend on filesystem listing order.
    csv_files = sorted(glob.glob(path))
    for file in csv_files:
        df = pd.read_csv(file, parse_dates=['started_at','ended_at'])
        frames.append(df)
finally:
    # Always remove the scratch directory, even if extraction or parsing failed.
    shutil.rmtree(temp_dir, ignore_errors=True)

if not frames:
    # Fail with a clear message instead of a KeyError from the steps below.
    raise FileNotFoundError(f"No CSV files found in archives matching {zip_path!r}")

# Concatenate once; growing a DataFrame inside the loop copies it every iteration.
data = pd.concat(frames, ignore_index=True)

# Rides without an end coordinate cannot have a distance computed.
data = data.dropna(subset=['end_lat', 'end_lng'])
data['ride_duration'] = round((data['ended_at'] - data['started_at']).dt.total_seconds()/60, 2) # in minutes
data['ride_distance'] = data.apply(calculate_distance, axis=1) # in meters

In [56]:
# Number of missing values in each column of the ride data.
data.isnull().sum()

ride_id                    0
rideable_type              0
started_at                 0
ended_at                   0
start_station_name    517595
start_station_id      517727
end_station_name      545690
end_station_id        545831
start_lat                  0
start_lng                  0
end_lat                    0
end_lng                    0
member_casual              0
ride_duration              0
ride_distance              0
dtype: int64

In [58]:
# Preview the first five rides, including the derived columns.
data.head(n=5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,ride_duration,ride_distance
0,F96D5A74A3E41399,electric_bike,2023-01-21 20:05:42,2023-01-21 20:16:33,Lincoln Ave & Fullerton Ave,TA1309000058,Hampden Ct & Diversey Ave,202480.0,41.924074,-87.646278,41.93,-87.64,member,10.85,839.04
1,13CB7EB698CEDB88,classic_bike,2023-01-10 15:37:36,2023-01-10 15:46:05,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,8.48,1204.58
2,BD88A2E670661CE5,electric_bike,2023-01-02 07:51:57,2023-01-02 08:05:11,Western Ave & Lunt Ave,RP-005,Valli Produce - Evanston Plaza,599,42.008571,-87.690483,42.039742,-87.699413,casual,13.23,3543.69
3,C90792D034FED968,classic_bike,2023-01-22 10:52:58,2023-01-22 11:01:44,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,8.77,1204.58
4,3397017529188E8A,classic_bike,2023-01-12 13:58:01,2023-01-12 14:13:20,Kimbark Ave & 53rd St,TA1309000037,Greenwood Ave & 47th St,TA1308000002,41.799568,-87.594747,41.809835,-87.599383,member,15.32,1204.58


In [212]:
# Create the Dash application, styled with the Bootstrap MINTY theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.MINTY])

# Page heading.
page_title = html.H1(
    "Bike Sharing Analysis",
    className="text-primary",
    style={'text-align': 'center'},
)

# Date-range selector, initialised from the earliest ride start to the latest ride end.
date_range_picker = dcc.DatePickerRange(
    id='date-picker-range',
    start_date=data['started_at'].min(),
    end_date=data['ended_at'].max(),
    display_format='YYYY-MM-DD',
    className="mt-3",
)

# Element whose children are written by the date-range callback.
date_output = html.Div(
    id='date-output',
    className='mt-3',
    style={'text-align': 'center'},
)

# First row: picker and its output stacked vertically and centred.
date_picker_row = dbc.Row(
    [
        dbc.Col(
            [date_range_picker, date_output],
            style={'display': 'flex', 'flexDirection': 'column',  'alignItems': 'center'},
        )
    ]
)

# Second row: two half-width placeholder columns.
placeholder_row = dbc.Row(
    [
        dbc.Col(html.Div("Column 2", className="border"), width=6),
        dbc.Col(html.Div("Column 3", className="border"), width=6),
    ]
)

# Assemble the page inside a Bootstrap container.
app.layout = html.Div(
    dbc.Container([page_title, date_picker_row, placeholder_row])
)

@app.callback(
    Output('date-output', 'children'),
    [Input('date-picker-range', 'start_date'),
     Input('date-picker-range', 'end_date')]
)
def update_date_output(start_date, end_date):
    """Render the currently selected date range as text.

    Args:
        start_date: ISO-format date string from the picker's ``start_date``
            property, or None when no start date is set.
        end_date: ISO-format date string from the picker's ``end_date``
            property, or None when no end date is set.

    Returns:
        An empty string while either bound is missing; otherwise a sentence
        naming both bounds formatted as ``YYYY-MM-DD``.
    """
    # Either property can be unset (e.g. mid-selection); fromisoformat(None)
    # would raise TypeError, so leave the output empty until both are present.
    if not start_date or not end_date:
        return ""
    formatted_start_date = datetime.fromisoformat(start_date).strftime('%Y-%m-%d')
    formatted_end_date = datetime.fromisoformat(end_date).strftime('%Y-%m-%d')
    return f"Selected range: {formatted_start_date} to {formatted_end_date}"

if __name__ == '__main__':
    # `run_server` is the legacy entry point and is not available in recent
    # Dash releases; prefer `run` and fall back only on versions that lack it.
    run_app = getattr(app, 'run', None) or app.run_server
    run_app(debug=True)