In [81]:
import kagglehub

# Fetch the latest published version of the dataset through kagglehub.
# `path` holds whatever location `dataset_download` returns and is reused
# by the loading cell further down.
DATASET_HANDLE = "edwinytleung/nyc-yellow-taxi-2015-sample-data"
path = kagglehub.dataset_download(DATASET_HANDLE)

print(f"Path to dataset files: {path}")

Path to dataset files: /root/.cache/kagglehub/datasets/edwinytleung/nyc-yellow-taxi-2015-sample-data/versions/2


In [83]:
!pip install dash



In [84]:
!pip install dash_bootstrap_components



In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dash import dcc, html, Dash, dash_table, callback
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
from dash import no_update

In [None]:
# Load the trip records from the downloaded dataset directory.
train = pd.read_csv(f"{path}/train_2015.csv")
# save data on local device
train.to_csv('train.csv', index=False)
# Preview only the first rows instead of emitting the whole frame as cell output.
train.head()

In [87]:
# Q1: How many total trips are in the dataset?
# Each row of `train` is one trip, so the row count is the answer.
totalNoTrip = len(train)
print(f'Total trips in the dataset: {totalNoTrip}')

Total trips in the dataset: 2936867


In [88]:
# Q2: What are the unique taxi vendors in the dataset?
# Distinct values of the VendorID column, in order of first appearance.
vendor_ids = train['VendorID'].unique()
print('Unique taxi vendors in the dataset:', vendor_ids)

Unique taxi vendors in the dataset: [2 1]


In [89]:
# Style of the Dashboard
# Inline CSS shared by the three navigation buttons. Dash `style` dicts use
# camelCase property names, so every key here follows that convention.
buttonStyle = {
    'backgroundColor': 'black',
    'color': '#007bff',
    'fontWeight': "bolder",
    'fontSize': '20px',
    'width' : '100%',
    'height': '130px',
    'border': '5px #007bff solid',
    'padding': '10px',
    'textAlign': 'center',
}


In [100]:
import dash
from dash import html, dcc, Dash
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output


# Initialize App
external_stylesheets = [dbc.themes.CERULEAN]
# The analysis panels (vendor dropdown, graphs, "Trip-analysis" / "Fare-analysis"
# rows) are inserted by the `update_output` callback after a button click, so the
# components other callbacks target are absent from the initial layout.
# `suppress_callback_exceptions=True` tells Dash not to flag those callbacks.
app = Dash(
    __name__,
    external_stylesheets=external_stylesheets,
    suppress_callback_exceptions=True,
)


def _stat_card(text):
    """Return one blue headline card showing `text` centred in an H4."""
    return dbc.Card(
        dbc.CardBody(html.H4(text, className="card-title text-black text-center")),
        className="rounded-3",
        style={"boxShadow": "0px 4px 10px rgba(0, 0, 255, 0.5)", "backgroundColor": "#007bff"},
    )


# Derive the vendor count from the data instead of hard-coding it in the card text.
uniqueVendorCount = train['VendorID'].nunique()

# Layout
app.layout = dbc.Container([
    dbc.Row(html.H1("Yellow Taxi Trips Analysis", className="text-primary text-center fs-1 fw-bolder")),

    # Headline statistics.
    dbc.Row([
        dbc.Col(_stat_card(f"🚖 Total Trips: {totalNoTrip}"), width=6),
        dbc.Col(_stat_card(f"🛺 {uniqueVendorCount} Unique Taxi Vendors"), width=6),
    ], className="my-3"),

    # Navigation buttons on the left, callback-driven content on the right.
    dbc.Row([
        dbc.Col(
            html.Div([
                html.Button("Vendor Analysis", className="btn my-3", id="vendor-analysis-button", style=buttonStyle),
                html.Button("Trip Place Analysis", className="btn my-3", id="trip-analysis-button", style=buttonStyle),
                html.Button("Fare Analysis", className="btn my-3", id="fare-analysis-button", style=buttonStyle),
                dcc.Store(id="active-button", data="")
            ]),
            width=3
        ),
        dbc.Col(html.Div(id="output-content"), width=9)
    ], className="my-3", style={'height': '70vh'}),

], fluid=True, className="bg-black", style={'height': '120vh'})

# Callback
def _landing_content():
    """Build the introductory panel shown until a navigation button is clicked."""
    return html.Div([
        dbc.Row([
            dbc.Col([
                html.H2("Yellow Taxi Trips Analysis", className="text-primary text-center fs-1 fw-bolder"),
                html.P("It is an analysis made for taxi trips from all around the world.", className="text-center text-white"),
                html.P("Made by: Aly El-Badry", className="text-center text-secondary fs-5 fw-bold"),
                dbc.Button("My Portfolio",
                          href="https://alyelbadryportfolio.netlify.app/",
                          className="mt-3",
                          style={"border": "3px solid #007bff", "backgroundColor": "black", "color": "white", "padding": "10px 20px"}
                )
            ], width=12, className="text-center")

        ], className="justify-content-center mt-5")
    ],
    className="d-flex flex-column align-items-center justify-content-center text-white",
    style={"backgroundColor": "black", "height": "65vh"})


def _vendor_content():
    """Build the vendor panel: filter controls on the left, graph on the right.

    The control ids ('vendor-dropdown', 'fare-slider', 'date-picker') and the
    output ids ('vendor-graph', 'average') are the ones `update_vendor_graph`
    is wired to.
    """
    controls = html.Div([
        html.P("Choose type of Vendor-Taxi: ", style={"color": "white"}),

        dcc.Dropdown(
            id='vendor-dropdown',
            options=[
                {"label": "Vendor 1", "value": 1},
                {"label": "Vendor 2", "value": 2}
            ],
            value=1,
            style={"color": "black", "backgroundColor": "#007bff"},
            clearable=False
        ),

        html.P("Choose the fare amount: ", style={"color": "white", "marginTop": "20px"}),

        dcc.Slider(
            id='fare-slider',
            min=0,
            max=100,
            step=1,
            value=10,
            marks={i: f"${i}" for i in range(0, 101, 10)},
            tooltip={"placement": "bottom", "always_visible": True},
            included=True
        ),

        html.P("Choose the trip Date: ", style={"color": "white", "marginTop": "20px"}),

        # Default range spans the earliest to the latest pickup value in the data.
        dcc.DatePickerRange(
            id='date-picker',
            start_date=train['tpep_pickup_datetime'].min(),
            end_date=train['tpep_pickup_datetime'].max(),
            display_format='YYYY-MM-DD',
        ),

        html.Div(id="average", style={"color": "white"}, className="my-3")
    ], style={"padding": "20px"})

    return html.Div([
        dbc.Row(html.H2("Vendor Analysis", className="text-primary text-center fs-1 fw-bolder")),
        dbc.Row([
            dbc.Col([controls], width=4),
            dbc.Col([
                dcc.Graph(id='vendor-graph', figure={})
            ], width=8)
        ])
    ])


def _placeholder_section(title, row_id):
    """Build a titled panel whose empty row `row_id` is filled by another callback."""
    return html.Div([
        dbc.Row(html.H2(title, className="text-primary text-center fs-1 fw-bolder")),
        dbc.Row(id=row_id)
    ])


@app.callback(
    [Output("output-content", "children"),
     Output("vendor-analysis-button", "style"),
     Output("trip-analysis-button", "style"),
     Output("fare-analysis-button", "style")],
    [Input("vendor-analysis-button", "n_clicks"),
     Input("trip-analysis-button", "n_clicks"),
     Input("fare-analysis-button", "n_clicks")]
)
def update_output(n_vendor, n_trip, n_fare):
    """Swap the main panel and highlight the navigation button that was clicked.

    Args:
        n_vendor, n_trip, n_fare: click counts of the three navigation buttons.
            They only serve as triggers; the button that fired is read from the
            callback context.

    Returns:
        A 4-tuple: the panel for 'output-content', followed by the style dicts
        for the vendor, trip and fare buttons (in that order). The clicked
        button gets the highlighted style, the others get `buttonStyle`. When no
        button triggered the call, the landing panel is returned and no button
        is highlighted.
    """
    ctx = dash.callback_context
    button_id = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered and ctx.triggered[0]['prop_id'] else None

    # Highlighted variant of the shared button style: colours inverted so the
    # active button reads as "selected".
    active_style = {**buttonStyle, 'backgroundColor': '#007bff', 'color': 'black'}

    panel_builders = {
        "vendor-analysis-button": _vendor_content,
        "trip-analysis-button": lambda: _placeholder_section("Trip Analysis for 20000 of data", "Trip-analysis"),
        "fare-analysis-button": lambda: _placeholder_section("Fare Analysis", "Fare-analysis"),
    }
    # Unknown / missing trigger falls back to the landing panel.
    content = panel_builders.get(button_id, _landing_content)()

    # Order must match the Output declarations above.
    nav_buttons = ("vendor-analysis-button", "trip-analysis-button", "fare-analysis-button")
    styles = [active_style if button_id == nav_id else buttonStyle for nav_id in nav_buttons]

    return (content, *styles)
@app.callback(
    [Output('vendor-graph', 'figure'),
     Output("average", "children")],
    [Input('vendor-dropdown', 'value'),
     Input('fare-slider', 'value'),
     Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date')]
)
def update_vendor_graph(selected_vendor, selected_fare, start_date, end_date):
    """Redraw the vendor histogram and average-fare line for the chosen filters.

    Args:
        selected_vendor: VendorID picked in the dropdown.
        selected_fare: upper bound (inclusive) on `fare_amount` from the slider.
        start_date, end_date: bounds from the date picker; either may be None
            while the user is still choosing a range.

    Returns:
        A `(figure, children)` tuple for the 'vendor-graph' figure and the
        'average' div. If a date bound is missing the figure is left untouched
        (`dash.no_update`) and a hint is shown instead of an average.
    """
    if not start_date or not end_date:
        return (dash.no_update, html.P("Select both a start and an end date."))

    # Keep only the date part of each bound and extend the end bound to the last
    # second of that day; a bare 'YYYY-MM-DD' end bound would otherwise sort
    # before every timestamp on that day and silently drop the whole day.
    # NOTE(review): assumes tpep_pickup_datetime holds 'YYYY-MM-DD HH:MM:SS'
    # strings (or a datetime dtype) - confirm against the CSV.
    start_bound = str(start_date)[:10]
    end_bound = f"{str(end_date)[:10]} 23:59:59"

    pickup_time = train["tpep_pickup_datetime"]
    filtered_df = train[
        (train["VendorID"] == selected_vendor) &
        (train["fare_amount"] <= selected_fare) &
        (pickup_time >= start_bound) &
        (pickup_time <= end_bound)
    ]

    # Only the first 20000 matching rows are plotted; the average below uses all of them.
    fig = px.histogram(
        filtered_df[:20000],
        x="tpep_dropoff_datetime",
        y="fare_amount",
        title="Fare Amount Distribution Over Time",
        color="VendorID",
        nbins=50,
        barmode="overlay",
        opacity=0.7,
        color_discrete_sequence=["#007bff", "#ff5733"]
    )

    fig.update_layout(
        xaxis_title="Dropoff Date",
        yaxis_title="Fare Amount",
        template="plotly_dark"
    )

    # The mean of an empty selection is NaN; show a message rather than "$nan".
    if filtered_df.empty:
        summary = html.P("No trips match the selected filters.")
    else:
        average_fare = filtered_df["fare_amount"].mean()
        summary = html.P(f"Average Fare: ${average_fare:.2f}")

    return (fig, summary)
@app.callback(
    Output("Trip-analysis", "children"),
    Input("trip-analysis-button", "n_clicks")
)
def update_trip_analysis(n_clicks):
    """Render the drop-off scatter map and density map for the trip panel.

    Args:
        n_clicks: click count of the trip-analysis button; a falsy value
            (None or 0) means the button has not been clicked yet.

    Returns:
        `dash.no_update` before the first click, otherwise a `dbc.Row` holding
        the two map graphs side by side.
    """
    if not n_clicks:
        return dash.no_update

    # Both maps use the same first 20000 rows.
    sample = train[:20000]

    # Centre on the median drop-off of the plotted sample rather than on one
    # arbitrary row, so a single odd coordinate cannot move the viewport.
    map_center = {
        "lat": float(sample["dropoff_latitude"].median()),
        "lon": float(sample["dropoff_longitude"].median()),
    }

    scatter_fig = px.scatter_mapbox(
        sample,
        lat="dropoff_latitude",
        lon="dropoff_longitude",
        hover_name="VendorID",
        color="VendorID",
        zoom=8,
        center=map_center,
        title="Dropoff Locations"
    )

    density_fig = px.density_mapbox(
        sample,
        lat="dropoff_latitude",
        lon="dropoff_longitude",
        radius=1,
        center=map_center,
        zoom=8,
        title="Dropoff Density"
    )

    # Same base map and theme for both figures.
    for fig in (scatter_fig, density_fig):
        fig.update_layout(
            mapbox_style="carto-positron",
            template="plotly_dark"
        )

    content = dbc.Row([
        dbc.Col(
            dcc.Graph(figure=scatter_fig)
        ),
        dbc.Col(
            dcc.Graph(figure=density_fig)
        )])
    return content



@app.callback(
    Output("Fare-analysis", "children"),
    Input("fare-analysis-button", "n_clicks")
)
def update_fare_analysis(n_clicks):
    """Render the fare panel: two summary lines plus two charts.

    Args:
        n_clicks: click count of the fare-analysis button; a falsy value
            (None or 0) means the button has not been clicked yet.

    Returns:
        `dash.no_update` before the first click, otherwise a `dbc.Row` with the
        most common payment type, the most common pickup coordinate pair, a
        fare histogram and a duration-vs-distance scatter plot.
    """
    if not n_clicks:
        return dash.no_update

    most_common_payment = train["payment_type"].value_counts().idxmax()

    # Take the most frequent (longitude, latitude) *pair*. Picking the most
    # frequent longitude and latitude independently can combine two values
    # that never occur together in any trip.
    most_common_long, most_common_lat = (
        train.groupby(["pickup_longitude", "pickup_latitude"]).size().idxmax()
    )

    # Charts use the first 20000 rows only. The duration column is added to this
    # sample (via `assign`, which returns a new frame) so the shared `train`
    # frame is not modified from inside a callback and datetime parsing is
    # limited to the rows that are actually plotted.
    sample = train[:20000]
    pickup_ts = pd.to_datetime(sample["tpep_pickup_datetime"])
    dropoff_ts = pd.to_datetime(sample["tpep_dropoff_datetime"])
    sample = sample.assign(
        trip_duration=(dropoff_ts - pickup_ts).dt.total_seconds() / 60  # minutes
    )

    fig1 = px.histogram(
        sample,
        x="fare_amount",
        nbins=50,
        title="Fare Amount Distribution",
        color_discrete_sequence=["#007bff"]
    )

    fig1.update_layout(
        xaxis_title="Fare Amount",
        yaxis_title="Frequency",
        template="plotly_dark"
    )

    fig2 = px.scatter(
        sample,
        x="trip_distance",
        y="trip_duration",
        title="Trip Duration vs. Distance",
        color_discrete_sequence=["#007bff"]
    )
    fig2.update_layout(
        xaxis_title="Trip Distance",
        yaxis_title="Trip Duration",
        template="plotly_dark"
    )

    content = dbc.Row([
        dbc.Row(html.P(f"Most Common Payment Type: {most_common_payment}", className="text-primary text-center fs-5 fw-bolder")),
        dbc.Row(html.P(f"Most Common Pickup Place: {most_common_long} and {most_common_lat}", className="text-primary text-center fs-5 fw-bolder")),
        dbc.Row([
          dbc.Col(
              dcc.Graph(figure=fig1)
          ),
          dbc.Col(
              dcc.Graph(figure=fig2)
          )
        ])
    ])
    return content


# Run App
# Start the Dash server with debug mode switched off.
if __name__ == "__main__":
    app.run(debug=False)


<IPython.core.display.Javascript object>