In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px

In [2]:
# Project root relative to this notebook's working directory.
ROOT = Path("..")

# Locations of the two sets of pre-aggregated marts under data/processed.
marts_pandas = ROOT.joinpath("data", "processed", "marts")
marts_spark = ROOT.joinpath("data", "processed", "marts_spark")

In [3]:
# Load every mart from the Spark output directory, which also demonstrates that
# the Spark-written parquet is readable from pandas. Swap `marts_spark` for
# `marts_pandas` to use the pandas-built marts instead (assumes that directory
# holds the same mart names -- not verified here).
agg_hour, agg_dow, agg_zone, pay_unk = (
    pd.read_parquet(marts_spark / mart_name)
    for mart_name in ("agg_hour", "agg_dow", "agg_zone", "pay_unknown_by_hour")
)

In [4]:
# Preview the first five rows of the hourly mart.
agg_hour.head(n=5)

Unnamed: 0,pickup_hour,trips,avg_fare,avg_distance
0,0,300979,19.692419,3.854704
1,1,201948,17.733322,3.267925
2,2,139892,16.623149,3.04057
3,3,91788,18.530557,3.516674
4,4,61136,23.436246,4.870803


In [5]:
# Line chart of trip counts per pickup hour, with a marker on every point.
fig = px.line(
    agg_hour,
    x="pickup_hour",
    y="trips",
    markers=True,
    title="NYC Yellow Taxi Trips by Hour (Jan 2024)",
)
# Tick spacing: every 2 units on the hour axis, every 200000 on the trips axis.
fig.update_xaxes(dtick=2).update_yaxes(dtick=200000)
fig.show()

# Save a PNG copy (scale factor 2) under <ROOT>/dashboards, creating the
# directory on first use.
out = ROOT.joinpath("dashboards")
out.mkdir(exist_ok=True)
fig.write_image(out.joinpath("trips_by_hour.png"), scale=2)

In [6]:
# Calendar order for the weekday axis (Monday first).
dow_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Cast the weekday column to an ordered categorical so that sorting follows
# calendar order rather than alphabetical order. Values not listed in
# `dow_order` become NaN.
weekday_dtype = pd.CategoricalDtype(categories=dow_order, ordered=True)
agg_dow["pickup_dow"] = agg_dow["pickup_dow"].astype(weekday_dtype)
agg_dow = agg_dow.sort_values(by="pickup_dow")

# Bar chart of trip counts per weekday.
fig1 = px.bar(
    agg_dow,
    x="pickup_dow",
    y="trips",
    title="NYC Yellow Taxi Trips by Day of Week (Jan 2024)",
)
fig1.show()

# Save a PNG copy (scale factor 2) under <ROOT>/dashboards, creating the
# directory on first use.
out = ROOT.joinpath("dashboards")
out.mkdir(exist_ok=True)
fig1.write_image(out.joinpath("trips_by_dow.png"), scale=2)


In [7]:
# Select the 15 zones with the highest trip counts explicitly. Taking the first
# 15 rows would only be a "top 15" if the parquet directory happened to reload
# in descending-trips order, which nothing in this notebook enforces.
# `nlargest` returns a new frame sorted by `trips` descending, so no extra
# `.copy()` is needed. (Assumes `trips` is numeric, as it is in `agg_hour`.)
top15 = agg_zone.nlargest(15, "trips")

# Horizontal bar chart: one bar per pickup zone, coloured by borough.
fig2 = px.bar(
    top15[::-1],  # rows reversed (smallest of the 15 first) before plotting
    x="trips",
    y="PU_Zone",
    color="PU_Borough",
    title="Top 15 NYC Taxi Pickup Zones (Jan 2024)",
)
# Order the zone axis by each zone's total trips.
fig2.update_layout(yaxis={'categoryorder':'total ascending'})
fig2.show()

# Save a PNG copy (scale factor 2) under <ROOT>/dashboards, creating the
# directory on first use.
out = ROOT / "dashboards"
out.mkdir(exist_ok=True)

fig2.write_image(out / "top_zones.png", scale=2)

In [8]:
# Line chart of the unknown-payment-type rate per pickup hour, with a marker on
# every point.
fig3 = px.line(
    pay_unk,
    x="pickup_hour",
    y="unknown_payment_rate",
    markers=True,
    title="Unknown Payment Type Rate by Hour",
)
# One tick per unit on the hour axis.
fig3.update_xaxes(dtick=1)
fig3.show()

# Save a PNG copy (scale factor 2) under <ROOT>/dashboards, creating the
# directory on first use.
out = ROOT.joinpath("dashboards")
out.mkdir(exist_ok=True)
fig3.write_image(out.joinpath("unknown_payment_rate.png"), scale=2)
