In [None]:
import requests
import pandas as pd
import altair as alt

In [None]:
def mta_ridership_data(url, timeout=30):
    '''
    Query the API to pull daily ridership data.

    Parameters
    ----------
    url : str
        Full URL of a JSON endpoint whose payload can be loaded by
        ``pd.DataFrame`` and which contains a ``date`` field.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pd.DataFrame
        The payload as a DataFrame, with ``date`` normalised to
        ``YYYY-MM-DD`` strings.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of turning an error payload into a
    # DataFrame that later breaks with an unrelated KeyError on 'date'.
    response.raise_for_status()

    df = pd.DataFrame(response.json())
    # Round-trip through datetime so every date is rendered as a plain
    # 'YYYY-MM-DD' string, whatever timestamp format the API returned.
    df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
    return df

# Endpoint for the daily ridership dataset; the oversized $limit asks for every row in one response.
MTA_RIDERSHIP_URL = "https://data.ny.gov/resource/vxuj-8kew.json?$limit=9999999999999"

mta_df = mta_ridership_data(MTA_RIDERSHIP_URL)
mta_df

Unnamed: 0,date,subways_total_estimated_ridership,subways_of_comparable_pre_pandemic_day,buses_total_estimated_ridersip,buses_of_comparable_pre_pandemic_day,lirr_total_estimated_ridership,lirr_of_comparable_pre_pandemic_day,metro_north_total_estimated_ridership,metro_north_of_comparable_pre_pandemic_day,access_a_ride_total_scheduled_trips,access_a_ride_of_comparable_pre_pandemic_day,bridges_and_tunnels_total_traffic,bridges_and_tunnels_of_comparable_pre_pandemic_day,staten_island_railway_total_estimated_ridership,staten_island_railway_of_comparable_pre_pandemic_day
0,2020-03-01,2212965,0.97,984908,0.99,86790,1.00,55825,0.59,19922,1.13,786960,0.98,1636,0.52
1,2020-03-02,5329915,0.96,2209066,0.99,321569,1.03,180701,0.66,30338,1.02,874619,0.95,17140,1.07
2,2020-03-03,5481103,0.98,2228608,0.99,319727,1.02,190648,0.69,32767,1.10,882175,0.96,17453,1.09
3,2020-03-04,5498809,0.99,2177165,0.97,311662,0.99,192689,0.70,34297,1.15,905558,0.98,17136,1.07
4,2020-03-05,5496453,0.99,2244515,1.00,307597,0.98,194386,0.70,33209,1.12,929298,1.01,17203,1.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1771,2025-01-05,1791020,0.86,618750,0.67,96683,1.17,79761,0.88,23665,1.75,726411,1.06,2039,0.74
1772,2025-01-06,3436491,0.67,1238969,0.61,229503,0.76,202575,0.75,35840,1.27,792617,0.91,6887,0.42
1773,2025-01-07,3781536,0.74,1275936,0.62,245930,0.81,220945,0.82,38048,1.34,842968,0.97,7409,0.45
1774,2025-01-08,3830616,0.74,1256260,0.61,239444,0.79,209550,0.78,39406,1.39,852748,0.98,7798,0.48


In [None]:
def weather_data(lat, lon, start, end, timeout=30):
    '''
    Query the API for relevant daily weather data: mean temperature,
    precipitation total and precipitation hours.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude in decimal degrees, sent to the API as-is.
        Note that western longitudes are negative.
    start, end : str
        First and last day of the requested range, sent as ``start_date`` /
        ``end_date`` (the notebook passes ``YYYY-MM-DD`` strings).
    timeout : float, optional
        Seconds passed to ``requests.get`` as its timeout.

    Returns
    -------
    pd.DataFrame
        The ``daily`` section of the response with its ``time`` column renamed
        to ``date``. Empty if the response has no ``daily`` section.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    # Let requests build and escape the query string. Only the 'daily' section
    # of the response is used below, so no hourly series is requested.
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start,
        "end_date": end,
        "daily": "temperature_2m_mean,precipitation_sum,precipitation_hours",
        "timezone": "America/New_York",
    }
    response = requests.get(
        "https://archive-api.open-meteo.com/v1/archive",
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()

    # The payload carries other metadata as well; only the daily series is needed.
    daily_data = response.json().get('daily', {})
    return pd.DataFrame(daily_data).rename(columns={'time': 'date'})

In [None]:
# New York City coordinates. The longitude must be negative (west of the prime
# meridian); a positive 73.98503 would request weather for a point in Central Asia.
weather_df = weather_data(40.7478, -73.98503, "2020-03-01", "2024-10-01")
weather_df

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,precipitation_hours
0,2020-03-01,-3.8,0.0,0.0
1,2020-03-02,-4.8,0.0,0.0
2,2020-03-03,-2.8,0.0,0.0
3,2020-03-04,-4.4,0.1,1.0
4,2020-03-05,-5.3,14.2,16.0
...,...,...,...,...
1671,2024-09-27,-0.5,17.5,13.0
1672,2024-09-28,-3.0,0.0,0.0
1673,2024-09-29,2.3,0.0,0.0
1674,2024-09-30,1.9,1.8,5.0


In [None]:
def merge_data(mta_df,weather_df):
    '''
    Combine ridership and weather observations into a single frame.

    Rows are matched on the shared "date" column using pandas' default
    (inner) join, so only dates present in both inputs are returned.
    '''
    return mta_df.merge(weather_df, on="date")
merged_df = merge_data(mta_df,weather_df)

# Build the analysis frame from a copy: a plain `df = merged_df` is only an
# alias, so the column assignments below would silently rewrite merged_df too.
df = merged_df.copy()
df['date'] = pd.to_datetime(df['date'])

# Measure columns to coerce to numbers. The ridership frame is loaded straight
# from JSON, so these presumably arrive as strings; errors='coerce' turns
# anything unparseable into NaN instead of raising.
columns_to_convert = [
    'subways_total_estimated_ridership',
    'staten_island_railway_total_estimated_ridership',
    'bridges_and_tunnels_total_traffic',
    'metro_north_total_estimated_ridership',
    'lirr_total_estimated_ridership',
    'buses_total_estimated_ridersip',  # misspelled upstream; renamed below
    'access_a_ride_total_scheduled_trips',
    'precipitation_sum',
]
# The "share of comparable pre-pandemic day" columns feed the chart's
# percentage view, so they need to be numeric as well.
columns_to_convert += [
    name for name in df.columns
    if name.endswith('_of_comparable_pre_pandemic_day')
]
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce')
df['subways_total_estimated_ridership'] = df['subways_total_estimated_ridership'].astype(float)

# Ordered categorical so weekday groupings sort Monday..Sunday, not alphabetically.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['day_of_week'] = pd.Categorical(df['date'].dt.day_name(), categories=day_order, ordered=True)

# Fix the upstream typo so downstream code can use the correctly spelled name.
df = df.rename(columns={"buses_total_estimated_ridersip": "buses_total_estimated_ridership"})
df

Unnamed: 0,date,subways_total_estimated_ridership,subways_of_comparable_pre_pandemic_day,buses_total_estimated_ridership,buses_of_comparable_pre_pandemic_day,lirr_total_estimated_ridership,lirr_of_comparable_pre_pandemic_day,metro_north_total_estimated_ridership,metro_north_of_comparable_pre_pandemic_day,access_a_ride_total_scheduled_trips,access_a_ride_of_comparable_pre_pandemic_day,bridges_and_tunnels_total_traffic,bridges_and_tunnels_of_comparable_pre_pandemic_day,staten_island_railway_total_estimated_ridership,staten_island_railway_of_comparable_pre_pandemic_day,temperature_2m_mean,precipitation_sum,precipitation_hours,day_of_week
0,2020-03-01,2212965.0,0.97,984908,0.99,86790,1.00,55825,0.59,19922,1.13,786960,0.98,1636,0.52,-3.8,0.0,0.0,Sunday
1,2020-03-02,5329915.0,0.96,2209066,0.99,321569,1.03,180701,0.66,30338,1.02,874619,0.95,17140,1.07,-4.8,0.0,0.0,Monday
2,2020-03-03,5481103.0,0.98,2228608,0.99,319727,1.02,190648,0.69,32767,1.10,882175,0.96,17453,1.09,-2.8,0.0,0.0,Tuesday
3,2020-03-04,5498809.0,0.99,2177165,0.97,311662,0.99,192689,0.70,34297,1.15,905558,0.98,17136,1.07,-4.4,0.1,1.0,Wednesday
4,2020-03-05,5496453.0,0.99,2244515,1.00,307597,0.98,194386,0.70,33209,1.12,929298,1.01,17203,1.08,-5.3,14.2,16.0,Thursday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,2024-09-27,3945383.0,0.68,1438524,0.62,258258,0.79,221034,0.77,35968,1.21,1010505,1.06,6992,0.41,-0.5,17.5,13.0,Friday
1672,2024-09-28,2557982.0,0.79,863284,0.62,134408,1.14,132177,0.87,22142,1.29,922085,0.97,2928,0.71,-3.0,0.0,0.0,Saturday
1673,2024-09-29,1959363.0,0.77,664367,0.60,117572,1.18,102805,0.98,22340,1.30,869345,0.98,2782,0.95,2.3,0.0,0.0,Sunday
1674,2024-09-30,3849138.0,0.67,1486507,0.64,256547,0.78,217897,0.76,34701,1.17,924795,0.97,7845,0.46,1.9,1.8,5.0,Monday


In [None]:
def prepare_mta_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare data for plotting Subways, Buses, and Bridges & Tunnels.

    Reshapes the wide frame into one row per (date, mode) with two numeric
    measures.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``date`` plus, for each of the three modes, its total
        column and its ``*_of_comparable_pre_pandemic_day`` column. The input
        is not modified.

    Returns
    -------
    pd.DataFrame
        Columns ``['date', 'mode', 'value_total', 'value_pct']``. Both value
        columns are coerced to numeric; unparseable entries become NaN.

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.
    """
    totals = {
        'subways_total_estimated_ridership': 'Subways',
        'buses_total_estimated_ridership': 'Buses',
        'bridges_and_tunnels_total_traffic': 'Bridges & Tunnels'
    }
    percents = {
        'subways_of_comparable_pre_pandemic_day': 'Subways',
        'buses_of_comparable_pre_pandemic_day': 'Buses',
        'bridges_and_tunnels_of_comparable_pre_pandemic_day': 'Bridges & Tunnels'
    }

    missing = [name for name in ['date', *totals, *percents] if name not in df.columns]
    if missing:
        raise KeyError(f"prepare_mta_data: missing required column(s): {missing}")

    def _to_long(column_labels: dict, value_name: str) -> pd.DataFrame:
        # Melt one family of wide columns into (date, mode, <value_name>) rows.
        long_df = (
            df[['date', *column_labels]]
            .rename(columns=column_labels)
            .melt(id_vars='date', var_name='mode', value_name=value_name)
        )
        # Values may still be strings if the caller did not convert them;
        # coerce here so the chart always receives real numbers.
        long_df[value_name] = pd.to_numeric(long_df[value_name], errors='coerce')
        return long_df

    tidy = _to_long(totals, 'value_total').merge(
        _to_long(percents, 'value_pct'), on=['date', 'mode']
    )
    return tidy[['date', 'mode', 'value_total', 'value_pct']]






In [None]:
def make_usage_chart(tidy: pd.DataFrame):
    """
    Build an interactive Altair chart showing NYC transportation usage over time.
    - Checkbox toggles between totals and percentages.
    - Legend click to show/hide series.
    - Drag to pan and scroll to zoom along the x-axis.
    Expects tidy columns: ['date','mode','value_total','value_pct'].

    Returns the Altair chart object. As a side effect, Altair's max-rows
    limit is disabled globally so the full dataset can be embedded.
    """
    alt.data_transformers.disable_max_rows()

    # Checkbox: totals vs percentage
    toggle = alt.param(
        name="Show_Percentage",
        bind=alt.binding_checkbox(name=" Show percentage"),
        value=False
    )

    # Legend toggle
    legend_sel = alt.selection_point(fields=["mode"], bind="legend")

    # Pan/zoom on x only. Binding the interval selection to the scales is the
    # supported way to zoom a view; feeding a brush back into the x-domain of
    # the very view it is drawn on makes the selection depend on itself.
    zoom = alt.selection_interval(bind="scales", encodings=["x"])

    # Pin the colour domain to every mode present in the data. The legend
    # filter below removes rows, and without a fixed domain the hidden series
    # would also drop out of the legend and could not be clicked back on.
    modes = sorted(tidy["mode"].dropna().unique().tolist())
    color_scale = alt.Scale(domain=modes) if modes else alt.Undefined

    # Main lines (compute y via param-driven expression)
    lines = (
        alt.Chart(tidy)
        .add_params(toggle, legend_sel, zoom)
        .transform_filter(legend_sel)
        .transform_calculate(
            # Refer to the param by name inside the expression
            y_value="Show_Percentage ? datum.value_pct : datum.value_total"
        )
        .mark_line()
        .encode(
            x=alt.X("date:T", title="Date"),
            y=alt.Y("y_value:Q", title="Usage (Counts or % of Comparable Day)"),
            color=alt.Color("mode:N", scale=color_scale, legend=alt.Legend(title="Mode")),
            tooltip=[
                alt.Tooltip("date:T", title="Date"),
                alt.Tooltip("mode:N", title="Mode"),
                alt.Tooltip("value_total:Q", title="Total", format=","),
                # The share is stored as a fraction (e.g. 0.97), so format it
                # as a percentage instead of rounding it to one decimal.
                alt.Tooltip("value_pct:Q", title="% of Pre-Pandemic", format=".0%")
            ],
        )
        .properties(width=900, height=420, title="NYC Travel Usage Over Time (2020–2024)")
    )

    return lines


In [None]:
def make_mta_chart(
    df_raw: pd.DataFrame,
    save_html: str | None = None):
    """
    Run the whole pipeline from a wide frame to a chart.

    Steps:
      1) Reshape df_raw with prepare_mta_data(df_raw)
      2) Build the interactive chart with make_usage_chart(...)
      3) If save_html is truthy, write the chart to that path via chart.save

    Returns the chart in either case.
    """
    chart = make_usage_chart(prepare_mta_data(df_raw))

    # Nothing to persist: hand the chart straight back.
    if not save_html:
        return chart

    chart.save(save_html)  # writes a standalone HTML
    return chart


In [None]:
def call_mta_usage(df: pd.DataFrame, save_html_path: str | None = None):
    """
    Build (and optionally save) the usage chart from a wide ridership frame.

    Thin wrapper around make_mta_chart: it passes a copy of ``df`` so the
    caller's frame cannot be modified, and forwards ``save_html_path`` as the
    output path. Delegating avoids repeating the prepare/build/save sequence
    that make_mta_chart already implements.

    Returns the chart produced by make_mta_chart.
    """
    # Don't mutate the original dataframe
    return make_mta_chart(df.copy(), save_html=save_html_path)


# Render the interactive chart for the merged frame as this cell's output.
usage_chart = call_mta_usage(df)
usage_chart