In [1]:
import requests
import pandas as pd
import altair as alt

In [2]:
def mta_ridership_data(url):
    '''
    Query the API to pull daily ridership data.
    '''
    response = requests.get(url).json()
    df = pd.DataFrame(response)
    df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
    return df

mta_df = mta_ridership_data("https://data.ny.gov/resource/vxuj-8kew.json?$limit=9999999999999")
mta_df

Unnamed: 0,date,subways_total_estimated_ridership,subways_of_comparable_pre_pandemic_day,buses_total_estimated_ridersip,buses_of_comparable_pre_pandemic_day,lirr_total_estimated_ridership,lirr_of_comparable_pre_pandemic_day,metro_north_total_estimated_ridership,metro_north_of_comparable_pre_pandemic_day,access_a_ride_total_scheduled_trips,access_a_ride_of_comparable_pre_pandemic_day,bridges_and_tunnels_total_traffic,bridges_and_tunnels_of_comparable_pre_pandemic_day,staten_island_railway_total_estimated_ridership,staten_island_railway_of_comparable_pre_pandemic_day
0,2020-03-01,2212965,0.97,984908,0.99,86790,1.00,55825,0.59,19922,1.13,786960,0.98,1636,0.52
1,2020-03-02,5329915,0.96,2209066,0.99,321569,1.03,180701,0.66,30338,1.02,874619,0.95,17140,1.07
2,2020-03-03,5481103,0.98,2228608,0.99,319727,1.02,190648,0.69,32767,1.10,882175,0.96,17453,1.09
3,2020-03-04,5498809,0.99,2177165,0.97,311662,0.99,192689,0.70,34297,1.15,905558,0.98,17136,1.07
4,2020-03-05,5496453,0.99,2244515,1.00,307597,0.98,194386,0.70,33209,1.12,929298,1.01,17203,1.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1771,2025-01-05,1791020,0.86,618750,0.67,96683,1.17,79761,0.88,23665,1.75,726411,1.06,2039,0.74
1772,2025-01-06,3436491,0.67,1238969,0.61,229503,0.76,202575,0.75,35840,1.27,792617,0.91,6887,0.42
1773,2025-01-07,3781536,0.74,1275936,0.62,245930,0.81,220945,0.82,38048,1.34,842968,0.97,7409,0.45
1774,2025-01-08,3830616,0.74,1256260,0.61,239444,0.79,209550,0.78,39406,1.39,852748,0.98,7798,0.48


In [3]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

In [None]:
def weather_data(lat,lon,start,end):
    '''
    Query the API for relevant weather data: precipitation (rain),
    temperature, humidity.
    '''
    url = f"https://archive-api.open-meteo.com/v1/archive?latitude={lat}&longitude={lon}&start_date={start}&end_date={end}&hourly=temperature_2m&daily=temperature_2m_mean,precipitation_sum,precipitation_hours&timezone=America%2FNew_York"
    json_data = requests.get(url).json()
    daily_data = json_data.get('daily', {}) # there is some other info the json, but it isnt necessary for my project
    df = pd.DataFrame(daily_data)
    df.rename(columns={'time': 'date'}, inplace=True)
    return df


In [None]:
weather_df = weather_data(40.7478,73.98503,"2020-03-01","2024-10-01")
weather_df

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,precipitation_hours
0,2020-03-01,-3.9,0.0,0.0
1,2020-03-02,-4.8,0.0,0.0
2,2020-03-03,-2.7,0.1,1.0
3,2020-03-04,-4.3,0.0,0.0
4,2020-03-05,-5.5,14.5,17.0
...,...,...,...,...
1671,2024-09-27,-0.5,16.2,12.0
1672,2024-09-28,-2.9,0.0,0.0
1673,2024-09-29,2.3,0.0,0.0
1674,2024-09-30,1.8,1.8,5.0


In [None]:
def merge_data(mta_df,weather_df):
    '''
    Merge the MTA ridership and weather data on the date column.
    '''
    merged_df = pd.merge(mta_df,weather_df,on = "date")
    return merged_df
merged_df = merge_data(mta_df,weather_df)
df = merged_df
df['date'] = pd.to_datetime(df['date'])
df['day_of_week'] = df['date'].dt.day_name()

columns_to_convert = [
    'subways_total_estimated_ridership',
    'staten_island_railway_total_estimated_ridership',
    'bridges_and_tunnels_total_traffic',
    'metro_north_total_estimated_ridership',
    'lirr_total_estimated_ridership',
    'buses_total_estimated_ridersip'
]
for col in columns_to_convert:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df['precipitation_sum'] = pd.to_numeric(df['precipitation_sum'], errors='coerce')


day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['day_of_week'] = pd.Categorical(df['day_of_week'], categories=day_order, ordered=True)
df['subways_total_estimated_ridership'] = df['subways_total_estimated_ridership'].astype(float)
df = df.rename(columns={"buses_total_estimated_ridersip": "buses_total_estimated_ridership"})
df


Unnamed: 0,date,subways_total_estimated_ridership,subways_of_comparable_pre_pandemic_day,buses_total_estimated_ridership,buses_of_comparable_pre_pandemic_day,lirr_total_estimated_ridership,lirr_of_comparable_pre_pandemic_day,metro_north_total_estimated_ridership,metro_north_of_comparable_pre_pandemic_day,access_a_ride_total_scheduled_trips,access_a_ride_of_comparable_pre_pandemic_day,bridges_and_tunnels_total_traffic,bridges_and_tunnels_of_comparable_pre_pandemic_day,staten_island_railway_total_estimated_ridership,staten_island_railway_of_comparable_pre_pandemic_day,temperature_2m_mean,precipitation_sum,precipitation_hours,day_of_week
0,2020-03-01,2212965.0,0.97,984908,0.99,86790,1.00,55825,0.59,19922,1.13,786960,0.98,1636,0.52,-3.9,0.0,0.0,Sunday
1,2020-03-02,5329915.0,0.96,2209066,0.99,321569,1.03,180701,0.66,30338,1.02,874619,0.95,17140,1.07,-4.8,0.0,0.0,Monday
2,2020-03-03,5481103.0,0.98,2228608,0.99,319727,1.02,190648,0.69,32767,1.10,882175,0.96,17453,1.09,-2.7,0.1,1.0,Tuesday
3,2020-03-04,5498809.0,0.99,2177165,0.97,311662,0.99,192689,0.70,34297,1.15,905558,0.98,17136,1.07,-4.3,0.0,0.0,Wednesday
4,2020-03-05,5496453.0,0.99,2244515,1.00,307597,0.98,194386,0.70,33209,1.12,929298,1.01,17203,1.08,-5.5,14.5,17.0,Thursday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1671,2024-09-27,3945383.0,0.68,1438524,0.62,258258,0.79,221034,0.77,35968,1.21,1010505,1.06,6992,0.41,-0.5,16.2,12.0,Friday
1672,2024-09-28,2557982.0,0.79,863284,0.62,134408,1.14,132177,0.87,22142,1.29,922085,0.97,2928,0.71,-2.9,0.0,0.0,Saturday
1673,2024-09-29,1959363.0,0.77,664367,0.60,117572,1.18,102805,0.98,22340,1.30,869345,0.98,2782,0.95,2.3,0.0,0.0,Sunday
1674,2024-09-30,3849138.0,0.67,1486507,0.64,256547,0.78,217897,0.76,34701,1.17,924795,0.97,7845,0.46,1.8,1.8,5.0,Monday


In [None]:
import pandas as pd

def prepare_mta_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare data for plotting Subways, Buses, and Bridges & Tunnels.
    """

    totals = {
        'subways_total_estimated_ridership': 'Subways',
        'buses_total_estimated_ridership': 'Buses',
        'bridges_and_tunnels_total_traffic': 'Bridges & Tunnels'
    }
    percents = {
        'subways_of_comparable_pre_pandemic_day': 'Subways',
        'buses_of_comparable_pre_pandemic_day': 'Buses',
        'bridges_and_tunnels_of_comparable_pre_pandemic_day': 'Bridges & Tunnels'
    }

    total_long = (
        df[['date'] + list(totals.keys())]
        .rename(columns=totals)
        .melt(id_vars='date', var_name='mode', value_name='value_total')
    )

    pct_long = (
        df[['date'] + list(percents.keys())]
        .rename(columns=percents)
        .melt(id_vars='date', var_name='mode', value_name='value_pct')
    )

    tidy = total_long.merge(pct_long, on=['date', 'mode'])
    return tidy[['date', 'mode', 'value_total', 'value_pct']]



https://data.ny.gov/Transportation/MTA-Daily-Ridership-Data-2020-2025/vxuj-8kew/explore/query/SELECT%0A%20%20%60date%60%2C%0A%20%20%60subways_total_estimated_ridership%60%2C%0A%20%20%60subways_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60buses_total_estimated_ridersip%60%2C%0A%20%20%60buses_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60lirr_total_estimated_ridership%60%2C%0A%20%20%60lirr_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60metro_north_total_estimated_ridership%60%2C%0A%20%20%60metro_north_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60access_a_ride_total_scheduled_trips%60%2C%0A%20%20%60access_a_ride_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60bridges_and_tunnels_total_traffic%60%2C%0A%20%20%60bridges_and_tunnels_of_comparable_pre_pandemic_day%60%2C%0A%20%20%60staten_island_railway_total_estimated_ridership%60%2C%0A%20%20%60staten_island_railway_of_comparable_pre_pandemic_day%60%0AORDER%20BY%20%60date%60%20ASC%20NULL%20LAST/page/filter

In [None]:
!pip install vegafusion



In [None]:
!pip install vl-convert-python -q

In [None]:
import altair as alt
import pandas as pd

def bin_precipitation(precipitation):
    '''
    Split the precipitation up into different levels for the heat map
    '''
    if precipitation == 0:
        return 'No Rain'
    elif precipitation <= 0.1:
        return 'Light Rain'
    elif precipitation <= 0.5:
        return 'Moderate Rain'
    else:
        return 'Heavy Rain'


def prepare_precipitation_data(df):
    '''
    Prepare data for the heatmap by binning precipitation and calculating averages
    '''
    # Create a copy to avoid modifying original
    df_copy = df.copy()

    # Apply binning using the separate function
    df_copy['precipitation_level'] = df_copy['precipitation_sum'].apply(bin_precipitation)

    # Define order for precipitation levels
    precip_order = ['No Rain', 'Light Rain', 'Moderate Rain', 'Heavy Rain']
    df_copy['precipitation_level'] = pd.Categorical(df_copy['precipitation_level'],
                                                     categories=precip_order,
                                                     ordered=True)

    # Calculate mean ridership by day and precipitation level
    # Added observed=False to silence the FutureWarning
    grouped_df = df_copy.groupby(['day_of_week', 'precipitation_level'], observed=False)[
        'subways_total_estimated_ridership'].mean().reset_index()
    grouped_df.columns = ['day_of_week', 'precipitation_level', 'avg_ridership']

    return grouped_df, precip_order


def create_heatmap(grouped_df, precip_order):
    '''
    Create an Altair heatmap visualization from prepared data
    '''
    # Calculate a threshold for text color based on the data range
    min_val = grouped_df['avg_ridership'].min()
    max_val = grouped_df['avg_ridership'].max()
    # Use 50% of the range as threshold for text color (adjusted for single-hue palette)
    threshold = min_val + (max_val - min_val) * 0.5

    # Create the base heatmap with Blues color scheme
    heatmap = alt.Chart(grouped_df).mark_rect().encode(
        x=alt.X('precipitation_level:O',
                title='Precipitation Level',
                sort=precip_order,
                axis=alt.Axis(labelAngle=-45)),  # Rotate labels
        y=alt.Y('day_of_week:O',
                title='Day of the Week'),
        color=alt.Color('avg_ridership:Q',
                        title='Avg Daily Ridership',
                        scale=alt.Scale(scheme='blues')),
        tooltip=[
            alt.Tooltip('day_of_week:O', title='Day'),
            alt.Tooltip('precipitation_level:O', title='Precipitation'),
            alt.Tooltip('avg_ridership:Q', title='Avg Ridership', format=',.1f')
        ]
    )

    # Add text annotations with better color logic
    text = alt.Chart(grouped_df).mark_text(baseline='middle').encode(
        x=alt.X('precipitation_level:O', sort=precip_order),
        y='day_of_week:O',
        text=alt.Text('avg_ridership:Q', format='.1f'),
        color=alt.condition(
            alt.datum.avg_ridership > threshold,
            alt.value('white'),  # White text for high values (dark background)
            alt.value('black')   # Black text for low values (light background)
        )
    )

    # Combine heatmap and text with styling
    chart = (heatmap + text).properties(
        width=400,
        height=300,
        title='Average Subway Ridership by Day of Week and Precipitation Level'
    ).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_title(
        fontSize=16
    )

    return chart


# Usage
grouped_data, precipitation_order = prepare_precipitation_data(df)
chart = create_heatmap(grouped_data, precipitation_order)
chart

In [None]:
import altair as alt
import pandas as pd

alt.data_transformers.enable("vegafusion")

# Four Periods
def categorize_period(date):
    if date < pd.Timestamp('2020-03-15'):
        return 'Pre-Pandemic'
    elif date < pd.Timestamp('2021-07-01'):
        return 'During Pandemic'
    elif date < pd.Timestamp('2023-01-01'):
        return 'Recovery Phase'
    else:
        return 'Post-Pandemic'

df['period'] = df['date'].apply(categorize_period)

# Weather
def categorize_weather(row):
    precip = row['precipitation_sum']
    temp = row['temperature_2m_mean']

    if pd.isna(precip):
        return None
    if precip == 0:
        return 'Sunny'
    if temp < 0 and precip > 0:
        if precip < 5:
            return 'Light Snow'
        elif precip < 15:
            return 'Moderate Snow'
        else:
            return 'Heavy Snow'
    else:
        if precip <= 2:
            return 'Drizzle'
        elif precip <= 8:
            return 'Light Rain'
        elif precip <= 20:
            return 'Moderate Rain'
        elif precip <= 35:
            return 'Heavy Rain'
        else:
            return 'Storm'

df['weather_category'] = df.apply(categorize_weather, axis=1)

subway_weather = df[['subways_total_estimated_ridership', 'weather_category', 'period']].copy()
subway_weather = subway_weather.dropna()
subway_weather.rename(columns={'subways_total_estimated_ridership': 'Ridership'}, inplace=True)

chart = alt.Chart(subway_weather).transform_density(
    density='Ridership',
    bandwidth=150000,
    groupby=['weather_category', 'period'],
    extent=[0, 6000000],
    counts=True,
    steps=200
).mark_area(
    orient='horizontal',
    opacity=0.8,
    interpolate='monotone'
).encode(
    x=alt.X('density:Q', stack='center', impute=None, title=None,
            axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False)),
    y=alt.Y('value:Q', title='Daily Subway Ridership',
            scale=alt.Scale(zero=False), axis=alt.Axis(format='~s', labelFontSize=11)),
    color=alt.Color('weather_category:N',
                    scale=alt.Scale(
                        domain=['Sunny', 'Drizzle', 'Light Rain', 'Moderate Rain',
                                'Heavy Rain', 'Storm', 'Light Snow', 'Moderate Snow', 'Heavy Snow'],
                        range=['gold', 'cyan', 'deepskyblue', 'dodgerblue', 'blue', 'navy', 'lightcyan', 'skyblue', 'purple']
                    ),
                    legend=alt.Legend(title='Weather Condition', orient='right', titleFontSize=13, labelFontSize=10, symbolSize=150)),
    column=alt.Column('period:N', title='Time Period',
                      header=alt.Header(labelAngle=0, labelAlign='center', titleFontSize=14, labelFontSize=12),
                      sort=['Pre-Pandemic', 'During Pandemic', 'Recovery Phase', 'Post-Pandemic']),
    tooltip=[
        alt.Tooltip('weather_category:N', title='Weather'),
        alt.Tooltip('period:N', title='Period'),
        alt.Tooltip('value:Q', title='Ridership', format=',.0f')
    ]
).properties(
    width=140,
    height=550,
    title={
        'text': 'NYC Subway Ridership: Weather Impact Across Pandemic Timeline',
        'subtitle': 'Distribution across different weather conditions (2020-2024)',
        'fontSize': 18,
        'subtitleFontSize': 13
    }
).configure_view(strokeWidth=0)

chart

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd

path = '/content/drive/.shortcut-targets-by-id/1N5ZcInPQIEikJHXC9v-SCoPrZdkGKtaj/ds4200_project/cleaned_all_years_2020_2024_ny_air_quality.csv'

df = pd.read_csv(path)
df.head()


Unnamed: 0,date,source,daily_mean,daily_aqi
0,2020-01-01,AQS,2.3,13
1,2020-01-04,AQS,12.0,56
2,2020-01-07,AQS,8.2,46
3,2020-01-10,AQS,6.1,34
4,2020-01-13,AQS,7.8,43


In [None]:
# -- 1)
import pandas as pd
import numpy as np
import altair as alt

alt.data_transformers.disable_max_rows()
try:
    alt.theme.enable('none')
except Exception:
    alt.themes.enable('none')

# 2）
aqi_path = '/content/drive/.shortcut-targets-by-id/1N5ZcInPQIEikJHXC9v-SCoPrZdkGKtaj/ds4200_project/cleaned_all_years_2020_2024_ny_air_quality.csv'
air = pd.read_csv(aqi_path)

need_cols = [c for c in air.columns if c.lower() in {'date','source','daily_mean','daily_aqi'}]
air = air[need_cols].copy()
air['date'] = pd.to_datetime(air['date']).dt.strftime('%Y-%m-%d')

# -- 3)
if 'mta_df' not in globals():
    import requests
    def mta_ridership_data(url):
        js = requests.get(url).json()
        df = pd.DataFrame(js)
        df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
        cand = [c for c in df.columns if any(k in c.lower() for k in
                   ['ridership','subway','subways_total','estimated_total','total'])]
        for c in cand:
            df[c] = pd.to_numeric(df[c], errors='coerce')
        rid_col = None
        for c in cand:
            if pd.api.types.is_numeric_dtype(df[c]):
                rid_col = c
                break
        if rid_col is None:
            raise ValueError('未在 MTA 数据中找到可用的客流列，请检查 API 字段。')
        df = df[['date', rid_col]].rename(columns={rid_col:'ridership'})
        return df

    mta_df = mta_ridership_data("https://data.ny.gov/resource/vxuj-8kew.json?$limit=9999999999999")
else:
    tmp = mta_df.copy()
    tmp['date'] = pd.to_datetime(tmp['date']).dt.strftime('%Y-%m-%d')
    rid_col = None
    for c in tmp.columns:
        if any(k in c.lower() for k in ['ridership','subway','subways_total','estimated_total','total']):
            if pd.api.types.is_numeric_dtype(pd.to_numeric(tmp[c], errors='coerce')):
                rid_col = c; break
    if rid_col is None:
        raise ValueError('未在已有 mta_df 中找到客流列，请检查列名。')
    mta_df = tmp[['date', rid_col]].rename(columns={rid_col:'ridership'})

# -- 4)
merged = pd.merge(mta_df, air, on='date', how='inner')
merged['date'] = pd.to_datetime(merged['date'])

# -- 5)
df = merged.copy().dropna(subset=['date','daily_aqi']).sort_values('date')
df['period'] = 'Post-COVID'
df.loc[df['date'] <  pd.Timestamp('2020-03-01'), 'period'] = 'Pre-COVID'
df.loc[(df['date'] >= pd.Timestamp('2020-03-01')) & (df['date'] <= pd.Timestamp('2021-06-30')), 'period'] = 'During-COVID'
df.loc[(df['date'] >  pd.Timestamp('2021-06-30')) & (df['date'] <= pd.Timestamp('2022-12-31')), 'period'] = 'Recovery'

# -- 6)
brush = alt.selection_interval(encodings=['x'], name='DateRange')
aqi_min, aqi_max = float(df['daily_aqi'].min()), float(df['daily_aqi'].max())
aqi_param = alt.param(
    name='AQIMax', value=min(120, aqi_max),
    bind=alt.binding_range(min=aqi_min, max=aqi_max, step=1, name='Max AQI: ')
)
exclude_expr = "!(toDate(datum.date) >= toDate('2020-03-01') && toDate(datum.date) <= toDate('2020-03-21'))"

# -- 7)
top = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X('date:T', axis=alt.Axis(format='%b %Y', tickCount='month', title='Date')),
        y=alt.Y('daily_aqi:Q', title='Daily AQI'),
        color=alt.Color('period:N', title='Period'),
        tooltip=['date:T','daily_aqi:Q','period:N']
    )
    .add_params(brush, aqi_param)
    .transform_filter(exclude_expr)
    .properties(width=800, height=140, title='Brush to filter the scatter below')
)

# -- 8)
main = (
    alt.Chart(df)
    .transform_filter(brush)
    .transform_filter('datum.daily_aqi <= AQIMax')
    .transform_filter(exclude_expr)
    .mark_point(opacity=0.6, size=35)
    .encode(
        x=alt.X('date:T', axis=alt.Axis(format='%Y-%m', title='Date')),
        y=alt.Y('ridership:Q', title='Ridership'),
        color=alt.Color('period:N', title='Period'),
        tooltip=['date:T', alt.Tooltip('ridership:Q', title='Ridership'),
                 'daily_aqi:Q','period:N']
    )
    .properties(width=800, height=360, title='Subway Ridership over Time under AQI Threshold')
)

reg = (
    alt.Chart(df)
    .transform_filter(brush)
    .transform_filter('datum.daily_aqi <= AQIMax')
    .transform_filter(exclude_expr)
    .transform_regression('date', 'ridership', groupby=['period'])
    .mark_line(strokeDash=[4,2])
    .encode(x='date:T', y='ridership:Q', color='period:N')
)

chart = alt.vconcat(top, main + reg).resolve_scale(color='shared')
chart


In [None]:
import pandas as pd
import altair as alt

# ---- 1)
df = merged.copy().dropna(subset=['date','daily_aqi']).sort_values('date')
df['date'] = pd.to_datetime(df['date'])

df['period'] = 'Post-COVID'
# Pre-COVID
df.loc[df['date'] <= pd.Timestamp('2020-03-21'), 'period'] = 'Pre-COVID'
# During-COVID
df.loc[(df['date'] >= pd.Timestamp('2020-03-22')) &
       (df['date'] <= pd.Timestamp('2021-06-30')), 'period'] = 'During-COVID'
# Recovery
df.loc[(df['date'] >  pd.Timestamp('2021-06-30')) &
       (df['date'] <= pd.Timestamp('2022-12-31')), 'period'] = 'Recovery'
# Post-COVID

# ---- 2)
brush = alt.selection_interval(encodings=['x'], name='DateRange')
aqi_min, aqi_max = float(df['daily_aqi'].min()), float(df['daily_aqi'].max())
aqi_param = alt.param(
    name='AQIMax', value=min(120, aqi_max),
    bind=alt.binding_range(min=aqi_min, max=aqi_max, step=1, name='Max AQI: ')
)

x_domain = [pd.Timestamp('2020-03-01'), df['date'].max()]

period_domain = ['Pre-COVID', 'During-COVID', 'Recovery', 'Post-COVID']
period_colors = ['#2ca02c',   '#1f77b4',      '#d62728',  '#ff7f0e']  # 绿/蓝/红/橙

# ---- 3)
top = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X('date:T',
                axis=alt.Axis(format='%b %Y', tickCount='month', title='Date'),
                scale=alt.Scale(domain=x_domain)),
        y=alt.Y('daily_aqi:Q', title='Daily AQI'),
        color=alt.Color('period:N', title='Period',
                        scale=alt.Scale(domain=period_domain, range=period_colors)),
        tooltip=['date:T','daily_aqi:Q','period:N']
    )
    .add_params(brush, aqi_param)
    .properties(width=800, height=140, title='Brush to filter the scatter below')
)

# ---- 4)
main = (
    alt.Chart(df)
    .transform_filter(brush)
    .transform_filter('datum.daily_aqi <= AQIMax')
    .mark_point(opacity=0.6, size=35)
    .encode(
        x=alt.X('date:T',
                axis=alt.Axis(format='%Y-%m', title='Date'),
                scale=alt.Scale(domain=x_domain)),
        y=alt.Y('ridership:Q', title='Ridership'),
        color=alt.Color('period:N', title='Period',
                        scale=alt.Scale(domain=period_domain, range=period_colors)),
        tooltip=['date:T', alt.Tooltip('ridership:Q', title='Ridership'),
                 'daily_aqi:Q','period:N']
    )
    .properties(width=800, height=360,
                title='Subway Ridership over Time under AQI Threshold')
)

reg = (
    alt.Chart(df)
    .transform_filter(brush)
    .transform_filter('datum.daily_aqi <= AQIMax')
    .transform_regression('date', 'ridership', groupby=['period'])
    .mark_line(strokeDash=[4,2])
    .encode(
        x=alt.X('date:T', scale=alt.Scale(domain=x_domain)),
        y='ridership:Q',
        color=alt.Color('period:N',
                        scale=alt.Scale(domain=period_domain, range=period_colors))
    )
)

chart = alt.vconcat(top, main + reg).resolve_scale(color='shared')
chart


In [None]:
#NYC_2023subway_ridership iframe
<iframe width="700" height="600" allow="local-network-access; geolocation" title="nyc_subway2023" src="https://nu.maps.arcgis.com/apps/mapviewer/index.html?configurableview=true&webmap=1a11ba87cc6642538dd7b9e79e07bfd8&theme=light&scroll=false&center=-73.97172145699336,40.73841003121678&scale=72223.819286" ></iframe>
#NYC_2020subway_ridership iframe
<iframe width="700" height="600" allow="local-network-access; geolocation" title="nyc_subway2020" src="https://nu.maps.arcgis.com/apps/mapviewer/index.html?configurableview=true&webmap=15ad1408e8774c15909bd0e8dd9f3a7f&theme=light&scroll=false&center=-73.96597883718917,40.73350915450238&scale=72223.819286" ></iframe>
