In [2]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob as glob
import datetime as dt
import seaborn as sns

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io import shapereader
import cartopy.io.img_tiles as cimgt

from sklearn.linear_model import QuantileRegressor

In [3]:
# ICN heat-metric file (daily maxima, per the file name).
icn_ds = xr.open_dataset("ICN_heat_metrics_1989-2024_2025-04-17_daily_max.nc")

# ERA5 heat-metric file for the Midwest domain. The dataset is re-sorted on
# load so that `lat` is ascending (the stored latitude order is not).
era_ds = xr.open_dataset(
    "/data/cristi/a/kchoo3/ERA5/reanalysis/analysis-ready/daily_max/ERA5_MIDWEST_heat_metrics_1940-2024_2025-04-15_daily_max.nc"
).sortby("lat", ascending=True)

In [4]:
def assign_coordinates(ds):
    """Attach latitude/longitude coordinates to every station in ``ds``.

    Parameters
    ----------
    ds : dataset-like
        Object exposing a ``station`` coordinate (``ds.station.values`` is an
        iterable of station names) and an ``assign_coords`` method.

    Returns
    -------
    Same type as ``ds``
        The result of ``ds.assign_coords`` with ``lat`` and ``lon`` added
        along the ``station`` dimension, in the same order as ``ds.station``.

    Raises
    ------
    KeyError
        If ``ds.station`` contains a name that has no entry in the location
        table below. The message lists the unknown and the known names.
    """
    # (latitude, longitude) in decimal degrees; negative longitude = west.
    station_locs = {
        'Champaign':   (40.085571, -88.240141),
        'Springfield': (39.721302, -89.613882),
        'Carbondale':  (37.699711, -89.244105),
    }

    stations = list(ds.station.values)

    # Fail with an actionable message instead of a bare KeyError('name').
    unknown = sorted(set(stations) - set(station_locs))
    if unknown:
        raise KeyError(
            f"No coordinates known for station(s) {unknown}; "
            f"known stations are {sorted(station_locs)}"
        )

    lats = [station_locs[name][0] for name in stations]
    lons = [station_locs[name][1] for name in stations]

    return ds.assign_coords({
        'lat': ('station', lats),
        'lon': ('station', lons),
    })

# Attach lat/lon to the ICN heat-metric dataset; later cells read
# `icn_da.lat` / `icn_da.lon` to pick the nearest ERA5 grid cell.
icn_ds = assign_coordinates(icn_ds)

In [5]:
# Quick validation: display the station at positional index 2 (presumably to
# confirm that the lat/lon coordinates were attached — the plotting cells
# further down treat index 2 as Carbondale).
icn_ds.isel(station=2)

In [6]:
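# ERA5 2 m dew-point temperature (stored below as `era_rh`, although it is dew point rather than relative humidity)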
def Load_Data(file_path):
    """Open every dataset whose path matches a glob pattern.

    Parameters
    ----------
    file_path : str
        Glob pattern (wildcards allowed) passed to ``glob.glob``.

    Returns
    -------
    list
        One ``xr.open_dataset`` result per matching file, in sorted path
        order. Empty when nothing matches the pattern.
    """
    matched_paths = sorted(glob.glob(file_path))
    return [xr.open_dataset(path) for path in matched_paths]

# Open every ERA5 2 m dew-point file matching the pattern and join them along `time`.
ds_dp = Load_Data("/data/cristi/a/kchoo3/ERA5/reanalysis/analysis-ready/ERA5_MIDWEST_2m_dewpoint_temperature_*.nc")
full_ds_dp = xr.concat(ds_dp, dim='time')
# Shorten the variable name to `d2m`, the key the plotting cells look up.
full_ds_dp = full_ds_dp.rename({'2m_dewpoint_temperature': 'd2m'})
# Subtract 273.15 — presumably a Kelvin -> degrees Celsius conversion; confirm the stored units.
full_ds_dp['d2m'] = full_ds_dp['d2m'] - 273.15

# NOTE: despite the `_rh` suffix this is the dew-point variable `d2m`, not relative humidity.
era_rh = full_ds_dp.d2m

# ICN dew-point file (also kept under an `_rh` name); give it the same
# station lat/lon coordinates as the heat-metric dataset.
icn_rh = xr.open_dataset('hourly/ICN_dewpoint_1989-2024_2025-04-17.nc')
icn_rh = assign_coordinates(icn_rh)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import QuantileRegressor
import numpy as np
import pandas as pd

def quantile_regression(y, X, quantile):
    """Fit an unpenalised linear quantile regression and return its line.

    Samples whose response is missing (NaN / inf) are dropped from both
    ``y`` and ``X`` before fitting. Replacing them with the series mean, as
    was done previously, injects artificial observations at the mean and
    pulls every non-central quantile towards it.

    Parameters
    ----------
    y : array-like, shape (n_samples,)
        Response values (e.g. an xarray DataArray, pandas Series or ndarray).
    X : array-like, shape (n_samples, n_features) or (n_samples,)
        Regressor values; a 1-D input is treated as a single feature.
    quantile : float
        Quantile to fit, in (0, 1).

    Returns
    -------
    numpy.ndarray, shape (2,)
        ``[slope, intercept]`` where ``slope`` is the coefficient of the
        first feature (units of ``y`` per unit of ``X``) and ``intercept``
        is the fitted value at ``X == 0``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths or no finite sample remains.
    """
    y_arr = np.asarray(y, dtype=float).ravel()
    X_arr = np.asarray(X)
    if X_arr.ndim == 1:
        X_arr = X_arr[:, None]

    if X_arr.shape[0] != y_arr.shape[0]:
        raise ValueError(
            f"X has {X_arr.shape[0]} samples but y has {y_arr.shape[0]}"
        )

    valid = np.isfinite(y_arr)
    if not valid.any():
        raise ValueError("y contains no finite values to fit")

    # alpha=0 switches off the L1 penalty, giving a plain quantile regression.
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    qr.fit(X_arr[valid], y_arr[valid])
    return np.array([qr.coef_[0], qr.intercept_])

# Quantiles (as fractions) for which regression lines are fitted.
quantiles = [0.95, 0.50]

# (dataset variable key, human-readable panel title) pairs in panel order,
# unzipped into the two parallel lists that the plotting loop indexes.
variables, variable_names = map(list, zip(
    ("t2m", "surface temperature"),
    ("d2m", "dew-point temperature"),
    ("wb", "wet‑bulb temperature"),
    ("hi", "heat‑index"),
))

# Mean length of a year in days; converts a per-day slope to a per-year slope.
days_per_year = 365.25

for station_i, station_name in enumerate(icn_ds.station.values):

    # --- one figure with four panels for this station ------------------------
    fig, axes = plt.subplots(
        nrows=2,
        ncols=2,
        figsize=(18, 14),
        sharex="col",
        sharey=False,
        constrained_layout=True
    )

    # Carbondale (positional index 2) is analysed from 1990, the others from 1989.
    year_i = "1990-01-01" if station_i == 2 else "1989-01-01"

    for var_i, variable in enumerate(variables):

        # pick the subplot to draw on
        ax = axes[var_i // 2, var_i % 2]

        # ---------------------------------------------------------------------
        # Pull out the station series and the nearest ERA5 grid cell.
        # `+ 360` shifts the station's negative (west) longitude before the
        # nearest-neighbour lookup — assumes ERA5 `lon` is on a 0-360 grid.
        if variable == "d2m":
            # Dew point lives in separate datasets whose time axis is called
            # `time`; rename it so both branches expose a `date` axis.
            icn_da = icn_rh[variable].isel(station=station_i)
            era_da = era_rh.sel(
                lat=icn_da.lat.data,
                lon=icn_da.lon.data + 360,
                method="nearest",
            )
            icn_da = icn_da.rename({'time': 'date'})
            era_da = era_da.rename({'time': 'date'})
        else:
            icn_da = icn_ds[variable].isel(station=station_i)
            era_da = era_ds[variable].sel(
                lat=icn_da.lat.data,
                lon=icn_da.lon.data + 360,
                method="nearest",
            )

        icn_da = icn_da.sel(date=slice(year_i, "2024-12-31"))
        era_da = era_da.sel(date=slice(year_i, "2024-12-31"))

        # Regressors are days since 1970-01-01. Each series gets a regressor
        # built from its OWN time axis so X and y always have equal length.
        X_icn = icn_da.date.values[:, None].astype("datetime64[D]").astype("int64")
        X_era = era_da.date.values[:, None].astype("datetime64[D]").astype("int64")

        # dataframe for the seaborn box-and-whisker plot
        df = pd.DataFrame({
            "value":  np.concatenate([icn_da.values,             era_da.values]),
            "year":   np.concatenate([icn_da.date.dt.year.values, era_da.date.dt.year.values]),
            "series": (["ICN dataset"] * icn_da.size) + (["ERA5 dataset"] * era_da.size),
        })

        # ---------------------------------------------------------------------

        sns.boxplot(
            x="year", y="value", hue="series",
            data=df, showfliers=False, ax=ax,
            whis=(5, 95), fill=False
        )

        # The boxplot x-axis is categorical: the first year sits at x=0 and
        # each later year one unit further right. The regression, however, is
        # expressed in days since 1970. Convert it to axis units before
        # drawing: evaluate the line at the middle of the first plotted year
        # (x=0) and rescale the slope from per-day to per-year.
        # Assumes the plotted years are contiguous.
        first_year = int(df["year"].min())
        origin_day = np.datetime64(f"{first_year}-07-02", "D").astype("int64")

        # add 95th-percentile and median quantile-regression lines for each series
        for q in quantiles:
            for prefix, da, X_da, color in (
                ("ERA5 q", era_da, X_era, "orange"),
                ("ICN  q", icn_da, X_icn, "blue"),
            ):
                slope, intercept = quantile_regression(da, X_da, q)
                ax.axline(
                    (0, intercept + slope * origin_day),
                    slope=slope * days_per_year,
                    linestyle="--",
                    color=color,
                    label=f"{prefix}{round(q * 100)}",
                )

        # cosmetics for each panel
        ax.set_title(f"{variable_names[var_i].title()}")
        ax.set_xlabel("")
        ax.set_ylabel("Temperature (°C)")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(axis="y", linestyle="--", alpha=0.3)

        # remove the per-panel legend; a single shared one is added below
        ax.get_legend().remove()

    # place one shared legend outside the panels
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="center left", bbox_to_anchor=(1.02, 0.5))

    fig.suptitle(f"{station_name}, IL — Annual distribution of daily values", y=1.03)
    plt.show()
    # Release the figure so looping over stations does not accumulate memory.
    plt.close(fig)


In [None]:
def quantile_regression(y, X, quantile):
    """Fit an unpenalised linear quantile regression and return its line.

    NOTE: this redefinition shadows the function of the same name defined in
    an earlier cell of this notebook.

    Samples whose response is missing (NaN / inf) are dropped from both
    ``y`` and ``X`` before fitting. Replacing them with the series mean, as
    was done previously, injects artificial observations at the mean and
    pulls every non-central quantile towards it.

    Parameters
    ----------
    y : array-like, shape (n_samples,)
        Response values (e.g. an xarray DataArray, pandas Series or ndarray).
    X : array-like, shape (n_samples, n_features) or (n_samples,)
        Regressor values; a 1-D input is treated as a single feature.
    quantile : float
        Quantile to fit, in (0, 1).

    Returns
    -------
    numpy.ndarray, shape (2,)
        ``[slope, intercept]`` where ``slope`` is the coefficient of the
        first feature and ``intercept`` is the fitted value at ``X == 0``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths or no finite sample remains.
    """
    y_arr = np.asarray(y, dtype=float).ravel()
    X_arr = np.asarray(X)
    if X_arr.ndim == 1:
        X_arr = X_arr[:, None]

    if X_arr.shape[0] != y_arr.shape[0]:
        raise ValueError(
            f"X has {X_arr.shape[0]} samples but y has {y_arr.shape[0]}"
        )

    valid = np.isfinite(y_arr)
    if not valid.any():
        raise ValueError("y contains no finite values to fit")

    # alpha=0 switches off the L1 penalty, giving a plain quantile regression.
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    qr.fit(X_arr[valid], y_arr[valid])
    return np.array([qr.coef_[0], qr.intercept_])

# NOTE: these are percent values (95, 50), not fractions; the plotting loop
# in this cell passes its own 0.95 / 0.50 and does not read this list.
quantiles = [95, 50]

# (dataset variable key, title fragment) pairs, unzipped into parallel lists.
variables, variable_names = map(list, zip(
    ("t2m", "surface temperature"),
    ("wb", "wetbulb temperature"),
    ("hi", "heat index"),
))

# Mean length of a year in days; converts a per-day slope to a per-year slope.
days_per_year = 365.25

for station_i in range(len(icn_ds.station.values)):
    # Carbondale (positional index 2) is analysed from 1990, the others from 1989.
    year_i = '1990-01-01' if station_i == 2 else '1989-01-01'

    for var_i, variable in enumerate(variables):
        icn_da = icn_ds[variable].isel(station=station_i)

        # `+ 360` shifts the station's negative (west) longitude before the
        # nearest-neighbour lookup — assumes ERA5 `lon` is on a 0-360 grid.
        era_da = era_ds[variable].sel(lat=icn_da.lat.data, lon=(icn_da.lon.data+360), method='nearest')

        # Restrict BOTH series to the same period for every station
        # (previously the ICN series was only sliced for station 2).
        icn_da = icn_da.sel(date=slice(year_i, '2024-12-31'))
        era_da = era_da.sel(date=slice(year_i, '2024-12-31'))

        # Regressors are days since 1970-01-01, built from each series' own
        # time axis so X and y always have equal length.
        X_icn = icn_da.date.values[:, np.newaxis].astype('datetime64[D]').astype('int64')
        X_era = era_da.date.values[:, np.newaxis].astype('datetime64[D]').astype('int64')

        df = pd.DataFrame({
            "value": np.concatenate([icn_da.values, era_da.values]),
            "year": np.concatenate([icn_da.date.dt.year.values, era_da.date.dt.year.values]),
            "series": ["ICN Dataset"]*icn_da.size + ["ERA5 Dataset"]*era_da.size
        })

        fig, ax = plt.subplots(figsize=(12, 6))

        # box plot
        sns.boxplot(x="year", y="value", hue="series", data=df, showfliers=False, ax=ax)

        # The boxplot x-axis is categorical: the first year sits at x=0 and
        # each later year one unit further right. The regression is expressed
        # in days since 1970, so convert it to axis units before drawing:
        # evaluate the line at the middle of the first plotted year (x=0) and
        # rescale the slope from per-day to per-year.
        # Assumes the plotted years are contiguous.
        first_year = int(df["year"].min())
        origin_day = np.datetime64(f"{first_year}-07-02", "D").astype("int64")

        # Quantile regression lines. "ICN Dataset" is the first hue level and
        # "ERA5 Dataset" the second, so with the default palette the boxes use
        # the first / second cycle colours; give the lines the same colours.
        for da, X_da, color in ((era_da, X_era, 'C1'), (icn_da, X_icn, 'C0')):
            for q in (0.95, 0.50):
                slope, intercept = quantile_regression(da, X_da, q)
                ax.axline(
                    (0, intercept + slope * origin_day),
                    slope=slope * days_per_year,
                    color=color,
                    linestyle='--',
                )

        ax.tick_params(axis="x", rotation=45)
        ax.set_ylabel("Temperature")
        ax.set_xlabel("Year")
        ax.set_title(f"Annual Distribution of Daily {variable_names[var_i]} of {icn_da.station.data}, IL")
        ax.grid(axis="y", linestyle="--", alpha=0.3)
        fig.tight_layout()
        plt.show()
        # Release the figure so the nested loops do not accumulate open figures.
        plt.close(fig)

In [None]:
def quantile_regression(y, X, quantile):
    """Fit an unpenalised linear quantile regression and return its line.

    NOTE: this redefinition shadows the functions of the same name defined
    in earlier cells of this notebook.

    Samples whose response is missing (NaN / inf) are dropped from both
    ``y`` and ``X`` before fitting, so a series with gaps can be fitted
    instead of being rejected by the estimator.

    Parameters
    ----------
    y : array-like, shape (n_samples,)
        Response values (e.g. an xarray DataArray, pandas Series or ndarray).
    X : array-like, shape (n_samples, n_features) or (n_samples,)
        Regressor values; a 1-D input is treated as a single feature.
    quantile : float
        Quantile to fit, in (0, 1).

    Returns
    -------
    numpy.ndarray, shape (2,)
        ``[slope, intercept]`` where ``slope`` is the coefficient of the
        first feature and ``intercept`` is the fitted value at ``X == 0``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths or no finite sample remains.
    """
    y_arr = np.asarray(y, dtype=float).ravel()
    X_arr = np.asarray(X)
    if X_arr.ndim == 1:
        X_arr = X_arr[:, None]

    if X_arr.shape[0] != y_arr.shape[0]:
        raise ValueError(
            f"X has {X_arr.shape[0]} samples but y has {y_arr.shape[0]}"
        )

    valid = np.isfinite(y_arr)
    if not valid.any():
        raise ValueError("y contains no finite values to fit")

    # alpha=0 switches off the L1 penalty, giving a plain quantile regression.
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    qr.fit(X_arr[valid], y_arr[valid])
    return np.array([qr.coef_[0], qr.intercept_])

# Percent values (95, 50); not read by the statements below, which pass 0.95 directly.
quantiles = [95, 50]
variables = ['t2m', 'wb', 'hi']

# NOTE: `era_da` is not defined in this cell — it is whatever series the last
# iteration of a previously executed plotting loop left behind.
# Regressor: days since 1970-01-01.
X = era_da.date.values[:, np.newaxis].astype('datetime64[D]').astype('int64')

slope, intercept = quantile_regression(era_da,X,0.95)

# `slope` is per day; scale by 365.25 days/year and 35 years to get the total
# change of the 95th percentile over the record (was 265.25, a typo).
days_per_year = 365.25
n_years = 35
print(slope * days_per_year * n_years)
# Fitted value at X == 0, i.e. on 1970-01-01.
print(intercept)

In [None]:
# Ad-hoc inspection: calendar year of every sample in `icn_da`, the series
# left over from the last iteration of a previously executed plotting loop.
icn_da.date.dt.year.values