In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer

# Names of the three scale-factor inputs.
input_names = [
    'ZR_scale_factor',
    'ROOF_WIDTH_scale_factor',
    'ROAD_WIDTH_scale_factor',
]

# Folder holding the per-hour "Hour??_CityAvg.csv" files read further below.
csv_folder = "/project2/zhan248_1326/hhao4018/UQ_analysis/Hourly_CityMean_CSV_Summer_v4/"

# Folder that receives the generated figures; created up front if missing.
output_dir = "/project2/zhan248_1326/hhao4018/UQ_analysis/PCE_Hourly_Stats_Plots_v4"
os.makedirs(output_dir, exist_ok=True)
# Display unit for each output variable (used in figure titles).
# 'NET_URB' is included because that column is derived in this notebook and
# the plotting cells look its unit up; 'LNET_plus_SNET_URB' is kept for the
# intermediate, unscaled sum column.
unit_dict = {
    'T2': 'K', 'RH': '%', 'HFX': 'W/m²', 'TC_URB': 'K', 'WS': 'm/s',
    'SNET_URB': 'W/m²', 'LNET_URB': 'W/m²', 'PBLH': 'm', 'UC_URB': 'm/s',
    'GRDFLX': 'W/m²', 'LH_URB': 'W/m²', 'LH': 'W/m²', 'LWDNB': 'W/m²',
    'LWUPB': 'W/m²', 'QC_URB': 'kg/kg', 'SH_URB': 'kg/kg', 'SWDNB': 'W/m²',
    'SWUPB': 'W/m²', 'TB_URB': 'K', 'TG_URB': 'K', 'TR_URB': 'K', 'TS_URB': 'K',
    'LNET_plus_SNET_URB': 'W/m²',
    'NET_URB': 'W/m²',
}

# Mathtext display names for the plotted variables; kept identical to the
# copies defined in the plotting cells so every cell agrees on the labels.
unit_official_names = {
    'T2': 'T$_2$', 'TC_URB': 'T$_C$', 'RH': 'RH', 'WS': 'WS', 'PBLH': 'PBLH',
    'SNET_URB': 'SW$_{NET}$', 'LNET_URB': 'LW$_{NET}$', 'NET_URB': 'R$_{NET}$'
}

from numpy.polynomial import legendre
from sklearn.base import BaseEstimator, TransformerMixin

class OrthogonalPolynomialFeatures(BaseEstimator, TransformerMixin):
    """Tensor-product Legendre polynomial features of bounded total degree.

    Every input column is mapped linearly from ``input_range`` onto [-1, 1]
    and expanded in Legendre polynomials; the output columns are products of
    one polynomial per input column whose degrees sum to at most ``degree``.

    Parameters
    ----------
    degree : int, default=2
        Maximum total degree of the multivariate basis terms.
    include_bias : bool, default=True
        Whether to keep the constant (all-degrees-zero) term.
    input_range : tuple of (float, float), default=(0.5, 1.5)
        ``(lower, upper)`` interval mapped onto [-1, 1]. The default
        reproduces the mapping ``2 * (x - 0.5) - 1`` that used to be
        hard-coded in ``transform``.

    Attributes
    ----------
    X_min_, X_max_ : ndarray of shape (n_features,)
        Column-wise minimum / maximum seen in ``fit``; only used to detect
        constant columns.
    combinations_ : list of tuple of int
        Per-column degrees of each output feature, ordered by total degree
        and then lexicographically.
    """

    def __init__(self, degree=2, include_bias=True, input_range=(0.5, 1.5)):
        self.degree = degree
        self.include_bias = include_bias
        self.input_range = input_range

    def fit(self, X, y=None):
        """Record column ranges and enumerate the multi-indices of the basis.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``input_range`` is not an increasing pair.
        """
        # Coerce first so DataFrames / lists behave exactly like ndarrays.
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got array with shape {X.shape}")
        lower, upper = self.input_range
        if not upper > lower:
            raise ValueError(f"input_range must satisfy lower < upper, got {self.input_range}")

        self.X_min_ = X.min(axis=0)
        self.X_max_ = X.max(axis=0)
        n_features = X.shape[1]

        combos = []
        for comb in itertools.product(range(self.degree + 1), repeat=n_features):
            total = sum(comb)
            if total > self.degree:
                continue
            if total == 0 and not self.include_bias:
                continue
            combos.append(comb)
        combos.sort(key=lambda comb: (sum(comb), comb))
        self.combinations_ = combos
        return self

    def transform(self, X):
        """Evaluate the Legendre product basis on ``X``.

        Returns
        -------
        ndarray of shape (n_samples, len(combinations_))

        Raises
        ------
        ValueError
            If the number of columns differs from the one seen in ``fit``.
        """
        X = np.asarray(X, dtype=float)
        n_samples, n_features = X.shape
        if n_features != len(self.X_min_):
            raise ValueError(
                f"X has {n_features} features, but the transformer was fitted "
                f"with {len(self.X_min_)}"
            )

        lower, upper = self.input_range
        X_scaled = np.empty_like(X)
        for j in range(n_features):
            if self.X_max_[j] - self.X_min_[j] == 0:
                # Constant column in the training data: carries no information,
                # so pin it to 0 (only the even-degree terms stay non-zero).
                X_scaled[:, j] = 0.0
            else:
                X_scaled[:, j] = 2.0 * (X[:, j] - lower) / (upper - lower) - 1.0

        # leg_values[j][:, d] holds P_d evaluated on scaled column j.
        leg_values = [
            legendre.legvander(X_scaled[:, j], self.degree) for j in range(n_features)
        ]

        if not self.combinations_:
            return np.empty((n_samples, 0))

        features = []
        for comb in self.combinations_:
            term = np.ones(n_samples)
            for j, d in enumerate(comb):
                term *= leg_values[j][:, d]
            features.append(term)
        return np.column_stack(features)

# Sampled input design: columns 1-3 of the CSV are taken as the design matrix.
Xinputs_file = "UMP_100sampling_input.csv"
X_input = pd.read_csv(Xinputs_file).iloc[:, 1:4].to_numpy()

# Same matrix restricted to the rows that contain no NaN in any column.
X_valid = X_input[np.logical_not(np.any(np.isnan(X_input), axis=1))]

import itertools
from sklearn.linear_model import LinearRegression

# hourly_summary[season][hour][variable] -> {"mean", "std", "r2"} obtained from
# a Legendre polynomial-chaos surrogate fitted per hour and per output variable.
hourly_summary = {"Summer": {}}

# Rows of the design matrix without any NaN (does not depend on hour/variable).
x_row_ok = ~np.isnan(X_input).any(axis=1)

for season, folder in zip(["Summer"], [csv_folder]):
    for hour in range(24):
        path = os.path.join(folder, f"Hour{hour:02d}_CityAvg.csv")

        # Hours without a CSV are skipped and get no entry in hourly_summary.
        if not os.path.exists(path):
            continue
        df = pd.read_csv(path)

        # Net radiation is the sum of the raw long- and short-wave columns.
        # NOTE(review): 697.7*60 looks like a unit conversion to W/m² -- TODO confirm.
        df["LNET_plus_SNET_URB"] = df["LNET_URB"] + df["SNET_URB"]
        df["NET_URB"] = df["LNET_plus_SNET_URB"]*697.7*60
        df["LNET_URB"] = df["LNET_URB"] *697.7*60
        df["SNET_URB"] = df["SNET_URB"]*697.7*60
        df_y = df.drop(columns=["Model"])

        # Outputs are matched to input samples purely by row position.
        if len(df_y) != len(X_input):
            raise ValueError(
                f"{path}: {len(df_y)} output rows do not match "
                f"{len(X_input)} input samples"
            )
        stats = {}

        for out_var in df_y.columns:
            y = df_y[out_var].to_numpy()

            # Keep only samples where both the output and all inputs are present,
            # and apply the SAME mask to X and y so the rows stay aligned.
            valid_mask = ~np.isnan(y) & x_row_ok
            y_valid = y[valid_mask]
            X_use = X_input[valid_mask]

            if len(y_valid) < 10:
                continue

            # Choose the total degree (1..3) with the best 5-fold CV R².
            best_score = -np.inf
            best_deg   = 1
            for deg in range(1, 4):
                cv_pipe = Pipeline([
                    ("imputer", SimpleImputer(strategy="mean")),
                    ("poly",    OrthogonalPolynomialFeatures(degree=deg)),
                    ("reg",     LinearRegression())
                ])
                score = cross_val_score(
                    cv_pipe, X_use, y_valid, cv=5, scoring="r2"
                ).mean()
                if score > best_score:
                    best_score = score
                    best_deg   = deg

            # Refit once on all valid samples with the selected degree.
            pipeline = Pipeline([
                ("imputer", SimpleImputer(strategy="mean")),
                ("poly",    OrthogonalPolynomialFeatures(degree=best_deg)),
                ("reg",     LinearRegression())
            ])
            pipeline.fit(X_use, y_valid)

            reg    = pipeline.named_steps["reg"]
            coeffs = reg.coef_     # shape = (n_basis,)
            poly   = pipeline.named_steps["poly"]

            # Second moment of each basis term, assuming independent inputs that
            # are uniform on the interval the transformer maps to [-1, 1]:
            #   E[P_d(x)^2] = 1 / (2d + 1)  per dimension, multiplied across
            # dimensions. (Multiplying 2/(2d+1) per dimension and halving once
            # is only correct for single-variable terms and inflates the
            # contribution of interaction terms.)
            basis_var = np.array([
                np.prod([1.0 / (2 * d + 1) for d in comb])
                for comb in poly.combinations_
            ])
            is_nonconst = np.array([sum(comb) > 0 for comb in poly.combinations_])

            # Mean = intercept plus the coefficient of the constant basis term
            # (every non-constant Legendre term has zero expectation).
            predicted_mean = reg.intercept_ + coeffs[~is_nonconst].sum()

            # Variance = sum of squared coefficients weighted by the term moments.
            pce_var = np.sum(coeffs[is_nonconst] ** 2 * basis_var[is_nonconst])
            predicted_std = np.sqrt(pce_var)

            # "r2" is the mean cross-validated R² of the selected degree.
            stats[out_var] = {"mean": predicted_mean, "std": predicted_std, "r2": best_score }

        hourly_summary[season][hour] = stats


# **Plot figures: hourly mean and standard deviation**

In [3]:
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines   import Line2D
import matplotlib as mpl

mpl.rcParams['mathtext.default'] = 'rm'

# Shared figure styling.
# Note: "font.serif" only takes effect when "font.family" is set to "serif".
plt.rcParams.update({
    "font.serif": ["Times New Roman"],
    "axes.linewidth": 1.2,
    "xtick.direction": "in",
    "ytick.direction": "in",
    "xtick.major.size": 4,
    "ytick.major.size": 4,
    "axes.grid": False,
    "axes.titlesize": 14,
    "axes.labelsize": 14,
    "xtick.labelsize": 14,
    "ytick.labelsize": 14,
    "legend.fontsize": 12,
})

summer_color       = "#d62728"
summer_light_color = "#ff9896"

# Panel order, row by row, of the 4x2 figure.
variables = [
    "T2", "TC_URB",
    "RH", "WS",
    "PBLH", "SNET_URB",
    "LNET_URB", "NET_URB"
]

unit_dict = {
    'T2': 'K', 'RH': '%', 'HFX': 'W/m²', 'TC_URB': 'K', 'WS': 'm/s',
    'SNET_URB': 'W/m²', 'LNET_URB': 'W/m²', 'PBLH': 'm', 'UC_URB': 'm/s',
    'GRDFLX': 'W/m²', 'LH_URB': 'W/m²', 'LH': 'W/m²', 'LWDNB': 'W/m²',
    'LWUPB': 'W/m²', 'QC_URB': 'kg/kg', 'SH_URB': 'kg/kg', 'SWDNB': 'W/m²',
    'SWUPB': 'W/m²', 'TB_URB': 'K', 'TG_URB': 'K', 'TR_URB': 'K', 'TS_URB': 'K',
    'NET_URB': 'W/m²'
}

unit_official_names = {
    'T2': 'T$_2$','TC_URB': 'T$_C$', 'RH': 'RH', 'WS':'WS', 'PBLH':'PBLH',
    'SNET_URB': 'SW$_{NET}$', 'LNET_URB': 'LW$_{NET}$', 'NET_URB': 'R$_{NET}$'
}

output_dir = "/project2/zhan248_1326/hhao4018/UQ_analysis/PCE_Hourly_Stats_Plots_v4"
os.makedirs(output_dir, exist_ok=True)

# No constrained_layout here: the figure is laid out with tight_layout() and
# subplots_adjust() below, which are incompatible with the constrained engine.
fig, axes = plt.subplots(4, 2, figsize=(8, 8), sharex=True)
axes = axes.flatten()

# Shift the file hour index by `offset` hours to local time and sort so the
# x axis runs 0..23 (identical for every panel, hence computed once).
offset = -7
h_local = np.mod(np.arange(24) + offset, 24)
order = np.argsort(h_local)
h0 = h_local[order]

# Bars for local hours 7-19 use the "day" colour, all others the "night" grey.
bar_colors = [
    summer_light_color if 7 <= hour <= 19 else '#d3d3d3'
    for hour in h0
]

summer_stats = hourly_summary["Summer"]

for idx, var in enumerate(variables):
    ax = axes[idx]
    ax2 = ax.twinx()

    # Hours (or variables) missing from the summary become NaN instead of
    # raising KeyError, so a skipped hour simply leaves a gap in the panel.
    me = np.array([summer_stats.get(h, {}).get(var, {}).get("mean", np.nan) for h in range(24)], dtype=float)
    st = np.array([summer_stats.get(h, {}).get(var, {}).get("std", np.nan) for h in range(24)], dtype=float)
    m0, s0 = me[order], st[order]

    # NOTE: zorder only orders artists within one Axes; the twin axis (bars)
    # is drawn after the primary axis (line).
    ax2.bar(
        h0, s0,
        color=bar_colors,
        width=0.8, alpha=0.7,
        zorder=1
    )

    # Scale the SD axis to the data only when there is a finite, positive
    # maximum; otherwise keep matplotlib's default limits.
    finite_sd = s0[np.isfinite(s0)]
    if finite_sd.size and finite_sd.max() > 0:
        ax2.set_ylim(0, finite_sd.max() * 1.1)

    ax.plot(
        h0, m0, '-o',
        color=summer_color, lw=2, markersize=4,
        zorder=2
    )

    ax.set_xlim(0, 23)
    ax.set_xticks(np.arange(0, 24, 4))
    if idx >= 6:
        ax.set_xlabel("Hour of Day (Local Time)")
    if var == "T2":
        # Fixed axis limits for the first panel.
        ax.set_ylim(288, 301)
        ax2.set_ylim(0, 0.4)
    if idx % 2 == 0:
        ax.set_ylabel("Mean")
        ax2.set_ylabel("")
    else:
        ax.set_ylabel("")
        ax2.set_ylabel("SD", rotation=270, labelpad=15)

    # Panel letter (a), (b), ... above the top-left corner.
    ax.text(
        0.02, 1.15,
        f"({chr(97+idx)})",
        transform=ax.transAxes,
        fontsize=14, fontweight='bold', va='top', zorder=80
    )
    # Fall back to the raw variable name when no display name is defined.
    name = unit_official_names.get(var, var)
    ax.set_title(f"{name} [{unit_dict[var]}]", pad=2, fontweight='bold')

# One shared legend for the whole figure, built from proxy artists.
handles = [
    Line2D([0], [0], color=summer_color, lw=2, marker="o", label="Mean"),
    Patch(facecolor=summer_light_color, alpha=0.7, label="SD (Day)"),
    Patch(facecolor="lightgray",        alpha=0.7, label="SD (Night)"),
]

fig.legend(
    handles=handles,
    loc='lower center',
    ncol=len(handles),
    frameon=False,
    bbox_to_anchor=(0.5, -0.02),
    fontsize = 16
)

# Pack the panels, then reserve room at the bottom for the figure legend.
fig.tight_layout(h_pad=1.2, w_pad=0.4)
fig.subplots_adjust(bottom=0.12)
fig.savefig(os.path.join(output_dir, "summer_hourly_4x2.png"), dpi=600)
plt.close(fig)

  plt.tight_layout(h_pad=1.2, w_pad=0.4)


# **Plot hourly cross-validated $R^2$**

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import pandas as pd

# Flatten the nested summary into one record per (season, hour, variable).
rows = [
    {
        "Season":   season_name,
        "Hour":     hr,
        "Variable": var_name,
        "R2":       var_stats.get("r2", np.nan),
    }
    for season_name in ["Summer"]
    for hr, hour_stats in hourly_summary[season_name].items()
    for var_name, var_stats in hour_stats.items()
]
df_r2 = pd.DataFrame(rows)

# Variables shown in the 4x2 panel grid, in panel order.
target_vars = [
    "T2", "TC_URB", "RH", "WS", "PBLH", "SNET_URB", "LNET_URB", "NET_URB"
]

unit_official_names = {
    'T2': 'T$_2$','TC_URB': 'T$_C$', 'RH': 'RH', 'WS':'WS', 'PBLH':'PBLH',
    'SNET_URB': 'SW$_{NET}$', 'LNET_URB': 'LW$_{NET}$', 'NET_URB': 'R$_{NET}$'
}
summer_color = "#d62728"

# Restrict the table to the summer rows of the plotted variables.
is_summer = df_r2["Season"] == "Summer"
is_target = df_r2["Variable"].isin(target_vars)
df_r2_summer = df_r2[is_summer & is_target].copy()

fig, axes = plt.subplots(4, 2, figsize=(10, 12), sharey=True)
axes = axes.flatten()

# Hour shift applied to the stored hour index before plotting.
offset = -7

for idx, (ax, var) in enumerate(zip(axes, target_vars)):
    sub = df_r2_summer[df_r2_summer["Variable"] == var]

    # Shift the hours, then order the bars along the shifted hour axis.
    hrs = np.mod(sub["Hour"].to_numpy() + offset, 24)
    idx_sort = np.argsort(hrs)
    hrs_s = hrs[idx_sort]
    r2_s = sub["R2"].to_numpy()[idx_sort]

    ax.bar(hrs_s, r2_s, color=summer_color, width=0.8)
    ax.set_xticks(np.arange(0, 24, 2))
    ax.set_xlim(-0.5, 23.5)
    ax.set_ylim(0, 1.05)

    # Only the left column carries a y label, only the bottom row an x label.
    in_left_column = (idx % 2 == 0)
    in_bottom_row = (idx >= 6)
    if in_left_column:
        ax.set_ylabel("$R^2$", fontsize=18)
    else:
        ax.set_ylabel("")
    if in_bottom_row:
        ax.set_xlabel("Hour (Local)", fontsize=18)
    else:
        ax.set_xlabel("")

    ax.set_title(unit_official_names.get(var, var), fontsize=18, fontweight='bold', pad=2)

    # Panel letter (a), (b), ... on a white box above the top-left corner.
    panel_tag = f"({chr(97+idx)})"
    tag_box = dict(boxstyle="round,pad=0.1", facecolor='white', edgecolor='none')
    ax.text(0.01, 1.13, panel_tag, transform=ax.transAxes,
            fontsize=18, fontweight='bold', va='top',
            bbox=tag_box)

plt.tight_layout(h_pad=1.2, w_pad=1.0)
fig.subplots_adjust(left=0.09, right=0.96, top=0.96, bottom=0.06)
outpath = os.path.join(output_dir, "Summer_Hourly_R2_8vars.png")
fig.savefig(outpath, dpi=300)
plt.close(fig)
print(f"Saved: {outpath}")


Saved: /project2/zhan248_1326/hhao4018/UQ_analysis/PCE_Hourly_Stats_Plots_v4/Summer_Hourly_R2_8vars.png
