# Monte Carlo sampling and figure generation

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from numpy.polynomial import legendre
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer

import matplotlib as mpl
mpl.rcParams['mathtext.default'] = 'rm'

# --- PCE transformation class ---
class OrthogonalPolynomialFeatures(BaseEstimator, TransformerMixin):
    """Expand input columns into products of Legendre polynomials.

    Every column is mapped affinely from ``[lower, upper]`` onto ``[-1, 1]``
    and evaluated with the Legendre polynomials of order ``0..degree``.  The
    output holds one column per multi-index whose total order does not exceed
    ``degree``; each column is the product of the per-feature polynomials
    selected by that multi-index.

    Parameters
    ----------
    degree : int, default=3
        Maximum total polynomial order of the expansion.
    include_bias : bool, default=True
        Whether to keep the all-zero multi-index (the constant column).
    lower, upper : float, default=0.5 and 1.5
        Bounds of the interval that is mapped onto ``[-1, 1]``.  The defaults
        reproduce the previously hard-coded mapping ``2 * (x - 0.5) - 1``.

    Attributes
    ----------
    X_min_, X_max_ : ndarray of shape (n_features,)
        Column-wise minimum / maximum seen in ``fit``; only used to detect
        constant columns.
    combinations_ : list of tuple of int
        Multi-indices of the expansion, sorted by total order and then
        lexicographically.
    """

    def __init__(self, degree=3, include_bias=True, lower=0.5, upper=1.5):
        self.degree = degree
        self.include_bias = include_bias
        self.lower = lower
        self.upper = upper

    def fit(self, X, y=None):
        """Record column ranges and enumerate the multi-indices.

        Raises
        ------
        ValueError
            If ``upper`` is not strictly greater than ``lower``.
        """
        if not self.upper > self.lower:
            raise ValueError(
                f"upper ({self.upper!r}) must be greater than lower ({self.lower!r})"
            )
        X = np.asarray(X)
        self.X_min_ = X.min(axis=0)
        self.X_max_ = X.max(axis=0)
        n_features = X.shape[1]
        combos = [
            comb
            for comb in itertools.product(range(self.degree + 1), repeat=n_features)
            if sum(comb) <= self.degree and (self.include_bias or sum(comb) > 0)
        ]
        combos.sort(key=lambda c: (sum(c), c))
        self.combinations_ = combos
        return self

    def transform(self, X):
        """Return the ``(n_samples, len(combinations_))`` feature matrix."""
        X = np.asarray(X)
        n_samples, n_features = X.shape

        # Affine map [lower, upper] -> [-1, 1]; with the default bounds the
        # divisor is exactly 1.0, so the result equals 2 * (x - 0.5) - 1.
        width = self.upper - self.lower
        X_scaled = 2 * (X.astype(float) - self.lower) / width - 1
        # Columns that were constant during fit carry no information and are
        # pinned to 0, as before.
        X_scaled[:, self.X_max_ == self.X_min_] = 0

        # leg_vals[j][:, d] is the order-d Legendre polynomial of feature j.
        leg_vals = [
            legendre.legvander(X_scaled[:, j], self.degree)
            for j in range(n_features)
        ]

        # Filling a preallocated matrix (instead of hstack-ing a list) also
        # covers an empty multi-index set, which then yields zero columns.
        features = np.empty((n_samples, len(self.combinations_)))
        for col, comb in enumerate(self.combinations_):
            term = np.ones(n_samples)
            for j, d in enumerate(comb):
                term *= leg_vals[j][:, d]
            features[:, col] = term
        return features

# Directory holding the per-season model summary CSVs; change it to point at
# the daily data directory.
summary_dir = "/project2/zhan248_1326/hhao4018/UQ_analysis/PCE_urban_nonurban_average_v4/"

# Directory of the input (scale-factor) table and the three surrogate inputs.
input_dir   = "."
input_names = ["ZR_scale_factor","ROOF_WIDTH_scale_factor","ROAD_WIDTH_scale_factor"]

# Figures are written here; the directory is created on first use.
out_dir = "/project2/zhan248_1326/hhao4018/UQ_analysis/Additional_Sensitivity_v4/plots_hist_refined"
os.makedirs(out_dir, exist_ok=True)

# (season, period) combinations for which a surrogate is fitted.
scenarios = [
    ("summer", "day"),
    ("summer", "night"),
]

# Axis-label units per variable base name.
unit_dict = {
    'T2': 'K', 'RH': '%', 'HFX': 'W/m²', 'TC_URB': 'K', 'WS': 'm/s',
    'SNET_URB': 'W/m²', 'LNET_URB': 'W/m²', 'PBLH': 'm', 'UC_URB': 'm/s',
    'GRDFLX': 'W/m²', 'LH_URB': 'W/m²', 'LH': 'W/m²', 'LWDNB': 'W/m²',
    # NOTE(review): SH_URB shares 'kg/kg' with QC_URB; if it is the urban
    # sensible-heat-flux field it should be 'W/m²' like LH_URB -- TODO confirm.
    'LWUPB': 'W/m²', 'QC_URB': 'kg/kg', 'SH_URB': 'kg/kg', 'SWDNB': 'W/m²',
    'SWUPB': 'W/m²', 'TB_URB': 'K', 'TG_URB': 'K', 'TR_URB': 'K', 'TS_URB': 'K',
    'LNET_plus_SNET_URB':'W/m²', 'NET_URB': 'W/m²'
}

# Mathtext display names; variables not listed fall back to their base name.
unit_official_names = {
    'T2': 'T$_2$','TC_URB': 'T$_C$', 
    'SNET_URB': 'SW$_{NET}$', 'LNET_URB': 'LW$_{NET}$', 'NET_URB': 'R$_{NET}$'
}

# A seeded, local generator keeps the sampled grid -- and therefore every
# statistic and figure derived from it -- identical from run to run, without
# touching NumPy's global random state.
RANDOM_SEED = 0
rng = np.random.default_rng(RANDOM_SEED)

# 100 scale-factor values drawn uniformly from [0.5, 1.5).
factor_vals = rng.uniform(0.5, 1.5, size=(100, 1))

# Fixed factor of exactly 1 (a uniform draw on [1, 1] can only return 1.0).
factor_vals_fix = np.ones((1, 1))

# Tensor-product grid: the same 100 draws are reused on all three axes, giving
# 100**3 evaluation points, one row per point and one column per input.
g1, g2, g3  = np.meshgrid(factor_vals, factor_vals, factor_vals, indexing='ij')

X_grid      = np.vstack([g1.ravel(), g2.ravel(), g3.ravel()]).T

# Join the summer model summary with the input table on "Model" so the set of
# available output variables can be discovered from the column names.
model_csv = os.path.join(summary_dir, "summer_model_summary.csv")
input_csv = os.path.join(input_dir, "day_night_citymask_shadow_vars_summer.csv")
df_model = pd.read_csv(model_csv)
df_in = pd.read_csv(input_csv)
merged_tmp = df_model.merge(df_in, on="Model", how="inner")

# Net urban radiation = long-wave net + short-wave net, for each period.
for period_tag in ("day", "night"):
    longwave = merged_tmp[f"LNET_URB_{period_tag}_city"]
    shortwave = merged_tmp[f"SNET_URB_{period_tag}_city"]
    merged_tmp[f"NET_URB_{period_tag}_city"] = longwave + shortwave

# Output columns are those ending in "_city" that are neither an input factor
# nor the model identifier.
excluded_cols = input_names + ["Model"]
var_cols = []
for col in merged_tmp.columns:
    if not col.endswith("_city"):
        continue
    if col in excluded_cols:
        continue
    var_cols.append(col)

# Strip the period/city suffix to obtain the sorted, de-duplicated base names.
base_names = set()
for col in var_cols:
    base_names.add(col.replace("_day_city", "").replace("_night_city", ""))
var_bases = sorted(base_names)

# Fitted pipelines per variable: all_pipes[var_base][(season, period)].
all_pipes = {}
# Largest prediction span over the evaluation grid, per variable.
all_W     = {}

# Histogram colour per scenario.
color_map = {
    ('summer','day'):   'tab:blue',
    ('summer','night'): 'tab:orange',

}


def _make_pce_pipeline(degree):
    """Build the impute -> Legendre expansion -> least-squares pipeline."""
    return Pipeline([
        ("imp",  SimpleImputer(strategy="mean")),
        ("poly", OrthogonalPolynomialFeatures(degree=degree)),
        ("reg",  LinearRegression())
    ])


def _fit_best_pce(X, y, degrees=(1, 2, 3), cv=5):
    """Fit the pipeline whose degree has the best mean cross-validated R^2.

    Ties keep the lowest degree, and the first degree is used when no score
    compares greater than ``-inf`` (for example when every score is NaN).
    The returned pipeline is refitted on all of ``X`` and ``y``.
    """
    best_score, best_deg = -np.inf, degrees[0]
    for deg in degrees:
        score = cross_val_score(
            _make_pce_pipeline(deg), X, y, cv=cv, scoring="r2"
        ).mean()
        if score > best_score:
            best_score, best_deg = score, deg
    return _make_pce_pipeline(best_deg).fit(X, y)


# Load and merge the tables once per season instead of once per
# (variable, scenario) pair; the files do not change inside the loop.
merged_by_season = {}
for season in dict.fromkeys(s for s, _ in scenarios):
    season_model = pd.read_csv(os.path.join(summary_dir, f"{season}_model_summary.csv"))
    # The input table name does not depend on the season.
    # NOTE(review): confirm this is intended if further seasons are added.
    season_input = pd.read_csv(os.path.join(input_dir, "day_night_citymask_shadow_vars_summer.csv"))
    merged_by_season[season] = pd.merge(season_model, season_input, on="Model", how="inner")

# Net urban radiation = long-wave net + short-wave net, per scenario.
for season, period in scenarios:
    season_frame = merged_by_season[season]
    season_frame[f"NET_URB_{period}_city"] = (
        season_frame[f"LNET_URB_{period}_city"] + season_frame[f"SNET_URB_{period}_city"]
    )

for var_base in var_bases:
    pipes = {}
    for season, period in scenarios:
        merged = merged_by_season[season]
        X_base = merged[input_names].values
        y      = merged[f"{var_base}_{period}_city"].values
        pipes[(season, period)] = _fit_best_pce(X_base, y)

    all_pipes[var_base] = pipes

    # Widest range of surrogate predictions over the grid among day and night.
    spans = []
    for scenario_key in (("summer","day"), ("summer","night")):
        pred = pipes[scenario_key].predict(X_grid)
        spans.append(pred.max() - pred.min())
    all_W[var_base] = max(spans)

# Each figure row shows two variables; every variable takes a Day panel and a
# Night panel, so a row has four panels.
row_vars = [
    ("T2",      "TC_URB"),
    ("RH",      "WS"),
    ("PBLH",    "SNET_URB"),
    ("LNET_URB","NET_URB")
]

fig, axes = plt.subplots(
    nrows=4, ncols=4,
    figsize=(12, 10),
    gridspec_kw=dict(wspace=0.2, hspace=0.5),
)

# Panel title per period; the insertion order (day, night) sets panel order.
period_titles = {"day": "Day", "night": "Night"}

for row_idx, var_pair in enumerate(row_vars):
    for pair_idx, var_base in enumerate(var_pair):
        fitted = all_pipes[var_base]

        # Surrogate predictions over the evaluation grid for both periods.
        preds = {
            period: fitted[("summer", period)].predict(X_grid)
            for period in period_titles
        }

        # Day and night share one window width W (the larger of the two
        # prediction ranges) so their spreads are visually comparable.
        W = max(values.max() - values.min() for values in preds.values())
        half_width = W / 2

        # NOTE(review): each window is centred on the median, so predictions
        # farther than W/2 from it fall outside the bins and the x-limits --
        # confirm this clipping is intended.
        bin_edges = {}
        density_peaks = []
        for period, values in preds.items():
            center = np.median(values)
            bin_edges[period] = np.linspace(center - half_width, center + half_width, 50)
            density, _ = np.histogram(values, bins=bin_edges[period], density=True)
            density_peaks.append(density.max())
        # Common y-limit for the day/night pair, with 5 % headroom.
        ymax = max(density_peaks) * 1.05

        for k, (period, y_pred) in enumerate(preds.items()):
            panel_col = pair_idx * 2 + k
            ax = axes[row_idx, panel_col]

            # Spread statistics reported in the panel's text box.
            median = np.median(y_pred)
            sigma = np.std(y_pred)
            q25, q75 = np.percentile(y_pred, [25, 75])
            iqr = q75 - q25
            mad = np.median(np.abs(y_pred - median))

            ax.hist(
                y_pred,
                bins=bin_edges[period],
                density=True,
                alpha=0.6,
                color=color_map[("summer", period)],
                edgecolor='black',
                linewidth=0.5,
            )
            ax.set_xlim(median - half_width, median + half_width)
            ax.set_ylim(0, ymax)

            ax.set_title(period_titles[period], pad=4)
            stats = f"SD={sigma:.2f}\nIQR={iqr:.2f}\nMAD={mad:.2f}"
            ax.text(
                0.95, 0.95, stats,
                transform=ax.transAxes,
                ha='right', va='top',
                fontsize=10,
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8),
            )

            # x-label: display name, plus the unit in brackets when known.
            unit = unit_dict.get(var_base, "")
            name = unit_official_names.get(var_base, var_base)
            if unit:
                ax.set_xlabel(f"{name} [{unit}]")
            else:
                ax.set_xlabel(name)

            # Panel letter (a), (b), ... numbered row by row.
            idx = row_idx * 4 + panel_col
            ax.text(
                0.02, 0.98, f"({chr(ord('a') + idx)})",
                transform=ax.transAxes,
                fontsize=12, fontweight='bold', va='top',
            )

plt.tight_layout()
figure_path = os.path.join(out_dir, "summer_4x4_histograms_centered_xy.png")
fig.savefig(figure_path, dpi=600, bbox_inches='tight')
plt.show()
plt.close(fig)



  # NOTE(review): these two indented calls have no enclosing block in view and
  # come after the figure has been closed; they look like notebook-export
  # residue -- confirm and remove.
  plt.tight_layout()
  plt.tight_layout()
