From 827047b3c4f9acdf7eab23a2fca8c612a5749b73 Mon Sep 17 00:00:00 2001 From: "brandon s. tober" Date: Tue, 9 Sep 2025 10:10:51 -0400 Subject: [PATCH 1/2] Bug fix in exporting extra variables, added MCMC stats --- pygem/bin/run/run_calibration.py | 4 + pygem/bin/run/run_simulation.py | 4 +- pygem/output.py | 7 +- pygem/utils/stats.py | 136 +++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 pygem/utils/stats.py diff --git a/pygem/bin/run/run_calibration.py b/pygem/bin/run/run_calibration.py index a0446a7c..7f65a323 100755 --- a/pygem/bin/run/run_calibration.py +++ b/pygem/bin/run/run_calibration.py @@ -46,6 +46,7 @@ single_flowline_glacier_directory, single_flowline_glacier_directory_with_calving, ) +from pygem.utils.stats import mcmc_stats # from oggm.core import climate # from oggm.core.flowline import FluxBasedModel @@ -2135,6 +2136,9 @@ def must_melt(kp, tbias, ddfsnow, **kwargs): modelprms_export['mb_obs_mwea_err'] = [float(mb_obs_mwea_err)] modelprms_export['priors'] = priors + # compute stats on mcmc parameters + modelprms_export = mcmc_stats(modelprms_export) + modelprms_fn = glacier_str + '-modelprms_dict.json' modelprms_fp = [ ( diff --git a/pygem/bin/run/run_simulation.py b/pygem/bin/run/run_simulation.py index 1225bd50..40e07c43 100755 --- a/pygem/bin/run/run_simulation.py +++ b/pygem/bin/run/run_simulation.py @@ -606,7 +606,7 @@ def run(list_packed_vars): rgiid = main_glac_rgi.loc[main_glac_rgi.index.values[glac], 'RGIId'] try: - # for batman in [0]: + # for batman in [0]: # ===== Load glacier data: area (km2), ice thickness (m), width (km) ===== if ( @@ -1779,6 +1779,7 @@ def run(list_packed_vars): sim_endyear=args.sim_endyear, option_calibration=args.option_calibration, option_bias_adjustment=args.option_bias_adjustment, + extra_vars=args.export_extra_vars, ) for n_iter in range(nsims): # pass model params for iteration and update output dataset model params @@ -1878,6 +1879,7 @@ def run(list_packed_vars): sim_endyear=args.sim_endyear, option_calibration=args.option_calibration, option_bias_adjustment=args.option_bias_adjustment, + extra_vars=args.export_extra_vars, ) # create and return xarray dataset output_stats.create_xr_ds() diff --git a/pygem/output.py b/pygem/output.py index eb151808..5a906606 100644 --- a/pygem/output.py +++ b/pygem/output.py @@ -90,6 +90,7 @@ class single_glacier: sim_endyear: int option_calibration: str option_bias_adjustment: str + extra_vars: bool = False def __post_init__(self): """ @@ -492,7 +493,7 @@ def _update_dicts(self): } # optionally store extra variables - if pygem_prms['sim']['out']['export_extra_vars']: + if self.extra_vars: self.output_coords_dict['glac_prec_monthly'] = collections.OrderedDict( [('glac', self.glac_values), ('time', self.time_values)] ) @@ -791,8 +792,8 @@ class binned_stats(single_glacier): Flag indicating whether additional binned components are included in the dataset. """ - nbins: int - binned_components: bool + nbins: int = 0 + binned_components: bool = False def __post_init__(self): """ diff --git a/pygem/utils/stats.py b/pygem/utils/stats.py new file mode 100644 index 00000000..bb4cecfe --- /dev/null +++ b/pygem/utils/stats.py @@ -0,0 +1,136 @@ +""" +Python Glacier Evolution Model (PyGEM) + +copyright © 2018 David Rounce + +Distrubted under the MIT lisence + +Model statistics module +""" + +import numpy as np +import arviz as az + +def effective_n(x): + """ + Compute the effective sample size of a trace. + + Takes the trace and computes the effective sample size + according to its detrended autocorrelation. + + Parameters + ---------- + x : list or array of chain samples + + Returns + ------- + effective_n : int + effective sample size + """ + if len(set(x)) == 1: + return 1 + try: + # detrend trace using mean to be consistent with statistics + # definition of autocorrelation + x = np.asarray(x) + x = (x - x.mean()) + # compute autocorrelation (note: only need second half since + # they are symmetric) + rho = np.correlate(x, x, mode='full') + rho = rho[len(rho)//2:] + # normalize the autocorrelation values + # note: rho[0] is the variance * n_samples, so this is consistent + # with the statistics definition of autocorrelation on wikipedia + # (dividing by n_samples gives you the expected value). + rho_norm = rho / rho[0] + # Iterate until sum of consecutive estimates of autocorrelation is + # negative to avoid issues with the sum being -0.5, which returns an + # effective_n of infinity + negative_autocorr = False + t = 1 + n = len(x) + while not negative_autocorr and (t < n): + if not t % 2: + negative_autocorr = sum(rho_norm[t-1:t+1]) < 0 + t += 1 + return int(n / (1 + 2*rho_norm[1:t].sum())) + except: + return None + + +def mcmc_stats(chains_dict, + params=['tbias','kp','ddfsnow','ddfice','rhoabl', 'rhoacc','mb_mwea']): + """ + Compute per-chain and overall summary stats for MCMC samples. + + Parameters + ---------- + chains_dict : dict + Dictionary with structure: + { + "param1": { + "chain1": [...], + "chain2": [...], + ... + }, + ... + } + + Returns + ------- + summary_stats : dict + Dictionary with structure: + { + "param1": { + "mean": [...], # per chain + "std": [...], + "median": [...], + "q025": [...], + "q975": [...], + "ess": ..., # overall + "r_hat": ... # overall + }, + ... + } + """ + summary_stats = {} + + for param, chains in chains_dict.items(): + if param not in params: + continue + + # Stack chains into array: shape (n_chains, n_samples) + chain_names = sorted(chains) # ensure consistent order + samples = np.array([chains[c] for c in chain_names]) + + # Per-chain stats + means = np.mean(samples, axis=1).tolist() + stds = np.std(samples, axis=1, ddof=1).tolist() + medians = np.median(samples, axis=1).tolist() + q25 = np.quantile(samples, 0.25, axis=1).tolist() + q75 = np.quantile(samples, 0.75, axis=1).tolist() + ess = [effective_n(x) for x in samples] + # Overall stats (R-hat) + if samples.shape[0] > 1: + # calculate the gelman-rubin stat for each variable across all chains + # pass chains as 2d array to arviz using the from_dict() method + # convert the chains into an InferenceData object + idata = az.from_dict(posterior={param: samples}) + # calculate the Gelman-Rubin statistic (rhat) + r_hat = float(az.rhat(idata).to_array().values[0]) + else : + r_hat = None + + summary_stats[param] = { + "mean": means, + "std": stds, + "median": medians, + "q25": q25, + "q75": q75, + "ess": ess, + "r_hat": r_hat + } + + chains_dict['_summary_stats_'] = summary_stats + + return chains_dict \ No newline at end of file From a64a921fd1234f69bcccbb0f1449331981530681 Mon Sep 17 00:00:00 2001 From: "brandon s. tober" Date: Tue, 9 Sep 2025 10:19:55 -0400 Subject: [PATCH 2/2] Applied ruff formating/linting --- pygem/bin/run/run_simulation.py | 2 +- pygem/utils/stats.py | 37 ++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/pygem/bin/run/run_simulation.py b/pygem/bin/run/run_simulation.py index 40e07c43..3066befe 100755 --- a/pygem/bin/run/run_simulation.py +++ b/pygem/bin/run/run_simulation.py @@ -606,7 +606,7 @@ def run(list_packed_vars): rgiid = main_glac_rgi.loc[main_glac_rgi.index.values[glac], 'RGIId'] try: - # for batman in [0]: + # for batman in [0]: # ===== Load glacier data: area (km2), ice thickness (m), width (km) ===== if ( diff --git a/pygem/utils/stats.py b/pygem/utils/stats.py index bb4cecfe..a09b15a2 100644 --- a/pygem/utils/stats.py +++ b/pygem/utils/stats.py @@ -8,8 +8,9 @@ Model statistics module """ -import numpy as np import arviz as az +import numpy as np + def effective_n(x): """ @@ -33,11 +34,11 @@ def effective_n(x): # detrend trace using mean to be consistent with statistics # definition of autocorrelation x = np.asarray(x) - x = (x - x.mean()) + x = x - x.mean() # compute autocorrelation (note: only need second half since # they are symmetric) rho = np.correlate(x, x, mode='full') - rho = rho[len(rho)//2:] + rho = rho[len(rho) // 2 :] # normalize the autocorrelation values # note: rho[0] is the variance * n_samples, so this is consistent # with the statistics definition of autocorrelation on wikipedia @@ -51,15 +52,17 @@ def effective_n(x): n = len(x) while not negative_autocorr and (t < n): if not t % 2: - negative_autocorr = sum(rho_norm[t-1:t+1]) < 0 + negative_autocorr = sum(rho_norm[t - 1 : t + 1]) < 0 t += 1 - return int(n / (1 + 2*rho_norm[1:t].sum())) + return int(n / (1 + 2 * rho_norm[1:t].sum())) except: return None - -def mcmc_stats(chains_dict, - params=['tbias','kp','ddfsnow','ddfice','rhoabl', 'rhoacc','mb_mwea']): + +def mcmc_stats( + chains_dict, + params=['tbias', 'kp', 'ddfsnow', 'ddfice', 'rhoabl', 'rhoacc', 'mb_mwea'], +): """ Compute per-chain and overall summary stats for MCMC samples. @@ -118,19 +121,19 @@ def mcmc_stats(chains_dict, idata = az.from_dict(posterior={param: samples}) # calculate the Gelman-Rubin statistic (rhat) r_hat = float(az.rhat(idata).to_array().values[0]) - else : + else: r_hat = None summary_stats[param] = { - "mean": means, - "std": stds, - "median": medians, - "q25": q25, - "q75": q75, - "ess": ess, - "r_hat": r_hat + 'mean': means, + 'std': stds, + 'median': medians, + 'q25': q25, + 'q75': q75, + 'ess': ess, + 'r_hat': r_hat, } chains_dict['_summary_stats_'] = summary_stats - return chains_dict \ No newline at end of file + return chains_dict