Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pygem/bin/run/run_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
single_flowline_glacier_directory,
single_flowline_glacier_directory_with_calving,
)
from pygem.utils.stats import mcmc_stats

# from oggm.core import climate
# from oggm.core.flowline import FluxBasedModel
Expand Down Expand Up @@ -2135,6 +2136,9 @@ def must_melt(kp, tbias, ddfsnow, **kwargs):
modelprms_export['mb_obs_mwea_err'] = [float(mb_obs_mwea_err)]
modelprms_export['priors'] = priors

# compute stats on mcmc parameters
modelprms_export = mcmc_stats(modelprms_export)

modelprms_fn = glacier_str + '-modelprms_dict.json'
modelprms_fp = [
(
Expand Down
2 changes: 2 additions & 0 deletions pygem/bin/run/run_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1779,6 +1779,7 @@ def run(list_packed_vars):
sim_endyear=args.sim_endyear,
option_calibration=args.option_calibration,
option_bias_adjustment=args.option_bias_adjustment,
extra_vars=args.export_extra_vars,
)
for n_iter in range(nsims):
# pass model params for iteration and update output dataset model params
Expand Down Expand Up @@ -1878,6 +1879,7 @@ def run(list_packed_vars):
sim_endyear=args.sim_endyear,
option_calibration=args.option_calibration,
option_bias_adjustment=args.option_bias_adjustment,
extra_vars=args.export_extra_vars,
)
# create and return xarray dataset
output_stats.create_xr_ds()
Expand Down
7 changes: 4 additions & 3 deletions pygem/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class single_glacier:
sim_endyear: int
option_calibration: str
option_bias_adjustment: str
extra_vars: bool = False

def __post_init__(self):
"""
Expand Down Expand Up @@ -492,7 +493,7 @@ def _update_dicts(self):
}

# optionally store extra variables
if pygem_prms['sim']['out']['export_extra_vars']:
if self.extra_vars:
self.output_coords_dict['glac_prec_monthly'] = collections.OrderedDict(
[('glac', self.glac_values), ('time', self.time_values)]
)
Expand Down Expand Up @@ -791,8 +792,8 @@ class binned_stats(single_glacier):
Flag indicating whether additional binned components are included in the dataset.
"""

nbins: int
binned_components: bool
nbins: int = 0
binned_components: bool = False

def __post_init__(self):
"""
Expand Down
139 changes: 139 additions & 0 deletions pygem/utils/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
Python Glacier Evolution Model (PyGEM)

copyright © 2018 David Rounce <drounce@cmu.edu>

Distrubted under the MIT lisence

Model statistics module
"""

import arviz as az
import numpy as np


def effective_n(x):
"""
Compute the effective sample size of a trace.

Takes the trace and computes the effective sample size
according to its detrended autocorrelation.

Parameters
----------
x : list or array of chain samples

Returns
-------
effective_n : int
effective sample size
"""
if len(set(x)) == 1:
return 1
try:
# detrend trace using mean to be consistent with statistics
# definition of autocorrelation
x = np.asarray(x)
x = x - x.mean()
# compute autocorrelation (note: only need second half since
# they are symmetric)
rho = np.correlate(x, x, mode='full')
rho = rho[len(rho) // 2 :]
# normalize the autocorrelation values
# note: rho[0] is the variance * n_samples, so this is consistent
# with the statistics definition of autocorrelation on wikipedia
# (dividing by n_samples gives you the expected value).
rho_norm = rho / rho[0]
# Iterate until sum of consecutive estimates of autocorrelation is
# negative to avoid issues with the sum being -0.5, which returns an
# effective_n of infinity
negative_autocorr = False
t = 1
n = len(x)
while not negative_autocorr and (t < n):
if not t % 2:
negative_autocorr = sum(rho_norm[t - 1 : t + 1]) < 0
t += 1
return int(n / (1 + 2 * rho_norm[1:t].sum()))
except:
return None


def mcmc_stats(
chains_dict,
params=['tbias', 'kp', 'ddfsnow', 'ddfice', 'rhoabl', 'rhoacc', 'mb_mwea'],
):
"""
Compute per-chain and overall summary stats for MCMC samples.

Parameters
----------
chains_dict : dict
Dictionary with structure:
{
"param1": {
"chain1": [...],
"chain2": [...],
...
},
...
}

Returns
-------
summary_stats : dict
Dictionary with structure:
{
"param1": {
"mean": [...], # per chain
"std": [...],
"median": [...],
"q025": [...],
"q975": [...],
"ess": ..., # overall
"r_hat": ... # overall
},
...
}
"""
summary_stats = {}

for param, chains in chains_dict.items():
if param not in params:
continue

# Stack chains into array: shape (n_chains, n_samples)
chain_names = sorted(chains) # ensure consistent order
samples = np.array([chains[c] for c in chain_names])

# Per-chain stats
means = np.mean(samples, axis=1).tolist()
stds = np.std(samples, axis=1, ddof=1).tolist()
medians = np.median(samples, axis=1).tolist()
q25 = np.quantile(samples, 0.25, axis=1).tolist()
q75 = np.quantile(samples, 0.75, axis=1).tolist()
ess = [effective_n(x) for x in samples]
# Overall stats (R-hat)
if samples.shape[0] > 1:
# calculate the gelman-rubin stat for each variable across all chains
# pass chains as 2d array to arviz using the from_dict() method
# convert the chains into an InferenceData object
idata = az.from_dict(posterior={param: samples})
# calculate the Gelman-Rubin statistic (rhat)
r_hat = float(az.rhat(idata).to_array().values[0])
else:
r_hat = None

summary_stats[param] = {
'mean': means,
'std': stds,
'median': medians,
'q25': q25,
'q75': q75,
'ess': ess,
'r_hat': r_hat,
}

chains_dict['_summary_stats_'] = summary_stats

return chains_dict