From 8fb5a47ee86cd0dbf706d613f92975f68aa20b55 Mon Sep 17 00:00:00 2001 From: johnhg Date: Tue, 23 Feb 2021 18:03:08 -0700 Subject: [PATCH] Feature 1451 crpss (#1676) * Per #1450, add new ECNT columns for Hersback CRPS. Still need to actually compute the stats though. * Per #1450, update NumArray functions to only sort if the data is not yet sorted. And check for bad data when computing the standard deviation. * Per #1450, add code to compute the empirical CRPS value. * Per #1450, large change to the new output for the empirical CRPS. In order to aggregate decomposed empirical CRPS reliability and potential correctly, we'd need to write (n+1)*2 additional columns. While the empirical crps can be aggregated as a weighted mean, the decomposition cannot. It just isn't feasible to do this in the ECNT line type. If this reliability and potential really are required, recommend that we add an entirely new CRPS line type instead of tacking onto ECNT. These changes simply remove reliabilit and potential from the output. * Per #1450 and #1451, replacing single CRPS_CLIMO column with CRPSCL and CRPSCL_EMP which will be needed for #1451. * Per #1450, delete temp files I'd accidentally committed. * Per #1450, update the user's guide with CRPS updates. * Per #1451, instead of computing the climo crps on the fly, compute and store it separately for each point. * Per #1451, the ECNT line type will no longer be written separately for each CDF bin. Removing the bin-related arguments from the write_ecnt functions. * Per #1451, the climo_cdf.write_bins option no longer applies. Since Ensemble-Stat will no longer compute stats separately for each climo bin, I'm removing the reference to write_bins from the Ensemble-Stat config files. * Per #1451, compute and store the climo CRPS for each point. Also, break apart the normal climo computation into separate functions for crps, ign, and pit. * Per #1451, update Ensemble-Stat logic to no longer subset pairs into climo CDF bins. We had done this to be consistent with the use of climo data in point and grid-stat. But this change to the handling of climo data is consitent with the NOAA/EMC approach. * Per #1451, split out the setting of climo CDF thresholds into a separate function so that it can also be called by stat-analysis. * Per #1451, in the Ensemble-Stat ORANK line type, rename CLIMO to CLIMO_MEAN and add a CLIMO_STDEV column. * Per #1451, also need to update gsidens2orank to write a climo_stdev column. * Per #1451, switch from constant pointer to ClimoCDFInfo object to a copy to make the logic of doing this in Stat-Analysis a little easier. * Per #1451, the HiRA method in Point-Stat computes an ECNT output line type. Needed to call set_climo_cdf() there so that we know how many climo values to use when computing the empirical climo CRPS. * Per #1451, need to store climo_cdf for both grid and point verification. * Per #1451, update to write the CLIMO_STDEV header column for the ORANK line type. * Per #1451, in Ensemble-Stat when doing point verification, check for empty OBS_UNIT string and write NA instead. * Per #1451, update unit tests by enhancing the climatology call to Ensemble-Stat to also include point verification. Tweak the Ensemble-Stat cofiguration for that and also add a call to pb2nc to prepare the point observations for use. * Per #1450, added a new section to the Ensemble-Stat chapter describing how climo mean/stdev are used in the computation of the skill scores. * Update ensemble-stat.rst Co-authored-by: j-opatz <59586397+j-opatz@users.noreply.github.com> --- met/data/config/EnsembleStatConfig_default | 3 +- .../table_files/met_header_columns_V10.0.txt | 2 +- met/docs/Users_Guide/ensemble-stat.rst | 31 +- met/scripts/config/EnsembleStatConfig | 3 +- met/src/basic/vx_config/config_constants.h | 9 +- met/src/basic/vx_config/config_util.cc | 110 ++++---- met/src/basic/vx_util/stat_column_defs.h | 16 +- met/src/libcode/vx_analysis_util/stat_job.cc | 3 +- met/src/libcode/vx_stat_out/stat_columns.cc | 34 +-- met/src/libcode/vx_stat_out/stat_columns.h | 2 +- met/src/libcode/vx_statistics/ens_stats.cc | 61 ++-- .../vx_statistics/pair_data_ensemble.cc | 265 +++++++++--------- .../vx_statistics/pair_data_ensemble.h | 21 +- .../tools/core/ensemble_stat/ensemble_stat.cc | 78 ++---- .../ensemble_stat/ensemble_stat_conf_info.cc | 19 +- met/src/tools/core/point_stat/point_stat.cc | 3 +- .../core/stat_analysis/aggr_stat_line.cc | 36 ++- .../tools/core/stat_analysis/aggr_stat_line.h | 1 - .../core/stat_analysis/parse_stat_line.cc | 11 +- .../core/stat_analysis/parse_stat_line.h | 2 +- .../tools/other/gsi_tools/gsidens2orank.cc | 3 +- test/config/EnsembleStatConfig_climo | 11 +- test/xml/unit_climatology.xml | 3 + test/xml/unit_pb2nc.xml | 19 ++ 24 files changed, 358 insertions(+), 388 deletions(-) diff --git a/met/data/config/EnsembleStatConfig_default b/met/data/config/EnsembleStatConfig_default index 9687446d3e..81c0c30c77 100644 --- a/met/data/config/EnsembleStatConfig_default +++ b/met/data/config/EnsembleStatConfig_default @@ -189,9 +189,8 @@ climo_stdev = { // May be set separately in each "obs.field" entry // climo_cdf = { - cdf_bins = 1; + cdf_bins = 10; center_bins = FALSE; - write_bins = TRUE; } //////////////////////////////////////////////////////////////////////////////// diff --git a/met/data/table_files/met_header_columns_V10.0.txt b/met/data/table_files/met_header_columns_V10.0.txt index d2c8ae5c4f..d6111a61b2 100644 --- a/met/data/table_files/met_header_columns_V10.0.txt +++ b/met/data/table_files/met_header_columns_V10.0.txt @@ -11,7 +11,7 @@ V10.0 : STAT : NBRCTC : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID V10.0 : STAT : NBRCTS : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL BASER BASER_NCL BASER_NCU BASER_BCL BASER_BCU FMEAN FMEAN_NCL FMEAN_NCU FMEAN_BCL FMEAN_BCU ACC ACC_NCL ACC_NCU ACC_BCL ACC_BCU FBIAS FBIAS_BCL FBIAS_BCU PODY PODY_NCL PODY_NCU PODY_BCL PODY_BCU PODN PODN_NCL PODN_NCU PODN_BCL PODN_BCU POFD POFD_NCL POFD_NCU POFD_BCL POFD_BCU FAR FAR_NCL FAR_NCU FAR_BCL FAR_BCU CSI CSI_NCL CSI_NCU CSI_BCL CSI_BCU GSS GSS_BCL GSS_BCU HK HK_NCL HK_NCU HK_BCL HK_BCU HSS HSS_BCL HSS_BCU ODDS ODDS_NCL ODDS_NCU ODDS_BCL ODDS_BCU LODDS LODDS_NCL LODDS_NCU LODDS_BCL LODDS_BCU ORSS ORSS_NCL ORSS_NCU ORSS_BCL ORSS_BCU EDS EDS_NCL EDS_NCU EDS_BCL EDS_BCU SEDS SEDS_NCL SEDS_NCU SEDS_BCL SEDS_BCU EDI EDI_NCL EDI_NCU EDI_BCL EDI_BCU SEDI SEDI_NCL SEDI_NCU SEDI_BCL SEDI_BCU BAGSS BAGSS_BCL BAGSS_BCU V10.0 : STAT : GRAD : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL FGBAR OGBAR MGBAR EGBAR S1 S1_OG FGOG_RATIO DX DY V10.0 : STAT : DMAP : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL FY OY FBIAS BADDELEY HAUSDORFF MED_FO MED_OF MED_MIN MED_MAX MED_MEAN FOM_FO FOM_OF FOM_MIN FOM_MAX FOM_MEAN ZHU_FO ZHU_OF ZHU_MIN ZHU_MAX ZHU_MEAN -V10.0 : STAT : ORANK : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL INDEX OBS_SID OBS_LAT OBS_LON OBS_LVL OBS_ELV OBS PIT RANK N_ENS_VLD (N_ENS) ENS_[0-9]* OBS_QC ENS_MEAN CLIMO SPREAD ENS_MEAN_OERR SPREAD_OERR SPREAD_PLUS_OERR +V10.0 : STAT : ORANK : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL INDEX OBS_SID OBS_LAT OBS_LON OBS_LVL OBS_ELV OBS PIT RANK N_ENS_VLD (N_ENS) ENS_[0-9]* OBS_QC ENS_MEAN CLIMO_MEAN SPREAD ENS_MEAN_OERR SPREAD_OERR SPREAD_PLUS_OERR CLIMO_STDEV V10.0 : STAT : PCT : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL (N_THRESH) THRESH_[0-9]* OY_[0-9]* ON_[0-9]* V10.0 : STAT : PJC : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL (N_THRESH) THRESH_[0-9]* OY_TP_[0-9]* ON_TP_[0-9]* CALIBRATION_[0-9]* REFINEMENT_[0-9]* LIKELIHOOD_[0-9]* BASER_[0-9]* V10.0 : STAT : PRC : VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE TOTAL (N_THRESH) THRESH_[0-9]* PODY_[0-9]* POFD_[0-9]* diff --git a/met/docs/Users_Guide/ensemble-stat.rst b/met/docs/Users_Guide/ensemble-stat.rst index 99532dcaf6..9a4b8f476e 100644 --- a/met/docs/Users_Guide/ensemble-stat.rst +++ b/met/docs/Users_Guide/ensemble-stat.rst @@ -6,7 +6,7 @@ Ensemble-Stat Tool Introduction ____________ -The Ensemble-Stat tool may be run to create simple ensemble forecasts (mean, probability, spread, etc) from a set of several forecast model files to be used by the MET statistics tools. If observations are also included, ensemble statistics such as rank histograms, probability integral transform histograms, spread/skill variance, relative position and continuous ranked probability score are produced. A climatology file may also be provided, and will be used as a reference forecast in several of the output statistics. Finally, observation error perturbations can be included prior to calculation of statistics. Details about and equations for the statistics produced for ensembles are given in :numref:`Appendix C, Section %s `. +The Ensemble-Stat tool may be run to create simple ensemble forecasts (mean, probability, spread, etc) from a set of several forecast model files to be used by the MET statistics tools. If observations are also included, ensemble statistics such as rank histograms, probability integral transform histograms, spread/skill variance, relative position and continuous ranked probability score are produced. Climatological mean and standard deviation data may also be provided, and will be used as a reference forecast in several of the output statistics. Finally, observation error perturbations can be included prior to calculation of statistics. Details about and equations for the statistics produced for ensembles are given in :numref:`Appendix C, Section %s `. Scientific and statistical aspects __________________________________ @@ -33,6 +33,11 @@ The relative position (RELP) is a count of the number of times each ensemble mem The ranked probability score (RPS) is included in the Ranked Probability Score (RPS) line type. It is the mean of the Brier scores computed from ensemble probabilities derived for each probability category threshold (prob_cat_thresh) specified in the configuration file. The continuous ranked probability score (CRPS) is the average the distance between the forecast (ensemble) cumulative distribution function and the observation cumulative distribution function. It is an analog of the Brier score, but for continuous forecast and observation fields. The CRPS statistic is computed using two methods: assuming a normal distribution defined by the ensemble mean and spread (:ref:`Gneiting et al., 2004 `) and using the empirical ensemble distribution (:ref:`Hersbach, 2000 `). The CRPS statistic is included in the Ensemble Continuous Statistics (ECNT) line type, along with other statistics quantifying the ensemble spread and ensemble mean skill. +Climatology data +~~~~~~~~~~~~~~~~ + +The Ensemble-Stat output includes at least three statistics computed relative to external climatology data. The climatology is defined by mean and standard deviation fields, and both are required in the computation of ensemble skill score statistics. MET assumes that the climatology follows a normal distribution, defined by the mean and standard deviation at each point. When computing the CRPS skill score for (:ref:`Gneiting et al., 2004 `) the reference CRPS statistic is computed using the climatological mean and standard deviation directly. When computing the CRPS skill score for (:ref:`Hersbach, 2000 `) the reference CRPS statistic is computed by selecting equal-area-spaced values from the assumed normal climatological distribution. The number of points selected is determined by the *cdf_bins* setting in the *climo_cdf* dictionary. The reference CRPS is computed empirically from this ensemble of climatology values. The climatological distribution is also used for the RPSS. The forecast RPS statistic is computed from a probabilistic contingency table in which the probabilities are derived from the ensemble member values. In a simliar fashion, the climatogical probability for each observed value is derived from the climatological distribution. The area of the distribution to the left of the observed value is interpreted as the climatological probability. These climatological probabilities are also evaluated using a probabilistic contingency table from which the reference RPS score is computed. The skill scores are derived by comparing the forecast statistic to the reference climatology statistic. + Ensemble observation error ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -45,7 +50,7 @@ Observation errors differ according to instrument, temporal and spatial represen Practical Information _____________________ -This section contains information about configuring and running the Ensemble-Stat tool. The Ensemble-Stat tool creates or verifies gridded model data. For verification, this tool can accept either gridded or point observations. If provided, the climatology file must be gridded. The input gridded model, observation, and climatology datasets must be on the same grid prior to calculation of any statistics, and in one of the MET supported gridded file formats. If gridded files are not on the same grid, MET will do the regridding for you if you specify the desired output grid. The point observations must be formatted as the NetCDF output of the point reformatting tools described in :numref:`reformat_point`. +This section contains information about configuring and running the Ensemble-Stat tool. The Ensemble-Stat tool creates or verifies gridded model data. For verification, this tool can accept either gridded or point observations. If provided, the climatology data files must be gridded. The input gridded model, observation, and climatology datasets must be on the same grid prior to calculation of any statistics, and in one of the MET supported gridded file formats. If gridded files are not on the same grid, MET will do the regridding for you if you specify the desired output grid. The point observations must be formatted as the NetCDF output of the point reformatting tools described in :numref:`reformat_point`. ensemble_stat usage ~~~~~~~~~~~~~~~~~~~ @@ -256,7 +261,7 @@ The **obs** dictionary looks very similar to the **fcst** dictionary. If verifyi The **ens_ssvar_bin_size** and **ens_phist_bin_size** specify the width of the categorical bins used to accumulate frequencies for spread-skill-variance or probability integral transform statistics, respectively. -The **prob_cat_thresh** entry is an array of thresholds to be applied in the computation of the RPS line type. Since these thresholds can change for each variable, they can be specified separately for each **fcst.field** entry. If left empty but climatology data is provided, the **climo_cdf** thresholds will be used instead. If no climatology data is provided, and the RPS output line type is requested, then the **prob_cat_thresh** array must be defined. +The **prob_cat_thresh** entry is an array of thresholds to be applied in the computation of the RPS line type. Since these thresholds can change for each variable, they can be specified separately for each **fcst.field** entry. If left empty but climatological mean and standard deviation data is provided, the **climo_cdf** thresholds will be used instead. If no climatology data is provided, and the RPS output line type is requested, then the **prob_cat_thresh** array must be defined. __________________ @@ -800,28 +805,30 @@ The format of the STAT and ASCII output of the Ensemble-Stat tool are described * - 37 - ENS_i - Value of the ith ensemble member (repeated) - * - Last-6 + * - Last-7 - OBS_QC - Quality control string for the observation - * - Last-5 + * - Last-6 - ENS_MEAN - The unperturbed ensemble mean value + * - Last-5 + - CLIMO_MEAN + - Climatological mean value (named CLIMO prior to met-10.0.0) * - Last-4 - - CLIMO - - The value of the included climatology - * - Last-3 - SPREAD - The spread (standard deviation) of the unperturbed ensemble member values - * - Last-2 + * - Last-3 - ENS_MEAN _OERR - The PERTURBED ensemble mean (e.g. with Observation Error). - * - Last-1 + * - Last-2 - SPREAD_OERR - The spread (standard deviation) of the PERTURBED ensemble member values (e.g. with Observation Error). - * - Last + * - Last-1 - SPREAD_PLUS_OERR - The square root of the sum of the unperturbed ensemble variance and the observation error variance. - + * - Last + - CLIMO_STDEV + - Climatological standard deviation value .. role:: raw-html(raw) :format: html diff --git a/met/scripts/config/EnsembleStatConfig b/met/scripts/config/EnsembleStatConfig index 878af55ce5..506e5eea3b 100644 --- a/met/scripts/config/EnsembleStatConfig +++ b/met/scripts/config/EnsembleStatConfig @@ -198,9 +198,8 @@ climo_stdev = { // May be set separately in each "obs.field" entry // climo_cdf = { - cdf_bins = 1; + cdf_bins = 10; center_bins = FALSE; - write_bins = TRUE; } //////////////////////////////////////////////////////////////////////////////// diff --git a/met/src/basic/vx_config/config_constants.h b/met/src/basic/vx_config/config_constants.h index ceea745af7..1ae8d5d90d 100644 --- a/met/src/basic/vx_config/config_constants.h +++ b/met/src/basic/vx_config/config_constants.h @@ -284,11 +284,11 @@ struct RegridInfo { ThreshArray censor_thresh; // Censoring thesholds NumArray censor_val; // and replacement values - void * hook; // not allocated + void * hook; // not allocated - void clear(); - void validate(); // ensure that width and method are accordant - void validate_point(); // ensure that width and method are accordant + void clear(); + void validate(); // ensure that width and method are accordant + void validate_point(); // ensure that width and method are accordant }; //////////////////////////////////////////////////////////////////////// @@ -305,6 +305,7 @@ struct ClimoCDFInfo { ClimoCDFInfo(); void clear(); + void set_cdf_ta(int, bool &); // Construct equally-likely thresholds }; //////////////////////////////////////////////////////////////////////// diff --git a/met/src/basic/vx_config/config_util.cc b/met/src/basic/vx_config/config_util.cc index 084c3a593e..349eb06081 100644 --- a/met/src/basic/vx_config/config_util.cc +++ b/met/src/basic/vx_config/config_util.cc @@ -1483,6 +1483,61 @@ ClimoCDFInfo::ClimoCDFInfo() { /////////////////////////////////////////////////////////////////////////////// +void ClimoCDFInfo::set_cdf_ta(int n_bin, bool ¢er) { + + // Must be greater than 0 + if(n_bin <= 0) { + mlog << Error << "\nClimoCDFInfo::set_cdf_ta() -> " + << "The \"" << conf_key_cdf_bins << "\" entry (" << n_bin + << ") must be greater than zero.\n\n"; + exit(1); + } + + // Even number of bins cannot be centered + if(n_bin%2 == 0 && center) { + mlog << Warning << "\nClimoCDFInfo::set_cdf_ta() -> " + << "Resetting \"" << conf_key_center_bins + << "\" to false since the \"" << conf_key_cdf_bins + << "\" entry (" << n_bin << ") is even.\n\n"; + center = false; + } + + // For a single bin, set center to false + if(n_bin == 1) center = false; + + // Add the first threshold for 0.0 + cdf_ta.add(0.0, thresh_ge); + + double cdf_inc = (center ? 1.0/(n_bin - 1.0) : 1.0/n_bin); + double cdf_val = (center ? cdf_inc/2 : cdf_inc ); + + // Add thresholds between 0.0 and 1.0 + while(cdf_val < 1.0 && !is_eq(cdf_val, 1.0)) { + cdf_ta.add(cdf_val, thresh_ge); + cdf_val += cdf_inc; + } + + // Add the last threshold for 1.0 + cdf_ta.add(1.0, thresh_ge); + + if(n_bin == 1) { + mlog << Debug(4) << "ClimoCDFInfo::set_cdf_ta() -> " + << "Since \"" << conf_key_cdf_bins << "\" = 1, " + << "no climatology CDF bins will be applied.\n"; + } + else { + mlog << Debug(4) << "ClimoCDFInfo::set_cdf_ta() -> " + << "For \"" << conf_key_cdf_bins << "\" (" << n_bin << ") and \"" + << conf_key_center_bins << "\" (" << bool_to_string(center) + << "), defined climatology CDF thresholds: " + << write_css(cdf_ta) << "\n"; + } + + return; +} + +/////////////////////////////////////////////////////////////////////////////// + ClimoCDFInfo parse_conf_climo_cdf(Dictionary *dict) { Dictionary *cdf_dict = (Dictionary *) 0; ClimoCDFInfo info; @@ -1506,7 +1561,8 @@ ClimoCDFInfo parse_conf_climo_cdf(Dictionary *dict) { center = cdf_dict->lookup_bool(conf_key_center_bins); // Conf: write_bins - info.write_bins = cdf_dict->lookup_bool(conf_key_write_bins); + // Used by Grid-Stat and Point-Stat but not by Ensemble-Stat + info.write_bins = cdf_dict->lookup_bool(conf_key_write_bins, false, false); // Check that at least one value is provided if(bins.n() == 0) { @@ -1522,57 +1578,7 @@ ClimoCDFInfo parse_conf_climo_cdf(Dictionary *dict) { } // Interpret a single value as the number of bins else { - - // Store the number of bins - int n_bins = nint(bins[0]); - - // Must be greater than 0 - if(n_bins <= 0) { - mlog << Error << "\nparse_conf_climo_cdf() -> " - << "The \"" << conf_key_cdf_bins << "\" entry (" << n_bins - << ") must be greater than zero.\n\n"; - exit(1); - } - - // Even number of bins cannot be centered - if(n_bins%2 == 0 && center) { - mlog << Warning << "\nparse_conf_climo_cdf() -> " - << "Resetting \"" << conf_key_center_bins - << "\" to false since the \"" << conf_key_cdf_bins - << "\" entry (" << n_bins << ") is even.\n\n"; - center = false; - } - - // For a single bin, set center to false - if(n_bins == 1) center = false; - - // Add the first threshold for 0.0 - info.cdf_ta.add(0.0, thresh_ge); - - double cdf_inc = (center ? 1.0/(bins[0] - 1.0) : 1.0/bins[0]); - double cdf_val = (center ? cdf_inc/2 : cdf_inc ); - - // Add thresholds between 0.0 and 1.0 - while(cdf_val < 1.0 && !is_eq(cdf_val, 1.0)) { - info.cdf_ta.add(cdf_val, thresh_ge); - cdf_val += cdf_inc; - } - - // Add the last threshold for 1.0 - info.cdf_ta.add(1.0, thresh_ge); - - if(n_bins == 1) { - mlog << Debug(4) << "parse_conf_climo_cdf() -> " - << "Since \"" << conf_key_cdf_bins << "\" = 1, " - << "no climatology CDF bins will be applied.\n"; - } - else { - mlog << Debug(4) << "parse_conf_climo_cdf() -> " - << "For \"" << conf_key_cdf_bins << "\" (" << n_bins << ") and \"" - << conf_key_center_bins << "\" (" << bool_to_string(center) - << "), defined climatology CDF thresholds: " - << write_css(info.cdf_ta) << "\n"; - } + info.set_cdf_ta(bins[0], center); } // Sanity check the end points diff --git a/met/src/basic/vx_util/stat_column_defs.h b/met/src/basic/vx_util/stat_column_defs.h index d5a8cf0c1a..c7661cb3cb 100644 --- a/met/src/basic/vx_util/stat_column_defs.h +++ b/met/src/basic/vx_util/stat_column_defs.h @@ -281,13 +281,13 @@ static const char * phist_columns [] = { }; static const char * orank_columns [] = { - "TOTAL", "INDEX", "OBS_SID", - "OBS_LAT", "OBS_LON", "OBS_LVL", - "OBS_ELV", "OBS", "PIT", - "RANK", "N_ENS_VLD", "N_ENS", - "ENS_", "OBS_QC", "ENS_MEAN", - "CLIMO", "SPREAD", "ENS_MEAN_OERR", - "SPREAD_OERR", "SPREAD_PLUS_OERR" + "TOTAL", "INDEX", "OBS_SID", + "OBS_LAT", "OBS_LON", "OBS_LVL", + "OBS_ELV", "OBS", "PIT", + "RANK", "N_ENS_VLD", "N_ENS", + "ENS_", "OBS_QC", "ENS_MEAN", + "CLIMO_MEAN", "SPREAD", "ENS_MEAN_OERR", + "SPREAD_OERR", "SPREAD_PLUS_OERR", "CLIMO_STDEV" }; static const char * ssvar_columns [] = { @@ -424,7 +424,7 @@ inline int get_n_eclv_columns (int n) { return(4 + 2*n); } // n = inline int get_n_rhist_columns (int n) { return(2 + n); } // n = N_RANK inline int get_n_phist_columns (int n) { return(3 + n); } // n = N_BINS inline int get_n_relp_columns (int n) { return(2 + n); } // n = N_ENS -inline int get_n_orank_columns (int n) { return(19 + n); } // n = N_ENS +inline int get_n_orank_columns (int n) { return(20 + n); } // n = N_ENS //////////////////////////////////////////////////////////////////////// diff --git a/met/src/libcode/vx_analysis_util/stat_job.cc b/met/src/libcode/vx_analysis_util/stat_job.cc index 0f259dc4a1..1d1b0fa821 100644 --- a/met/src/libcode/vx_analysis_util/stat_job.cc +++ b/met/src/libcode/vx_analysis_util/stat_job.cc @@ -2439,7 +2439,8 @@ ConcatString STATAnalysisJob::get_jobstring() const { // Jobs which use out_bin_size if(line_type.n_elements() > 0) { if(string_to_statlinetype(line_type[0].c_str()) == stat_orank && - out_line_type.has(stat_phist_str)) { + (out_line_type.has(stat_phist_str) || + out_line_type.has(stat_ecnt_str))) { // out_bin_size js << "-out_bin_size " << out_bin_size << " "; diff --git a/met/src/libcode/vx_stat_out/stat_columns.cc b/met/src/libcode/vx_stat_out/stat_columns.cc index 570977fb17..aeaa1ff684 100644 --- a/met/src/libcode/vx_stat_out/stat_columns.cc +++ b/met/src/libcode/vx_stat_out/stat_columns.cc @@ -480,6 +480,7 @@ void write_orank_header_row(int hdr_flag, int n_ens, AsciiTable &at, at.set_entry(r, c+16+n_ens, (string)orank_columns[17]); at.set_entry(r, c+17+n_ens, (string)orank_columns[18]); at.set_entry(r, c+18+n_ens, (string)orank_columns[19]); + at.set_entry(r, c+19+n_ens, (string)orank_columns[20]); return; } @@ -1600,10 +1601,9 @@ void write_isc_row(StatHdrColumns &shc, const ISCInfo &isc_info, //////////////////////////////////////////////////////////////////////// void write_ecnt_row(StatHdrColumns &shc, const ECNTInfo &ecnt_info, - STATOutputType out_type, int i_bin, int n_bin, + STATOutputType out_type, AsciiTable &stat_at, int &stat_row, AsciiTable &txt_at, int &txt_row) { - ConcatString mask_name = shc.get_mask(); // ECNT line type shc.set_line_type(stat_ecnt_str); @@ -1617,10 +1617,6 @@ void write_ecnt_row(StatHdrColumns &shc, const ECNTInfo &ecnt_info, shc.set_cov_thresh(na_str); shc.set_alpha(bad_data_double); - // Update the mask name, if needed. - ConcatString cs = append_climo_bin(mask_name, i_bin, n_bin); - shc.set_mask(cs.c_str()); - // Write the header columns write_header_cols(shc, stat_at, stat_row); @@ -1638,9 +1634,6 @@ void write_ecnt_row(StatHdrColumns &shc, const ECNTInfo &ecnt_info, // Increment the STAT row counter stat_row++; - // Reset the mask name - shc.set_mask(mask_name.c_str()); - return; } @@ -1681,9 +1674,6 @@ void write_rps_row(StatHdrColumns &shc, const RPSInfo &rps_info, // Increment the STAT row counter stat_row++; - // Reset the mask name - shc.set_mask(mask_name.c_str()); - return; } @@ -3833,14 +3823,14 @@ void write_orank_cols(const PairDataEnsemble *pd_ptr, int i, // // Ensemble Observation Rank Matched Pairs // Dump out the ORANK line: - // TOTAL, INDEX, OBS_SID, - // OBS_LAT, OBS_LON, OBS_LVL, - // OBS_ELV, OBS, PIT, - // RANK, N_ENS_VLD, N_ENS, + // TOTAL, INDEX, OBS_SID, + // OBS_LAT, OBS_LON, OBS_LVL, + // OBS_ELV, OBS, PIT, + // RANK, N_ENS_VLD, N_ENS, // [ENS_] (for each ensemble member) - // OBS_QC, ENS_MEAN, CLIMO, - // SPREAD, ENS_MEAN_OERR, SPREAD_OERR, - // SPREAD_PLUS_OERR + // OBS_QC, ENS_MEAN, CLIMO_MEAN, + // SPREAD, ENS_MEAN_OERR, SPREAD_OERR, + // SPREAD_PLUS_OERR, CLIMO_STDEV // at.set_entry(r, c+0, // Total Number of Pairs pd_ptr->n_obs); // Use n_obs instead of n_pair to include missing data @@ -3896,7 +3886,7 @@ void write_orank_cols(const PairDataEnsemble *pd_ptr, int i, at.set_entry(r, c+13+pd_ptr->n_ens, pd_ptr->mn_na[i]); - // Climatology values + // Climatology mean values at.set_entry(r, c+14+pd_ptr->n_ens, pd_ptr->cmn_na[i]); @@ -3916,6 +3906,10 @@ void write_orank_cols(const PairDataEnsemble *pd_ptr, int i, at.set_entry(r, c+18+pd_ptr->n_ens, square_root(pd_ptr->var_plus_oerr_na[i])); + // Climatology standard deviation values + at.set_entry(r, c+19+pd_ptr->n_ens, + pd_ptr->csd_na[i]); + return; } diff --git a/met/src/libcode/vx_stat_out/stat_columns.h b/met/src/libcode/vx_stat_out/stat_columns.h index c56686bf71..f5db8bb977 100644 --- a/met/src/libcode/vx_stat_out/stat_columns.h +++ b/met/src/libcode/vx_stat_out/stat_columns.h @@ -107,7 +107,7 @@ extern void write_mpr_row (StatHdrColumns &, const PairDataPoint *, STATOutput extern void write_isc_row (StatHdrColumns &, const ISCInfo &, STATOutputType, AsciiTable &, int &, AsciiTable &, int &); extern void write_ecnt_row (StatHdrColumns &, const ECNTInfo &, STATOutputType, - int, int, AsciiTable &, int &, AsciiTable &, int &); + AsciiTable &, int &, AsciiTable &, int &); extern void write_rps_row (StatHdrColumns &, const RPSInfo &, STATOutputType, AsciiTable &, int &, AsciiTable &, int &); extern void write_rhist_row (StatHdrColumns &, const PairDataEnsemble *, STATOutputType, diff --git a/met/src/libcode/vx_statistics/ens_stats.cc b/met/src/libcode/vx_statistics/ens_stats.cc index e1cf37d681..4748b186ac 100644 --- a/met/src/libcode/vx_statistics/ens_stats.cc +++ b/met/src/libcode/vx_statistics/ens_stats.cc @@ -228,10 +228,26 @@ void ECNTInfo::set(const PairDataEnsemble &pd) { // Store the number of ensemble members n_ens = pd.n_ens; - // Get the average empirical and Gaussian CRPS and - // components values - crps_emp = pd.crps_emp_na.wmean(pd.wgt_na); - crps_gaus = pd.crps_gaus_na.wmean(pd.wgt_na); + // Compute empirical CRPS scores + crps_emp = pd.crps_emp_na.wmean(pd.wgt_na); + crpscl_emp = pd.crpscl_emp_na.wmean(pd.wgt_na); + crpss_emp = (is_bad_data(crps_emp) || + is_bad_data(crpscl_emp) || + is_eq(crpscl_emp, 0.0) ? + bad_data_double : + (crpscl_emp - crps_emp) / crpscl_emp); + + // Compute Gaussian CRPS scores + crps_gaus = pd.crps_gaus_na.wmean(pd.wgt_na); + crpscl_gaus = pd.crpscl_gaus_na.wmean(pd.wgt_na); + crpss_gaus = (is_bad_data(crps_gaus) || + is_bad_data(crpscl_gaus) || + is_eq(crpscl_gaus, 0.0) ? + bad_data_double : + (crpscl_gaus - crps_gaus) / crpscl_gaus); + + // Compute the average IGN value + ign = pd.ign_na.wmean(pd.wgt_na); // Get the sum of the weights for(i=0, n_pair=0, w_sum=0.0; i=0.0) and last (>=1.0) climo CDF thresholds + for(int i=1; iname_attr()); // Store the observation variable units - shc.set_obs_units(conf_info.vx_opt[i].vx_pd.obs_info->units_attr()); + cs = conf_info.vx_opt[i].vx_pd.obs_info->units_attr(); + if(cs.empty()) cs = na_string; + shc.set_obs_units(cs); // Set the observation level name shc.set_obs_lev(conf_info.vx_opt[i].vx_pd.obs_info->level_attr().c_str()); @@ -1730,6 +1735,7 @@ void process_grid_vx() { // Initialize pd_all.clear(); pd_all.set_ens_size(n_vx_vld[i]); + pd_all.set_climo_cdf(conf_info.vx_opt[i].cdf_info); pd_all.skip_const = conf_info.vx_opt[i].vx_pd.pd[0][0][0].skip_const; // Apply the current mask to the fields and compute the pairs @@ -1972,69 +1978,25 @@ void process_grid_scores(int i_vx, void do_ecnt(const EnsembleStatVxOpt &vx_opt, const SingleThresh &othresh, const PairDataEnsemble *pd_ptr) { - int i, n_bin; - PairDataEnsemble pd; - ECNTInfo *ecnt_info = (ECNTInfo *) 0; + ECNTInfo ecnt_info; // Check for valid pointer if(!pd_ptr) return; - // Determine the number of climo CDF bins - n_bin = (pd_ptr->cmn_na.n_valid() > 0 && - pd_ptr->csd_na.n_valid() > 0 ? - vx_opt.get_n_cdf_bin() : 1); - - // Allocate memory - ecnt_info = new ECNTInfo [n_bin]; - - // Process the climo CDF bins - for(i=0; i 1) pd = subset_climo_cdf_bin(*pd_ptr, - vx_opt.cdf_info.cdf_ta, i); - else pd = *pd_ptr; - - // Store threshold - ecnt_info[i].othresh = othresh; - - // Check for no matched pairs to process - if(pd.n_obs == 0) continue; - - // Compute ensemble statistics - ecnt_info[i].set(pd); - - // Write out ECNT - if((n_bin == 1 || vx_opt.cdf_info.write_bins) && - vx_opt.output_flag[i_ecnt] != STATOutputType_None && - ecnt_info[i].n_pair > 0) { - write_ecnt_row(shc, ecnt_info[i], vx_opt.output_flag[i_ecnt], - i, n_bin, stat_at, i_stat_row, - txt_at[i_ecnt], i_txt_row[i_ecnt]); - } - } // end for i (n_bin) - - // Write the mean of the climo CDF bins - if(n_bin > 1) { - - // Compute ECNT climo CDF bin means - if(vx_opt.output_flag[i_ecnt] != STATOutputType_None) { - - ECNTInfo ecnt_mean; - compute_ecnt_mean(ecnt_info, n_bin, ecnt_mean); - - if(ecnt_mean.n_pair > 0) { - write_ecnt_row(shc, ecnt_mean, - vx_opt.output_flag[i_ecnt], - -1, n_bin, stat_at, i_stat_row, - txt_at[i_ecnt], i_txt_row[i_ecnt]); - } - } - } // end if n_bin > 1 - - // Dealloate memory - if(ecnt_info) { delete [] ecnt_info; ecnt_info = (ECNTInfo *) 0; } + // Compute continuous ensemble statistics + ecnt_info.set(*pd_ptr); + // Write out ECNT + if(vx_opt.output_flag[i_ecnt] != STATOutputType_None && + ecnt_info.n_pair > 0) { + write_ecnt_row(shc, ecnt_info, vx_opt.output_flag[i_ecnt], + stat_at, i_stat_row, + txt_at[i_ecnt], i_txt_row[i_ecnt]); + } + return; } diff --git a/met/src/tools/core/ensemble_stat/ensemble_stat_conf_info.cc b/met/src/tools/core/ensemble_stat/ensemble_stat_conf_info.cc index e8291e1737..0461bc1b7f 100644 --- a/met/src/tools/core/ensemble_stat/ensemble_stat_conf_info.cc +++ b/met/src/tools/core/ensemble_stat/ensemble_stat_conf_info.cc @@ -877,6 +877,9 @@ void EnsembleStatVxOpt::set_vx_pd(EnsembleStatConfInfo *conf_info) { // Define the dimensions vx_pd.set_pd_size(n_msg_typ, n_mask, n_interp); + // Store climo CDF + vx_pd.set_climo_cdf(cdf_info); + // Store the list of surface message types vx_pd.set_msg_typ_sfc(conf_info->msg_typ_sfc); @@ -964,7 +967,6 @@ void EnsembleStatVxOpt::set_perc_thresh(const PairDataEnsemble *pd_ptr) { int EnsembleStatVxOpt::n_txt_row(int i_txt_row) const { int n = 0; - int n_bin; // Range check if(i_txt_row < 0 || i_txt_row >= n_txt) { @@ -976,15 +978,6 @@ int EnsembleStatVxOpt::n_txt_row(int i_txt_row) const { // Check if this output line type is requested if(output_flag[i_txt_row] == STATOutputType_None) return(0); - // Determine row multiplier for climatology bins - if(cdf_info.write_bins) { - n_bin = get_n_cdf_bin(); - if(n_bin > 1) n_bin++; - } - else { - n_bin = 1; - } - // Switch on the index of the line type switch(i_txt_row) { @@ -993,11 +986,11 @@ int EnsembleStatVxOpt::n_txt_row(int i_txt_row) const { // Maximum number of ECNT and RPS lines possible = // Point Vx: Message Types * Masks * Interpolations * - // Obs Thresholds * Climo Bins + // Obs Thresholds // Grid Vx: Masks * Interpolations * - // Obs Thresholds * Climo Bins + // Obs Thresholds n = (get_n_msg_typ() + 1) * get_n_mask() * get_n_interp() * - get_n_o_thresh() * n_bin; + get_n_o_thresh(); break; case(i_rhist): diff --git a/met/src/tools/core/point_stat/point_stat.cc b/met/src/tools/core/point_stat/point_stat.cc index 664243a721..f61583f3ee 100644 --- a/met/src/tools/core/point_stat/point_stat.cc +++ b/met/src/tools/core/point_stat/point_stat.cc @@ -1762,6 +1762,7 @@ void do_hira_ens(int i_vx, const PairDataPoint *pd_ptr) { hira_pd.clear(); hira_pd.extend(pd_ptr->n_obs); hira_pd.set_ens_size(gt->size()); + hira_pd.set_climo_cdf(conf_info.vx_opt[i_vx].cdf_info); f_ens.extend(gt->size()); // Process each observation point @@ -1832,7 +1833,7 @@ void do_hira_ens(int i_vx, const PairDataPoint *pd_ptr) { write_ecnt_row(shc, ecnt_info, conf_info.vx_opt[i_vx].output_flag[i_ecnt], - 0, 1, stat_at, i_stat_row, + stat_at, i_stat_row, txt_at[i_ecnt], i_txt_row[i_ecnt]); } // end if ECNT diff --git a/met/src/tools/core/stat_analysis/aggr_stat_line.cc b/met/src/tools/core/stat_analysis/aggr_stat_line.cc index 034afdb0cb..3bd72ae766 100644 --- a/met/src/tools/core/stat_analysis/aggr_stat_line.cc +++ b/met/src/tools/core/stat_analysis/aggr_stat_line.cc @@ -2553,8 +2553,6 @@ void aggr_ecnt_lines(LineDataFile &f, STATAnalysisJob &job, // if(m.count(key) == 0) { aggr.ens_pd.clear(); - aggr.crpscl_emp_na.clear(); - aggr.crpscl_gaus_na.clear(); aggr.hdr.clear(); m[key] = aggr; } @@ -2573,9 +2571,9 @@ void aggr_ecnt_lines(LineDataFile &f, STATAnalysisJob &job, // Store the current statistics and weight (TOTAL column) // m[key].ens_pd.crps_emp_na.add(cur.crps_emp); - m[key].crpscl_emp_na.add(cur.crpscl_emp); + m[key].ens_pd.crpscl_emp_na.add(cur.crpscl_emp); m[key].ens_pd.crps_gaus_na.add(cur.crps_gaus); - m[key].crpscl_gaus_na.add(cur.crpscl_gaus); + m[key].ens_pd.crpscl_gaus_na.add(cur.crpscl_gaus); m[key].ens_pd.ign_na.add(cur.ign); m[key].ens_pd.var_na.add(square(cur.spread)); m[key].ens_pd.var_oerr_na.add(square(cur.spread_oerr)); @@ -2620,9 +2618,9 @@ void aggr_ecnt_lines(LineDataFile &f, STATAnalysisJob &job, it->second.ens_pd.rmse_oerr = (is_bad_data(v) ? bad_data_double : sqrt(v)); crps_emp = it->second.ens_pd.crps_emp_na.wmean(it->second.ens_pd.wgt_na); - crpscl_emp = it->second.crpscl_emp_na.wmean(it->second.ens_pd.wgt_na); + crpscl_emp = it->second.ens_pd.crpscl_emp_na.wmean(it->second.ens_pd.wgt_na); crps_gaus = it->second.ens_pd.crps_gaus_na.wmean(it->second.ens_pd.wgt_na); - crpscl_gaus = it->second.crpscl_gaus_na.wmean(it->second.ens_pd.wgt_na); + crpscl_gaus = it->second.ens_pd.crpscl_gaus_na.wmean(it->second.ens_pd.wgt_na); // Compute aggregated empirical CRPSS it->second.ens_pd.crpss_emp = @@ -3002,8 +3000,9 @@ void aggr_orank_lines(LineDataFile &f, STATAnalysisJob &job, AggrENSInfo aggr; ORANKData cur; ConcatString key; + NumArray cur_clm; int i, n_valid, n_bin; - double esum, esumsq, crps_gaus, ign, pit; + double esum, esumsq; map::iterator it; // @@ -3054,6 +3053,8 @@ void aggr_orank_lines(LineDataFile &f, STATAnalysisJob &job, n_bin = ceil(1.0/aggr.ens_pd.phist_bin_size); for(i=0; in_use); // N_USE diff --git a/test/config/EnsembleStatConfig_climo b/test/config/EnsembleStatConfig_climo index 491412e1d4..583ce9dc86 100644 --- a/test/config/EnsembleStatConfig_climo +++ b/test/config/EnsembleStatConfig_climo @@ -82,8 +82,8 @@ nmep_smooth = { // fcst = { field = [ - { name = "TMP"; level = "Z2"; }, - { name = "TMP"; level = "P850"; } + { name = "TMP"; level = "Z2"; message_type = [ "ADPSFC" ]; }, + { name = "TMP"; level = "P850"; message_type = [ "ADPUPA" ]; } ]; } obs = fcst; @@ -117,9 +117,8 @@ climo_stdev = { // May be set separately in each "obs.field" entry // climo_cdf = { - cdf_bins = 4; + cdf_bins = 10; center_bins = FALSE; - write_bins = TRUE; } //////////////////////////////////////////////////////////////////////////////// @@ -209,11 +208,11 @@ interp = { // Statistical output types // output_flag = { - ecnt = STAT; + ecnt = BOTH; rps = STAT; rhist = STAT; phist = STAT; - orank = STAT; + orank = BOTH; ssvar = STAT; relp = STAT; } diff --git a/test/xml/unit_climatology.xml b/test/xml/unit_climatology.xml index 84ce2f8bb0..03eb303d41 100644 --- a/test/xml/unit_climatology.xml +++ b/test/xml/unit_climatology.xml @@ -315,11 +315,14 @@ \ &OUTPUT_DIR;/climatology/ensemble_stat_input_file_list \ &CONFIG_DIR;/EnsembleStatConfig_climo \ + -point_obs &OUTPUT_DIR;/pb2nc/ndas.20120410.t12z.prepbufr.tm00.nc \ -grid_obs &DATA_DIR_OBS;/laps/laps_2012041012_F000.grib \ -outdir &OUTPUT_DIR;/climatology &OUTPUT_DIR;/climatology/ensemble_stat_NCEP_1.0DEG_20120410_120000V.stat + &OUTPUT_DIR;/climatology/ensemble_stat_NCEP_1.0DEG_20120410_120000V_ecnt.txt + &OUTPUT_DIR;/climatology/ensemble_stat_NCEP_1.0DEG_20120410_120000V_orank.txt &OUTPUT_DIR;/climatology/ensemble_stat_NCEP_1.0DEG_20120410_120000V_ens.nc &OUTPUT_DIR;/climatology/ensemble_stat_NCEP_1.0DEG_20120410_120000V_orank.nc diff --git a/test/xml/unit_pb2nc.xml b/test/xml/unit_pb2nc.xml index 515a01907b..a65f52110d 100644 --- a/test/xml/unit_pb2nc.xml +++ b/test/xml/unit_pb2nc.xml @@ -36,6 +36,25 @@ + + &MET_BIN;/pb2nc + + STATION_ID + MASK_GRID + MASK_POLY + QUALITY_MARK_THRESH 2 + + \ + &DATA_DIR_OBS;/prepbufr/ndas/nam.20120410.t12z.prepbufr.tm00.nr \ + &OUTPUT_DIR;/pb2nc/ndas.20120410.t12z.prepbufr.tm00.nc \ + &CONFIG_DIR;/PB2NCConfig \ + -v 1 + + + &OUTPUT_DIR;/pb2nc/ndas.20120410.t12z.prepbufr.tm00.nc + + + &MET_BIN;/pb2nc