ESMValGroup · bouweandela · Nov 10, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020
diff --git a/doc/sphinx/source/recipes/recipe_climwip.rst b/doc/sphinx/source/recipes/recipe_climwip.rst
@@ -29,8 +29,10 @@ User settings in recipe
 1. Script ``climwip.py``
 
   *Required settings for script*
-    * ``sigma_performance``: shape parameter weights calculation (determined offline)
-    * ``sigma_independence``: shape parameter for weights calculation (determined offline)
+    * ``performance_sigma``: shape parameter weights calculation (determined offline)
+    * ``independence_sigma``: shape parameter for weights calculation (determined offline)
+    * ``performance_contributions``: dictionary where the keys represent the variable groups to be included in the performance calculation. The values give the relative contribution of each group, with 0 being equivalent to not including the group.
+    * ``independence_contributions``: dictionary where the keys represent the variable groups to be included in the independence calculation. The values give the relative contribution of each group, with 0 being equivalent to not including the group.
     * ``obs_data``: list of project names to specify which are the the observational data. The rest is assumed to be model data.
 
   *Required settings for variables*
@@ -41,11 +43,6 @@ User settings in recipe
     * ``preprocessor``: e.g. climwip_summer_mean
     * ``additional_datasets``: provide a list of model data for performance calculation.
 
-  *Optional settings for variables*
-    * ``performance``: set to false to *not* calculate performance for this variable group
-    * ``independence``: set to false to *not* calculate independence for this variable group
-    * By default, both performance and independence are calculate for each variable group.
-
   *Required settings for preprocessor*
     * Different combinations of preprocessor functions can be used, but the end result should always be aggregated over the time dimension, i.e. the input for the diagnostic script should be 2d (lat/lon).
 
@@ -81,6 +78,7 @@ Variables
 
 * pr (atmos, monthly mean, longitude latitude time)
 * tas (atmos, monthly mean, longitude latitude time)
+* psl (atmos, monthly mean, longitude latitude time)
 * more variables can be added if available for all datasets.
 
 
@@ -93,7 +91,9 @@ multiple datasets.
 References
 ----------
 
-* `Brunner et al. 2019, Environ. Res. Lett., 14, 124010 <https://doi.org/10.1088/1748-9326/ab492f>`_.
+* `Brunner et al. 2020 (accepted), Earth Syst. Dynam., <https://doi.org/10.5194/esd-2020-23>`_.
+* `Merrifield et al. 2020, Earth Syst. Dynam., 11, 807-834, <https://doi.org/10.5194/esd-11-807-2020>`_.
+* `Brunner et al. 2019, Environ. Res. Lett., 14, 124010, <https://doi.org/10.1088/1748-9326/ab492f>`_.
 
 Example plots
 -------------

diff --git a/esmvaltool/diag_scripts/weighting/climwip.py b/esmvaltool/diag_scripts/weighting/climwip.py
@@ -345,25 +345,29 @@ def visualize_and_save_performance(performance: 'xr.DataArray', cfg: dict,
     log_provenance(caption, filename_data, cfg, ancestors)
 
 
-def compute_overall_mean(dataset):
+def compute_overall_mean(dataset, contributions):
     """Normalize all variables in a dataset and return their mean."""
     if 'perfect_model_ensemble' in dataset.dims:
         median_dim = ['perfect_model_ensemble', 'model_ensemble']
     else:
         median_dim = 'model_ensemble'
     normalized = dataset / dataset.median(dim=median_dim)
 
+    weights = xr.DataArray(
+        [contributions[vg] for vg in dataset],
+        coords={'variable_group': list(dataset)},
+        dims='variable_group')
     overall_mean = normalized.to_array(
-        dim='variable_group').mean('variable_group')
+        dim='variable_group').weighted(weights).mean('variable_group')
     overall_mean.name = 'overall_mean'
     overall_mean.attrs['variable_group'] = 'overall_mean'
     overall_mean.attrs['units'] = '1'
     return overall_mean
 
 
 def calculate_weights(performance: 'xr.DataArray',
-                      independence: 'xr.DataArray', sigma_performance: float,
-                      sigma_independence: float) -> 'xr.DataArray':
+                      independence: 'xr.DataArray', performance_sigma: float,
+                      independence_sigma: float) -> 'xr.DataArray':
     """Calculate the (NOT normalised) weights for each model N.
 
     Parameters
@@ -372,19 +376,19 @@ def calculate_weights(performance: 'xr.DataArray',
         Array specifying the model performance.
     independence : array_like, shape (N, N)
         Array specifying the model independence.
-    sigma_performance : float
+    performance_sigma : float
         Sigma value defining the form of the weighting function
             for the performance.
-    sigma_independence : float
+    independence_sigma : float
         Sigma value defining the form of the weighting function
             for the independence.
 
     Returns
     -------
     weights : ndarray, shape (N,)
     """
-    numerator = np.exp(-((performance / sigma_performance)**2))
-    exp = np.exp(-((independence / sigma_independence)**2))
+    numerator = np.exp(-((performance / performance_sigma)**2))
+    exp = np.exp(-((independence / independence_sigma)**2))
 
     # Note diagonal = exp(0) = 1, thus this is equal to 1 + sum(i!=j)
     denominator = exp.sum(axis=0)
@@ -422,15 +426,35 @@ def main(cfg):
     """Perform climwip weighting method."""
     models, observations = read_metadata(cfg)
 
-    variable_groups = models.keys()
+    variable_groups_independence = [
+        key for key, value in cfg['independence_contributions'].items() if value > 0]
+    variable_groups_performance = [
+        key for key, value in cfg['performance_contributions'].items() if value > 0]
+    assert len(variable_groups_independence) > 0
+    assert len(variable_groups_performance) > 0
 
     model_ancestors = []
     obs_ancestors = []
 
     performances = {}
     independences = {}
 
-    for variable_group in variable_groups:
+    for variable_group in variable_groups_independence:
+
+        logger.info('Reading model data for %s', variable_group)
+        datasets_model = models[variable_group]
+        model_data, model_data_files = read_model_data(datasets_model)
+
+        logger.info('Calculating independence for %s', variable_group)
+        independence = calculate_independence(model_data)
+        visualize_and_save_independence(independence, cfg,
+                                        model_data_files)
+        logger.debug(independence.values)
+        independences[variable_group] = independence
+
+        model_ancestors.extend(model_data_files)
+
+    for variable_group in variable_groups_performance:
 
         logger.info('Reading model data for %s', variable_group)
         datasets_model = models[variable_group]
@@ -441,41 +465,34 @@ def main(cfg):
         obs_data, obs_data_files = read_observation_data(datasets_obs)
         obs_data = aggregate_obs_data(obs_data, operator='median')
 
-        if datasets_model[0].get('independence', True):
-            logger.info('Calculating independence for %s', variable_group)
-            independence = calculate_independence(model_data)
-            visualize_and_save_independence(independence, cfg,
-                                            model_data_files)
-            logger.debug(independence.values)
-            independences[variable_group] = independence
-
-        if datasets_model[0].get('performance', True):
-            logger.info('Calculating performance for %s', variable_group)
-            performance = calculate_performance(model_data, obs_data)
-            visualize_and_save_performance(performance, cfg,
-                                           model_data_files + obs_data_files)
-            logger.debug(performance.values)
-            performances[variable_group] = performance
-            obs_ancestors.extend(obs_data_files)
+        logger.info('Calculating performance for %s', variable_group)
+        performance = calculate_performance(model_data, obs_data)
+        visualize_and_save_performance(performance, cfg,
+                                       model_data_files + obs_data_files)
+        logger.debug(performance.values)
+        performances[variable_group] = performance
+        obs_ancestors.extend(obs_data_files)
 
         model_ancestors.extend(model_data_files)
 
+    model_ancestors = [*{*model_ancestors}]  # make unique
+
     logger.info('Computing overall mean independence')
     independence = xr.Dataset(independences)
-    overall_independence = compute_overall_mean(independence)
+    overall_independence = compute_overall_mean(independence, cfg['independence_contributions'])
     visualize_and_save_independence(overall_independence, cfg, model_ancestors)
 
     logger.info('Computing overall mean performance')
     performance = xr.Dataset(performances)
-    overall_performance = compute_overall_mean(performance)
+    overall_performance = compute_overall_mean(performance, cfg['performance_contributions'])
     visualize_and_save_performance(overall_performance, cfg,
                                    model_ancestors + obs_ancestors)
 
     logger.info('Calculating weights')
-    sigma_performance = cfg['sigma_performance']
-    sigma_independence = cfg['sigma_independence']
+    performance_sigma = cfg['performance_sigma']
+    independence_sigma = cfg['independence_sigma']
     weights = calculate_weights(overall_performance, overall_independence,
-                                sigma_performance, sigma_independence)
+                                performance_sigma, independence_sigma)
     visualize_and_save_weights(weights,
                                cfg,
                                ancestors=model_ancestors + obs_ancestors)

diff --git a/esmvaltool/recipes/recipe_climwip.yml b/esmvaltool/recipes/recipe_climwip.yml
@@ -87,8 +87,16 @@ diagnostics:
       climwip:
         script: weighting/climwip.py
         obs_data: native6
-        sigma_performance: 0.5
-        sigma_independence: 0.5
+        performance_sigma: 0.5
+        performance_contributions:
+          tas_CLIM: 1
+          pr_CLIM: 2
+          psl_CLIM: 1
+        independence_sigma: 0.5
+        independence_contributions:
+          tas_CLIM: .5
+          pr_CLIM: .25
+          psl_CLIM: 0  # equivalent to not setting it
 
   weighted_temperature_graph:
     variables: