Merge pull request #95 from SALib/calvin

Sobol groups and distributions
SALib · Feb 24, 2016 · d4dc4ed · d4dc4ed
2 parents 070e250 + d696c07
commit d4dc4ed
Show file tree

Hide file tree

Showing 8 changed files with 242 additions and 77 deletions.
diff --git a/SALib/analyze/morris.py b/SALib/analyze/morris.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from . import common_args
-from ..util import read_param_file
+from ..util import read_param_file, compute_groups_matrix
 
 
 def analyze(problem, X, Y,
@@ -71,7 +71,7 @@ def analyze(problem, X, Y,
     if (problem['groups'] is None) & (Y.size % (num_vars + 1) == 0):
         num_trajectories = int(Y.size / (num_vars + 1))
     elif problem['groups'] is not None:
-        groups, unique_group_names = problem['groups']
+        groups, unique_group_names = compute_groups_matrix(problem['groups'], num_vars)
         number_of_groups = len(unique_group_names)
         num_trajectories = int(Y.size / (number_of_groups + 1))
     else:

diff --git a/SALib/analyze/sobol.py b/SALib/analyze/sobol.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from . import common_args
-from ..util import read_param_file
+from ..util import read_param_file, compute_groups_matrix
 
 from multiprocessing import Pool, cpu_count
 from functools import partial
@@ -21,12 +21,12 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
             conf_level=0.95, print_to_console=False, parallel=False,
             n_processors=None):
     """Perform Sobol Analysis on model outputs.
-    
+
     Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where
     each entry is a list of size D (the number of parameters) containing the
     indices in the same order as the parameter file.  If calc_second_order is
     True, the dictionary also contains keys 'S2' and 'S2_conf'.
-    
+
     Parameters
     ----------
     problem : dict
@@ -41,7 +41,7 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
         The confidence interval level (default 0.95)
     print_to_console : bool
         Print results directly to console (default False)
-        
+
     References
     ----------
     .. [1] Sobol, I. M. (2001).  "Global sensitivity indices for nonlinear
@@ -56,26 +56,30 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
            output.  Design and estimator for the total sensitivity index."
            Computer Physics Communications, 181(2):259-270,
            doi:10.1016/j.cpc.2009.09.018.
-           
+
     Examples
     --------
     >>> X = saltelli.sample(problem, 1000)
     >>> Y = Ishigami.evaluate(X)
     >>> Si = sobol.analyze(problem, Y, print_to_console=True)
     """
-
-    D = problem['num_vars']
+    # determining if groups are defined and adjusting the number
+    # of rows in the cross-sampled matrix accordingly
+    if not problem.get('groups'):
+        D = problem['num_vars']
+    else:
+        D = len(set(problem['groups']))
 
     if calc_second_order and Y.size % (2 * D + 2) == 0:
         N = int(Y.size / (2 * D + 2))
     elif not calc_second_order and Y.size % (D + 2) == 0:
         N = int(Y.size / (D + 2))
     else:
         raise RuntimeError("""
-        Incorrect number of samples in model output file. 
+        Incorrect number of samples in model output file.
         Confirm that calc_second_order matches option used during sampling.""")
 
-    if not 0 < conf_level < 1:
+    if conf_level < 0 or conf_level > 1:
         raise RuntimeError("Confidence level must be between 0-1.")
 
     A,B,AB,BA = separate_output_values(Y, D, N, calc_second_order)
@@ -97,10 +101,10 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
                 for k in range(j + 1, D):
                     S['S2'][j, k] = second_order(
                         A, AB[:, j], AB[:, k], BA[:, j], B)
-                    S['S2_conf'][j, k] = Z * second_order(A[r], AB[r, j], 
+                    S['S2_conf'][j, k] = Z * second_order(A[r], AB[r, j],
                         AB[r, k], BA[r, j], B[r]).std(ddof=1)
 
-    else:           
+    else:
         tasks, n_processors = create_task_list(D, calc_second_order, n_processors)
 
         func = partial(sobol_parallel, Z, A, AB, BA, B, r)
@@ -115,7 +119,6 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
     if print_to_console:
         print_indices(S, problem, calc_second_order)
 
-
     return S
 
 
@@ -153,7 +156,7 @@ def create_Si_dict(D, calc_second_order):
     return S
 
 
-def separate_output_values(Y, D, N, calc_second_order): 
+def separate_output_values(Y, D, N, calc_second_order):
     AB = np.empty((N, D))
     BA = np.empty((N, D)) if calc_second_order else None
     step = 2 * D + 2 if calc_second_order else D + 2
@@ -193,7 +196,7 @@ def create_task_list(D, calc_second_order, n_processors):
     # Create list with one entry (key, parameter 1, parameter 2) per sobol
     # index (+conf.). This is used to supply parallel tasks to multiprocessing.Pool
     tasks_first_order = [[d, j, None] for j in range(D) for d in ('S1', 'S1_conf', 'ST', 'ST_conf')]
-    
+
     # Add second order (+conf.) to tasks
     tasks_second_order = []
     if calc_second_order:
@@ -208,7 +211,7 @@ def create_task_list(D, calc_second_order, n_processors):
     else:
         # merges both lists alternating its elements and splits the resulting list into n_processors sublists
         tasks = np.array_split([v for v in sum(
-            zip_longest(tasks_first_order[::-1], tasks_second_order), ()) 
+            zip_longest(tasks_first_order[::-1], tasks_second_order), ())
                 if v is not None], n_processors)
 
     return tasks, n_processors
@@ -232,21 +235,28 @@ def Si_list_to_dict(S_list, D, calc_second_order):
 
 def print_indices(S, problem, calc_second_order):
     # Output to console
-    D = problem['num_vars']
-    print('Parameter S1 S1_conf ST ST_conf')
+    if not problem.get('groups'):
+        title = 'Parameter'
+        names = problem['names']
+        D = problem['num_vars']
+    else:
+        title = 'Group'
+        _,names = compute_groups_matrix(problem['groups'], problem['num_vars'])
+        D = len(names)
+
+    print('%s S1 S1_conf ST ST_conf' % title)
 
     for j in range(D):
-        print('%s %f %f %f %f' % (problem['names'][j], S['S1'][
+        print('%s %f %f %f %f' % (names[j], S['S1'][
             j], S['S1_conf'][j], S['ST'][j], S['ST_conf'][j]))
 
     if calc_second_order:
-        print('\nParameter_1 Parameter_2 S2 S2_conf')
+        print('\n%s_1 %s_2 S2 S2_conf' % (title,title))
 
         for j in range(D):
             for k in range(j + 1, D):
-                print("%s %s %f %f" % (problem['names'][j], problem[
-                    'names'][k], S['S2'][j, k], S['S2_conf'][j, k]))
-
+                print("%s %s %f %f" % (names[j], names[k], 
+                    S['S2'][j, k], S['S2_conf'][j, k]))
 
 if __name__ == "__main__":
     parser = common_args.create()
@@ -272,5 +282,5 @@ def print_indices(S, problem, calc_second_order):
                    usecols=(args.column,))
 
     analyze(problem, Y, (args.max_order == 2),
-            num_resamples=args.resamples, print_to_console=True, 
+            num_resamples=args.resamples, print_to_console=True,
             parallel=args.parallel, n_processors=args.n_processors)
diff --git a/SALib/sample/morris.py b/SALib/sample/morris.py
@@ -4,7 +4,7 @@
 import random as rd
 
 from . import common_args
-from ..util import scale_samples, read_param_file
+from ..util import scale_samples, read_param_file, compute_groups_matrix
 from . optimal_trajectories import return_max_combo
 
 from . morris_util import *
@@ -67,10 +67,10 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None, local_o
     if grid_jump >= num_levels:
         raise ValueError("grid_jump must be less than num_levels")
 
-    if problem.get('groups') is None:
-        sample = sample_oat(problem, N, num_levels, grid_jump)
-    else:
+    if problem.get('groups'):
         sample = sample_groups(problem, N, num_levels, grid_jump)
+    else:
+        sample = sample_oat(problem, N, num_levels, grid_jump)
 
     if optimal_trajectories:
 
@@ -141,7 +141,7 @@ def sample_groups(problem, N, num_levels, grid_jump):
     Returns an N(g+1)-by-k array of N trajectories;
     where g is the number of groups and k is the number of factors
     '''
-    G, group_names = problem['groups']
+    G, group_names = compute_groups_matrix(problem['groups'], problem['num_vars'])
 
     if G is None:
         raise ValueError("Please define the matrix G.")
@@ -163,7 +163,7 @@ def compute_optimised_trajectories(problem, input_sample, N, k_choices, local_op
     correct call here.
     '''
     num_params = problem['num_vars']
-    groups = problem['groups']
+    groups = compute_groups_matrix(problem['groups'], num_params)
 
     if np.any((input_sample < 0) | (input_sample > 1)):
         raise ValueError("Input sample must be scaled between 0 and 1")

diff --git a/SALib/sample/saltelli.py b/SALib/sample/saltelli.py
@@ -4,20 +4,20 @@
 
 from . import common_args
 from . import sobol_sequence
-from ..util import scale_samples, read_param_file
+from ..util import scale_samples, nonuniform_scale_samples, read_param_file, compute_groups_matrix
 
 
 def sample(problem, N, calc_second_order=True):
     """Generates model inputs using Saltelli's extension of the Sobol sequence.
-    
+
     Returns a NumPy matrix containing the model inputs using Saltelli's sampling
     scheme.  Saltelli's scheme extends the Sobol sequence in a way to reduce
     the error rates in the resulting sensitivity index calculations.  If
     calc_second_order is False, the resulting matrix has N * (D + 2)
     rows, where D is the number of parameters.  If calc_second_order is True,
     the resulting matrix has N * (2D + 2) rows.  These model inputs are
     intended to be used with :func:`SALib.analyze.sobol.analyze`.
-    
+
     Parameters
     ----------
     problem : dict
@@ -28,6 +28,13 @@ def sample(problem, N, calc_second_order=True):
         Calculate second-order sensitivities (default True)
     """
     D = problem['num_vars']
+    groups = problem.get('groups')
+
+    if not groups:
+        Dg = problem['num_vars']
+    else:
+        Dg = len(set(groups))
+        G, group_names = compute_groups_matrix(groups, D)
 
     # How many values of the Sobol sequence to skip
     skip_values = 1000
@@ -36,9 +43,9 @@ def sample(problem, N, calc_second_order=True):
     base_sequence = sobol_sequence.sample(N + skip_values, 2 * D)
 
     if calc_second_order:
-        saltelli_sequence = np.empty([(2 * D + 2) * N, D])
+        saltelli_sequence = np.empty([(2 * Dg + 2) * N, D])
     else:
-        saltelli_sequence = np.empty([(D + 2) * N, D])
+        saltelli_sequence = np.empty([(Dg + 2) * N, D])
     index = 0
 
     for i in range(skip_values, N + skip_values):
@@ -50,9 +57,9 @@ def sample(problem, N, calc_second_order=True):
         index += 1
 
         # Cross-sample elements of "B" into "A"
-        for k in range(D):
+        for k in range(Dg):
             for j in range(D):
-                if j == k:
+                if (not groups and j == k) or (groups and group_names[k] == groups[j]):
                     saltelli_sequence[index, j] = base_sequence[i, j + D]
                 else:
                     saltelli_sequence[index, j] = base_sequence[i, j]
@@ -62,9 +69,9 @@ def sample(problem, N, calc_second_order=True):
         # Cross-sample elements of "A" into "B"
         # Only needed if you're doing second-order indices (true by default)
         if calc_second_order:
-            for k in range(D):
+            for k in range(Dg):
                 for j in range(D):
-                    if j == k:
+                    if (not groups and j == k) or (groups and group_names[k] == groups[j]):
                         saltelli_sequence[index, j] = base_sequence[i, j]
                     else:
                         saltelli_sequence[index, j] = base_sequence[i, j + D]
@@ -76,17 +83,22 @@ def sample(problem, N, calc_second_order=True):
             saltelli_sequence[index, j] = base_sequence[i, j + D]
 
         index += 1
-
-    scale_samples(saltelli_sequence, problem['bounds'])
-    return saltelli_sequence
+    if not problem.get('dists'):
+        # scaling values out of 0-1 range with uniform distributions
+        scale_samples(saltelli_sequence, problem['bounds'])
+        return saltelli_sequence
+    else:
+        # scaling values to other distributions based on inverse CDFs
+        scaled_saltelli = nonuniform_scale_samples(saltelli_sequence, problem['bounds'], problem['dists'])
+        return scaled_saltelli
 
 if __name__ == "__main__":
 
     parser = common_args.create()
-    
+
     parser.add_argument(
         '-n', '--samples', type=int, required=True, help='Number of Samples')
-   
+
     parser.add_argument('--max-order', type=int, required=False, default=2,
                         choices=[1, 2], help='Maximum order of sensitivity indices to calculate')
     args = parser.parse_args()

diff --git a/SALib/tests/test_morris.py b/SALib/tests/test_morris.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from ..sample.morris import sample, compute_optimised_trajectories
-from ..util import read_param_file
+from ..util import read_param_file, compute_groups_matrix
 
 
 def teardown():
@@ -54,7 +54,7 @@ def test_group_in_param_file_read():
     '''
     parameter_file = "SALib/tests/test_param_file_w_groups.txt"
     problem = read_param_file(parameter_file)
-    groups, group_names = problem['groups']
+    groups, group_names = compute_groups_matrix(problem['groups'], problem['num_vars'])
 
     assert_equal(problem['names'], ["Test 1", "Test 2", "Test 3"])
     assert_equal(groups, np.matrix('1,0;1,0;0,1', dtype=np.int))
@@ -158,15 +158,15 @@ def test_catch_inputs_not_in_zero_one_range():
     compute_optimised_trajectories(problem, input_sample, N, k_choices)
 
 
-@raises(ValueError)
-def test_group_sample_fails_with_no_G_matrix():
-    N = 6
-    num_levels = 4
-    grid_jump = 2
-    problem = {'bounds': [[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
-               'num_vars': 4,
-               'groups': (None, None)}
-    sample(problem, N, num_levels, grid_jump)
+# @raises(ValueError)
+# def test_group_sample_fails_with_no_G_matrix():
+#     N = 6
+#     num_levels = 4
+#     grid_jump = 2
+#     problem = {'bounds': [[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
+#                'num_vars': 4,
+#                'groups': None}
+#     sample(problem, N, num_levels, grid_jump)
 
 
 @raises(TypeError)