Skip to content

Commit

Permalink
Merge pull request #95 from SALib/calvin
Browse files Browse the repository at this point in the history
Sobol groups and distributions
  • Loading branch information
jdherman committed Feb 24, 2016
2 parents 070e250 + d696c07 commit d4dc4ed
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 77 deletions.
4 changes: 2 additions & 2 deletions SALib/analyze/morris.py
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from . import common_args
from ..util import read_param_file
from ..util import read_param_file, compute_groups_matrix


def analyze(problem, X, Y,
Expand Down Expand Up @@ -71,7 +71,7 @@ def analyze(problem, X, Y,
if (problem['groups'] is None) & (Y.size % (num_vars + 1) == 0):
num_trajectories = int(Y.size / (num_vars + 1))
elif problem['groups'] is not None:
groups, unique_group_names = problem['groups']
groups, unique_group_names = compute_groups_matrix(problem['groups'], num_vars)
number_of_groups = len(unique_group_names)
num_trajectories = int(Y.size / (number_of_groups + 1))
else:
Expand Down
56 changes: 33 additions & 23 deletions SALib/analyze/sobol.py 100755 → 100644
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from . import common_args
from ..util import read_param_file
from ..util import read_param_file, compute_groups_matrix

from multiprocessing import Pool, cpu_count
from functools import partial
Expand All @@ -21,12 +21,12 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
conf_level=0.95, print_to_console=False, parallel=False,
n_processors=None):
"""Perform Sobol Analysis on model outputs.
Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where
each entry is a list of size D (the number of parameters) containing the
indices in the same order as the parameter file. If calc_second_order is
True, the dictionary also contains keys 'S2' and 'S2_conf'.
Parameters
----------
problem : dict
Expand All @@ -41,7 +41,7 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
The confidence interval level (default 0.95)
print_to_console : bool
Print results directly to console (default False)
References
----------
.. [1] Sobol, I. M. (2001). "Global sensitivity indices for nonlinear
Expand All @@ -56,26 +56,30 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
output. Design and estimator for the total sensitivity index."
Computer Physics Communications, 181(2):259-270,
doi:10.1016/j.cpc.2009.09.018.
Examples
--------
>>> X = saltelli.sample(problem, 1000)
>>> Y = Ishigami.evaluate(X)
>>> Si = sobol.analyze(problem, Y, print_to_console=True)
"""

D = problem['num_vars']
# determining if groups are defined and adjusting the number
# of rows in the cross-sampled matrix accordingly
if not problem.get('groups'):
D = problem['num_vars']
else:
D = len(set(problem['groups']))

if calc_second_order and Y.size % (2 * D + 2) == 0:
N = int(Y.size / (2 * D + 2))
elif not calc_second_order and Y.size % (D + 2) == 0:
N = int(Y.size / (D + 2))
else:
raise RuntimeError("""
Incorrect number of samples in model output file.
Incorrect number of samples in model output file.
Confirm that calc_second_order matches option used during sampling.""")

if not 0 < conf_level < 1:
if conf_level < 0 or conf_level > 1:
raise RuntimeError("Confidence level must be between 0-1.")

A,B,AB,BA = separate_output_values(Y, D, N, calc_second_order)
Expand All @@ -97,10 +101,10 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
for k in range(j + 1, D):
S['S2'][j, k] = second_order(
A, AB[:, j], AB[:, k], BA[:, j], B)
S['S2_conf'][j, k] = Z * second_order(A[r], AB[r, j],
S['S2_conf'][j, k] = Z * second_order(A[r], AB[r, j],
AB[r, k], BA[r, j], B[r]).std(ddof=1)

else:
else:
tasks, n_processors = create_task_list(D, calc_second_order, n_processors)

func = partial(sobol_parallel, Z, A, AB, BA, B, r)
Expand All @@ -115,7 +119,6 @@ def analyze(problem, Y, calc_second_order=True, num_resamples=100,
if print_to_console:
print_indices(S, problem, calc_second_order)


return S


Expand Down Expand Up @@ -153,7 +156,7 @@ def create_Si_dict(D, calc_second_order):
return S


def separate_output_values(Y, D, N, calc_second_order):
def separate_output_values(Y, D, N, calc_second_order):
AB = np.empty((N, D))
BA = np.empty((N, D)) if calc_second_order else None
step = 2 * D + 2 if calc_second_order else D + 2
Expand Down Expand Up @@ -193,7 +196,7 @@ def create_task_list(D, calc_second_order, n_processors):
# Create list with one entry (key, parameter 1, parameter 2) per sobol
# index (+conf.). This is used to supply parallel tasks to multiprocessing.Pool
tasks_first_order = [[d, j, None] for j in range(D) for d in ('S1', 'S1_conf', 'ST', 'ST_conf')]

# Add second order (+conf.) to tasks
tasks_second_order = []
if calc_second_order:
Expand All @@ -208,7 +211,7 @@ def create_task_list(D, calc_second_order, n_processors):
else:
# merges both lists alternating its elements and splits the resulting list into n_processors sublists
tasks = np.array_split([v for v in sum(
zip_longest(tasks_first_order[::-1], tasks_second_order), ())
zip_longest(tasks_first_order[::-1], tasks_second_order), ())
if v is not None], n_processors)

return tasks, n_processors
Expand All @@ -232,21 +235,28 @@ def Si_list_to_dict(S_list, D, calc_second_order):

def print_indices(S, problem, calc_second_order):
# Output to console
D = problem['num_vars']
print('Parameter S1 S1_conf ST ST_conf')
if not problem.get('groups'):
title = 'Parameter'
names = problem['names']
D = problem['num_vars']
else:
title = 'Group'
_,names = compute_groups_matrix(problem['groups'], problem['num_vars'])
D = len(names)

print('%s S1 S1_conf ST ST_conf' % title)

for j in range(D):
print('%s %f %f %f %f' % (problem['names'][j], S['S1'][
print('%s %f %f %f %f' % (names[j], S['S1'][
j], S['S1_conf'][j], S['ST'][j], S['ST_conf'][j]))

if calc_second_order:
print('\nParameter_1 Parameter_2 S2 S2_conf')
print('\n%s_1 %s_2 S2 S2_conf' % (title,title))

for j in range(D):
for k in range(j + 1, D):
print("%s %s %f %f" % (problem['names'][j], problem[
'names'][k], S['S2'][j, k], S['S2_conf'][j, k]))

print("%s %s %f %f" % (names[j], names[k],
S['S2'][j, k], S['S2_conf'][j, k]))

if __name__ == "__main__":
parser = common_args.create()
Expand All @@ -272,5 +282,5 @@ def print_indices(S, problem, calc_second_order):
usecols=(args.column,))

analyze(problem, Y, (args.max_order == 2),
num_resamples=args.resamples, print_to_console=True,
num_resamples=args.resamples, print_to_console=True,
parallel=args.parallel, n_processors=args.n_processors)
12 changes: 6 additions & 6 deletions SALib/sample/morris.py
Expand Up @@ -4,7 +4,7 @@
import random as rd

from . import common_args
from ..util import scale_samples, read_param_file
from ..util import scale_samples, read_param_file, compute_groups_matrix
from . optimal_trajectories import return_max_combo

from . morris_util import *
Expand Down Expand Up @@ -67,10 +67,10 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None, local_o
if grid_jump >= num_levels:
raise ValueError("grid_jump must be less than num_levels")

if problem.get('groups') is None:
sample = sample_oat(problem, N, num_levels, grid_jump)
else:
if problem.get('groups'):
sample = sample_groups(problem, N, num_levels, grid_jump)
else:
sample = sample_oat(problem, N, num_levels, grid_jump)

if optimal_trajectories:

Expand Down Expand Up @@ -141,7 +141,7 @@ def sample_groups(problem, N, num_levels, grid_jump):
Returns an N(g+1)-by-k array of N trajectories;
where g is the number of groups and k is the number of factors
'''
G, group_names = problem['groups']
G, group_names = compute_groups_matrix(problem['groups'], problem['num_vars'])

if G is None:
raise ValueError("Please define the matrix G.")
Expand All @@ -163,7 +163,7 @@ def compute_optimised_trajectories(problem, input_sample, N, k_choices, local_op
correct call here.
'''
num_params = problem['num_vars']
groups = problem['groups']
groups = compute_groups_matrix(problem['groups'], num_params)

if np.any((input_sample < 0) | (input_sample > 1)):
raise ValueError("Input sample must be scaled between 0 and 1")
Expand Down
40 changes: 26 additions & 14 deletions SALib/sample/saltelli.py 100755 → 100644
Expand Up @@ -4,20 +4,20 @@

from . import common_args
from . import sobol_sequence
from ..util import scale_samples, read_param_file
from ..util import scale_samples, nonuniform_scale_samples, read_param_file, compute_groups_matrix


def sample(problem, N, calc_second_order=True):
"""Generates model inputs using Saltelli's extension of the Sobol sequence.
Returns a NumPy matrix containing the model inputs using Saltelli's sampling
scheme. Saltelli's scheme extends the Sobol sequence in a way to reduce
the error rates in the resulting sensitivity index calculations. If
calc_second_order is False, the resulting matrix has N * (D + 2)
rows, where D is the number of parameters. If calc_second_order is True,
the resulting matrix has N * (2D + 2) rows. These model inputs are
intended to be used with :func:`SALib.analyze.sobol.analyze`.
Parameters
----------
problem : dict
Expand All @@ -28,6 +28,13 @@ def sample(problem, N, calc_second_order=True):
Calculate second-order sensitivities (default True)
"""
D = problem['num_vars']
groups = problem.get('groups')

if not groups:
Dg = problem['num_vars']
else:
Dg = len(set(groups))
G, group_names = compute_groups_matrix(groups, D)

# How many values of the Sobol sequence to skip
skip_values = 1000
Expand All @@ -36,9 +43,9 @@ def sample(problem, N, calc_second_order=True):
base_sequence = sobol_sequence.sample(N + skip_values, 2 * D)

if calc_second_order:
saltelli_sequence = np.empty([(2 * D + 2) * N, D])
saltelli_sequence = np.empty([(2 * Dg + 2) * N, D])
else:
saltelli_sequence = np.empty([(D + 2) * N, D])
saltelli_sequence = np.empty([(Dg + 2) * N, D])
index = 0

for i in range(skip_values, N + skip_values):
Expand All @@ -50,9 +57,9 @@ def sample(problem, N, calc_second_order=True):
index += 1

# Cross-sample elements of "B" into "A"
for k in range(D):
for k in range(Dg):
for j in range(D):
if j == k:
if (not groups and j == k) or (groups and group_names[k] == groups[j]):
saltelli_sequence[index, j] = base_sequence[i, j + D]
else:
saltelli_sequence[index, j] = base_sequence[i, j]
Expand All @@ -62,9 +69,9 @@ def sample(problem, N, calc_second_order=True):
# Cross-sample elements of "A" into "B"
# Only needed if you're doing second-order indices (true by default)
if calc_second_order:
for k in range(D):
for k in range(Dg):
for j in range(D):
if j == k:
if (not groups and j == k) or (groups and group_names[k] == groups[j]):
saltelli_sequence[index, j] = base_sequence[i, j]
else:
saltelli_sequence[index, j] = base_sequence[i, j + D]
Expand All @@ -76,17 +83,22 @@ def sample(problem, N, calc_second_order=True):
saltelli_sequence[index, j] = base_sequence[i, j + D]

index += 1

scale_samples(saltelli_sequence, problem['bounds'])
return saltelli_sequence
if not problem.get('dists'):
# scaling values out of 0-1 range with uniform distributions
scale_samples(saltelli_sequence, problem['bounds'])
return saltelli_sequence
else:
# scaling values to other distributions based on inverse CDFs
scaled_saltelli = nonuniform_scale_samples(saltelli_sequence, problem['bounds'], problem['dists'])
return scaled_saltelli

if __name__ == "__main__":

parser = common_args.create()

parser.add_argument(
'-n', '--samples', type=int, required=True, help='Number of Samples')

parser.add_argument('--max-order', type=int, required=False, default=2,
choices=[1, 2], help='Maximum order of sensitivity indices to calculate')
args = parser.parse_args()
Expand Down
22 changes: 11 additions & 11 deletions SALib/tests/test_morris.py
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from ..sample.morris import sample, compute_optimised_trajectories
from ..util import read_param_file
from ..util import read_param_file, compute_groups_matrix


def teardown():
Expand Down Expand Up @@ -54,7 +54,7 @@ def test_group_in_param_file_read():
'''
parameter_file = "SALib/tests/test_param_file_w_groups.txt"
problem = read_param_file(parameter_file)
groups, group_names = problem['groups']
groups, group_names = compute_groups_matrix(problem['groups'], problem['num_vars'])

assert_equal(problem['names'], ["Test 1", "Test 2", "Test 3"])
assert_equal(groups, np.matrix('1,0;1,0;0,1', dtype=np.int))
Expand Down Expand Up @@ -158,15 +158,15 @@ def test_catch_inputs_not_in_zero_one_range():
compute_optimised_trajectories(problem, input_sample, N, k_choices)


@raises(ValueError)
def test_group_sample_fails_with_no_G_matrix():
N = 6
num_levels = 4
grid_jump = 2
problem = {'bounds': [[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
'num_vars': 4,
'groups': (None, None)}
sample(problem, N, num_levels, grid_jump)
# @raises(ValueError)
# def test_group_sample_fails_with_no_G_matrix():
# N = 6
# num_levels = 4
# grid_jump = 2
# problem = {'bounds': [[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
# 'num_vars': 4,
# 'groups': None}
# sample(problem, N, num_levels, grid_jump)


@raises(TypeError)
Expand Down

0 comments on commit d4dc4ed

Please sign in to comment.