Merge pull request #364 from ConnectedSystems/improved-ishigami

Improved implementation of Ishigami function
SALib · Oct 4, 2020 · cf857c2 · cf857c2
2 parents c36ad98 + 67aa8cd
commit cf857c2
Show file tree

Hide file tree

Showing 5 changed files with 293 additions and 19 deletions.
diff --git a/examples/pawn/pawn.bat b/examples/pawn/pawn.bat
@@ -0,0 +1,65 @@
+@echo off
+
+REM Example: generating samples from the command line
+salib sample latin ^
+	-n 1000 ^
+	-p ../../src/SALib/test_functions/params/Ishigami.txt ^
+	-o ../data/model_input.txt ^
+	--delimiter=" " ^
+	--precision=8 ^
+	--seed=100
+
+REM You can also use the module directly through Python
+REM python -m SALib.sample.latin ^
+REM 	   -n 1000 ^
+REM 	   -p ./src/SALib/test_functions/params/Ishigami.txt ^
+REM 	   -o model_input.txt ^
+REM 	   --delimiter=" " ^
+REM 	   --precision=8 ^
+REM 		 --seed=100
+
+
+
+REM Options:
+REM -p, --paramfile: Your parameter range file (3 columns: parameter name, lower bound, upper bound)
+REM
+REM -n, --samples: Sample size.
+REM				 Number of model runs is N
+REM
+REM -o, --output: File to output your samples into.
+REM
+REM --delimiter (optional): Output file delimiter.
+REM
+REM --precision (optional): Digits of precision in the output file. Default is 8.
+REM
+REM -M: RBD-FAST M coefficient, default 4
+REM
+REM -s, --seed (optional): Seed value for random number generation
+
+REM Run the model using the inputs sampled above, and save outputs
+python -c "from SALib.test_functions import Ishigami; import numpy as np; np.savetxt('../data/model_output.txt', Ishigami.evaluate(np.loadtxt('../data/model_input.txt')))"
+
+REM Then use the output to run the analysis.
+REM Sensitivity indices will print to command line. Use ">" to write to file.
+
+salib analyze pawn ^
+	-p ../../src/SALib/test_functions/params/Ishigami.txt ^
+	-X ../data/model_input.txt ^
+	-Y ../data/model_output.txt ^
+	--seed=100
+
+REM python -m SALib.analyze.rbd_fast ^
+REM 	-p ../../src/SALib/test_functions/params/Ishigami.txt ^
+REM 	-X ../data/model_input.txt ^
+REM 	-Y ../data/model_output.txt ^
+REM 	--seed=100
+
+REM Options:
+REM -p, --paramfile: Your parameter range file (3 columns: parameter name, lower bound, upper bound)
+REM
+REM -X, --model-input-file: File of model input values to analyze
+REM -Y, --model-output-file: File of model output values to analyze
+REM
+REM --delimiter (optional): Model output file delimiter.
+REM
+REM -s, --seed (optional): Seed value for random number generation
diff --git a/examples/pawn/pawn.py b/examples/pawn/pawn.py
@@ -0,0 +1,24 @@
+import sys
+sys.path.append('../..')
+
+from SALib.analyze import pawn
+from SALib.sample import latin
+from SALib.test_functions import Ishigami
+from SALib.util import read_param_file
+
+# Read the parameter range file and generate samples
+problem = read_param_file('../../src/SALib/test_functions/params/Ishigami.txt')
+
+# Generate samples
+param_values = latin.sample(problem, 1000)
+
+# Run the "model" and save the output in a text file
+# This will happen offline for external models
+Y = Ishigami.evaluate(param_values)
+
+# Perform the sensitivity analysis using the model output
+# Specify which column of the output file to analyze (zero-indexed)
+Si = pawn.analyze(problem, param_values, Y, S=10, print_to_console=True)
+# Returns a dictionary with key 'PAWN'
+# e.g. Si['PAWN'] contains the total-order index for each parameter, in the
+# same order as the parameter file
diff --git a/examples/pawn/pawn.sh b/examples/pawn/pawn.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Example: generating samples from the command line
+salib sample latin \
+	-n 1000 \
+	-p ../../src/SALib/test_functions/params/Ishigami.txt \
+	-o ../data/model_input.txt \
+	--delimiter=' ' \
+	--precision=8 \
+	--seed=100
+
+# You can also use the module directly through Python
+# python -m SALib.sample.latin \
+# 	   -n 1000 \
+# 	   -p ./src/SALib/test_functions/params/Ishigami.txt \
+# 	   -o model_input.txt \
+# 	   --delimiter=' ' \
+# 	   --precision=8 \
+# 		 --seed=100
+
+# Run the model using the inputs sampled above, and save outputs
+python -c "from SALib.test_functions import Ishigami; import numpy as np; np.savetxt('../data/model_output.txt', Ishigami.evaluate(np.loadtxt('../data/model_input.txt')))"
+
+# Then use the output to run the analysis.
+# Sensitivity indices will print to command line. Use ">" to write to file.
+
+salib analyze pawn \
+	-p ../../src/SALib/test_functions/params/Ishigami.txt \
+	-X ../data/model_input.txt \
+	-Y ../data/model_output.txt \
+	--seed=100
+
+# Options:
+# -p, --paramfile: Your parameter range file (3 columns: parameter name, lower bound, upper bound)
+#
+# -Y, --model-output-file: File of model output values to analyze
+# -X, --model-input-file: File of model input values to analyze
+# -S, --slices: Number of slices to take
+#
+# --delimiter (optional): Model output file delimiter.
+#
+# -s, --seed (optional): Seed value for random number generation
diff --git a/src/SALib/analyze/pawn.py b/src/SALib/analyze/pawn.py
@@ -0,0 +1,109 @@
+import numpy as np
+from scipy.stats import ks_2samp
+
+from . import common_args
+
+from ..util import (read_param_file, ResultDict, 
+                    extract_group_names, _check_groups)
+
+
+def analyze(problem, X, Y, S=10, print_to_console=False, seed=None):
+    """Performs PAWN sensitivity analysis.
+
+    Ported from an implementation in `R` by Baroni & Francke (2020)
+    https://github.com/baronig/GSA-cvd
+
+
+    Parameters
+    ----------
+    problem : dict
+        The problem definition
+    X : numpy.array
+        A NumPy array containing the model inputs
+    Y : numpy.array
+        A NumPy array containing the model outputs
+    S : int
+        Number of slides (default 10)
+    print_to_console : bool
+        Print results directly to console (default False)
+    seed : int
+        Seed value to ensure deterministic results
+
+    References
+    ----------
+    .. [1] Pianosi, F., Wagener, T., 2015. 
+           A simple and efficient method for global sensitivity analysis 
+           based on cumulative distribution functions. 
+           Environmental Modelling & Software 67, 1–11. 
+           https://doi.org/10.1016/j.envsoft.2015.01.004
+
+
+    Examples
+    --------
+    >>> X = latin.sample(problem, 1000)
+    >>> Y = Ishigami.evaluate(X)
+    >>> Si = pawn.analyze(problem, X, Y, S=10, print_to_console=False)
+    """
+    if seed:
+        np.random.seed(seed)
+
+    groups = _check_groups(problem)
+    print("Groups: ", groups)
+    if not groups:
+        D = problem['num_vars']
+    else:
+        _, D = extract_group_names(problem.get('groups'))
+
+    result = np.full((D, ), np.nan)
+    temp_pawn = np.full((S, D), np.nan)
+
+    step = (1/S)
+    for d_i in range(D):
+        seq = np.arange(0, 1+step, step)
+        X_q = np.nanquantile(X[:, d_i], seq)
+
+        for s in range(S):
+            Y_sel = Y[(X[:, d_i] >= X_q[s]) & (X[:, d_i] < X_q[s+1])]
+
+            # KD value
+            # Function returns a KS object which holds the KS statistic
+            # and p-value
+            # Note from documentation:
+            # if the K-S statistic is small or the p-value is high, then 
+            # we cannot reject the hypothesis that the distributions of 
+            # the two samples are the same.
+            ks = ks_2samp(Y_sel, Y)
+            temp_pawn[s, d_i] = ks.statistic 
+
+        result[d_i] = np.median(temp_pawn[:, d_i])
+
+    Si = ResultDict([('PAWN', result)])
+    Si['names'] = problem['names']
+
+    if print_to_console:
+        print(Si.to_df())
+
+    return Si
+
+
+def cli_parse(parser):
+    parser.add_argument('-X', '--model-input-file',
+                        type=str, required=True, help='Model input file')
+
+    parser.add_argument('-S', '--slices',
+                        type=int, required=False, help='Number of slices to take')
+    return parser
+
+
+def cli_action(args):
+    problem = read_param_file(args.paramfile)
+    X = np.loadtxt(args.model_input_file,
+                   delimiter=args.delimiter)
+    Y = np.loadtxt(args.model_output_file,
+                   delimiter=args.delimiter,
+                   usecols=(args.column,))
+    analyze(problem, X, Y, S=args.slices, print_to_console=True, seed=args.seed)
+
+
+if __name__ == "__main__":
+    common_args.run_cli(cli_parse, cli_action)
diff --git a/src/SALib/test_functions/Ishigami.py b/src/SALib/test_functions/Ishigami.py
@@ -1,24 +1,58 @@
-import math
-
 import numpy as np
 
 
-# Non-monotonic Ishigami Function (3 parameters)
-# Using Saltelli sampling with a sample size of ~1000
-# the expected first-order indices would be:
-# x1: 0.3139
-# x2: 0.4424
-# x3: 0.0
-def evaluate(values):
-    Y = np.zeros(values.shape[0])
-    A = 7
-    B = 0.1
-
-    # X = values
-    # Y = np.sin(X[:, 0]) + A * np.power(np.sin(X[:, 1]), 2) + \
-    #         B * np.power(X[:, 2], 4) * np.sin(X[:, 0])
-    for i, X in enumerate(values):
-        Y[i] = np.sin(X[0]) + A * np.power(np.sin(X[1]), 2) + \
-            B * np.power(X[2], 4) * np.sin(X[0])
+def evaluate(X: np.ndarray, A: float = 7.0, B: float = 0.1) -> np.ndarray:
+    """Non-monotonic Ishigami-Homma three parameter test function:
+
+    `f(x) = \sin(x_{1}) + A \sin(x_{2})^2 + Bx^{4}_{3}\sin(x_{1})`
+
+    This test function is commonly used to benchmark global sensitivity 
+    methods as variance-based sensitivities of this function can be 
+    analytically determined.
+
+    See listed references below.
+
+    In [2], the expected first-order indices are:
+
+        x1: 0.3139
+        x2: 0.4424
+        x3: 0.0
+
+    when A = 7, B = 0.1 when conducting Sobol' analysis with the
+    Saltelli sampling method with a sample size of 1000.
+
+
+    Parameters
+    ----------
+    X : np.ndarray
+        An `N*D` array holding values for each parameter, where `N` is the 
+        number of samples and `D` is the number of parameters 
+        (in this case, three).
+    A : float
+        Constant `A` parameter
+    B : float
+        Constant `B` parameter
+
+    Returns
+    -------
+    Y : np.ndarray
+
+    References
+    ----------
+    .. [1] Ishigami, T., Homma, T., 1990. 
+           An importance quantification technique in uncertainty analysis for 
+           computer models. 
+           Proceedings. First International Symposium on Uncertainty Modeling 
+           and Analysis. 
+           https://doi.org/10.1109/ISUMA.1990.151285
+
+    .. [2] Saltelli, A., Ratto, M., Andres, T., Campolongo, F., Cariboni, J., 
+           Gatelli, D., Saisana, M., Tarantola, S., 2008. 
+           Global Sensitivity Analysis: The Primer. Wiley, West Sussex, U.K.
+           https://dx.doi.org/10.1002/9780470725184
+    """
+    Y = np.zeros(X.shape[0])
+    Y = np.sin(X[:, 0]) + A * np.power(np.sin(X[:, 1]), 2) + \
+            B * np.power(X[:, 2], 4) * np.sin(X[:, 0])
 
     return Y