Merge pull request #82 from JoerivanEngelen/master

Finding local maximum distance (after Ruano et al. 2012) as an alternative to the brute force approach.
SALib · Nov 26, 2015 · 2513de0 · 2513de0
2 parents f7590c5 + 9e53b0d
commit 2513de0
Show file tree

Hide file tree

Showing 13 changed files with 225 additions and 28 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -6,7 +6,8 @@ python:
 - "3.3"
 
 env: 
- - DEPS="numpy scipy matplotlib"
+ # Workaround for image_comparison issue in v1.5.0 of matplotlib - fix matplotlib to 1.4.3
+ - DEPS="numpy scipy matplotlib=1.4.3"
 
 install:
  - sudo apt-get update

diff --git a/SALib/sample/morris.py b/SALib/sample/morris.py
@@ -21,7 +21,7 @@
 #    - a parallel brute force method (incomplete)
 #    - a combinatorial optimisation approach (completed, but dependencies are
 #      not open-source)
-def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None):
+def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None, local_optimization=False):
     """Generates model inputs using for Method of Morris.
     
     Returns a NumPy matrix containing the model inputs required for Method of
@@ -33,7 +33,7 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None):
     
     - Vanilla Morris
     - Optimised trajectories when optimal_trajectories is set (using 
-      Campolongo's enhancements from 2007)
+      Campolongo's enhancements from 2007 and optionally Ruano's enhancement from 2012)
     - Groups with optimised trajectories when optimal_trajectores is set and 
       the problem definition specifies groups
     
@@ -45,6 +45,8 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None):
     computed to find the optimal combination of trajectories.  We suggest going
     no higher than 4 from a pool of 100 samples.
     
+    Update: With local_optimization = True, it is possible to go higher than the previously suggested 4 from 100.
+    
     Parameters
     ----------
     problem : dict
@@ -57,6 +59,10 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None):
         The grid jump size
     optimal_trajectories : int
         The number of optimal trajectories to sample (between 2 and N)
+    local_optimization : bool
+        Flag whether to use local optimization according to Ruano et al. (2012) 
+        Speeds up the process tremendously for bigger N and num_levels.
+        Stating this variable to be true causes the function to ignore gurobi.
     """
     if grid_jump >= num_levels:
         raise ValueError("grid_jump must be less than num_levels")
@@ -76,15 +82,16 @@ def sample(problem, N, num_levels, grid_jump, optimal_trajectories=None):
         if optimal_trajectories >= N:
             raise ValueError("The number of optimal trajectories should be less than the number of samples.")
 
-        if _has_gurobi == False:
+        if _has_gurobi == False and local_optimization == False:
 
             if optimal_trajectories > 10:
                 raise ValueError("Running optimal trajectories greater than values of 10 will take a long time.")
 
         sample = compute_optimised_trajectories(problem, 
                                                 sample, 
                                                 N, 
-                                                optimal_trajectories)
+                                                optimal_trajectories,
+                                                local_optimization)
 
     scale_samples(sample, problem['bounds'])
     return sample
@@ -150,7 +157,7 @@ def sample_groups(problem, N, num_levels, grid_jump):
     return sample.reshape((N * (g + 1), k))
 
 
-def compute_optimised_trajectories(problem, input_sample, N, k_choices):
+def compute_optimised_trajectories(problem, input_sample, N, k_choices, local_optimization = False):
     '''
     Calls the procedure to compute the optimum k_choices of trajectories
     from the input_sample.
@@ -163,7 +170,7 @@ def compute_optimised_trajectories(problem, input_sample, N, k_choices):
     if np.any((input_sample < 0) | (input_sample > 1)):
         raise ValueError("Input sample must be scaled between 0 and 1")
 
-    if _has_gurobi:
+    if _has_gurobi == True and local_optimization == False:
         maximum_combo = return_max_combo(input_sample,
                                          N,
                                          num_params,
@@ -175,7 +182,8 @@ def compute_optimised_trajectories(problem, input_sample, N, k_choices):
                                                  N,
                                                  num_params,
                                                  k_choices,
-                                                 groups)
+                                                 groups,
+                                                 local_optimization)
 
     num_groups = None
     if groups != None:
@@ -202,14 +210,17 @@ def compute_optimised_trajectories(problem, input_sample, N, k_choices):
                         default=2, help='Grid jump size (Morris only)')
     parser.add_argument('-k', '--k-optimal', type=int, required=False,
                         default=None, help='Number of optimal trajectories (Morris only)')
+    parser.add_argument('-o', '--local', type=bool, required=True, 
+                        default=False, help='Use the local optimisation method (Morris with optimization only)')
     args = parser.parse_args()
 
     np.random.seed(args.seed)
     rd.seed(args.seed)
 
     problem = read_param_file(args.paramfile)
     param_values = sample(problem, args.samples, args.levels, \
-                    args.grid_jump, args.k_optimal)
+                    args.grid_jump, args.k_optimal, args.local)
 
     np.savetxt(args.output, param_values, delimiter=args.delimiter,
                fmt='%.' + str(args.precision) + 'e')
+
diff --git a/SALib/sample/morris_util.py b/SALib/sample/morris_util.py
@@ -14,7 +14,6 @@
 import numpy as np
 import random as rd
 
-
 def generate_trajectory(G, num_levels, grid_jump):
     '''
     Returns a single trajectory of size (g+1)-by-k
@@ -118,7 +117,7 @@ def compute_distance(m, l):
     return distance
 
 
-def compute_distance_matrix(input_sample, N, num_params, groups=None):
+def compute_distance_matrix(input_sample, N, num_params, groups=None, local_optimization=False):
     '''
     groups is an integer representing the number of groups
     '''
@@ -136,7 +135,11 @@ def compute_distance_matrix(input_sample, N, num_params, groups=None):
         input_1 = input_sample[index_list[j]]
         for k in range(j + 1, N):
             input_2 = input_sample[index_list[k]]
-            distance_matrix[k, j] = compute_distance(input_1, input_2)    
+
+            if local_optimization is True:
+                distance_matrix[j, k] = compute_distance(input_1, input_2)
+
+            distance_matrix[k, j] = compute_distance(input_1, input_2)
     return distance_matrix
 
 
@@ -249,20 +252,129 @@ def compile_output(input_sample, N, num_params, maximum_combo, groups=None):
         output[index_list[counter]] = np.array(input_sample[index_list[x]])
     return output
 
+def sum_distances(indices, distance_matrix):
+    '''Calculate combinatorial distance between a select group of trajectories, indicated by indices
+    
+    indices = tuple
+    distance_matrix = array (M,M)
+    
+    Notes
+    -----
+    This function can perhaps be quickened by calculating the sum of the distances. 
+    The calculated distances, as they are right now, are only used in a relative way. 
+    Purely summing distances would lead to the same result, at a perhaps quicker rate.
+    '''  
+    combs_tup = np.array(tuple(combinations(indices, 2)))
 
-def find_optimum_combination(input_sample, N, num_params, k_choices, groups=None):
+    #Put indices from tuples into two-dimensional array.
+    combs = np.array([[i[0] for i in combs_tup], [i[1] for i in combs_tup]])
 
-    scores = find_most_distant(input_sample,
-                               N,
-                               num_params,
-                               k_choices,
-                               groups)
+    #Calculate distance (vectorized)
+    dist = np.sqrt(np.sum(np.square(distance_matrix[combs[0],combs[1]]), axis=0))
 
-    print(scores)
+    return dist
 
-    maximum_combo = find_maximum(scores, N, k_choices)
+def get_max_sum_ind(indices_list, distances, i, m):
+    '''Get the indices that belong to the maximum distance in an array of distances
+    
+    indices_list = list of tuples
+    distance = array (M)
+    i = int
+    m = int
+    '''
+    if len(indices_list) != len(distances):
+        raise ValueError("Indices and distances are lists of different length." +
+        "Length indices_list = " + str(len(indices_list)) + " and length distances = " + 
+        str(len(distances)) + ". In loop i = " + str(i) + " and m =  " + str(m))
+
+    max_index = tuple(distances.argsort()[-1:][::-1])
+    return indices_list[max_index[0]]
 
-    print(maximum_combo)
+def add_indices(indices, distance_matrix):
+    '''Adds extra indices for the combinatorial problem. 
+    For indices = (1,2) and M=5, the method returns [(1,2,3),(1,2,4),(1,2,5)]
+    
+    indices = tuple
+    distance_matrix = array (M,M)
+    '''
+    list_new_indices = []    
+    for i in range(0, len(distance_matrix)):
+        if i not in indices:
+            list_new_indices.append(indices+(i,))
+    return list_new_indices
+
+def find_local_maximum(input_sample, N, num_params, k_choices, groups = None):
+    '''An alternative by Ruano et al. (2012) for the brute force approach as 
+    originally proposed by Campolongo et al. (2007). The method should improve 
+    the speed with which an optimal set of trajectories is found tremendously 
+    for larger sample sizes.
+    '''
+
+    if groups is not None:
+        raise ValueError('Method not tested nor developed with groups. Adapt the code to work with groups.')
+
+    distance_matrix = compute_distance_matrix(input_sample, N, num_params, groups, local_optimization=True)    
+
+    tot_indices_list = []
+    tot_max_array = np.zeros(k_choices-1)
+
+    #############Loop 'i'#############
+    #i starts at 1
+    for i in range(1,k_choices):
+        indices_list = [] 
+        row_maxima_i = np.zeros(len(distance_matrix))
+
+        row_nr = 0
+        for row in distance_matrix:
+            indices =  tuple(row.argsort()[-i:][::-1]) + (row_nr,)
+            row_maxima_i[row_nr] = sum_distances(indices, distance_matrix)
+            indices_list.append(indices)
+            row_nr += 1
+
+        #Find the indices belonging to the maximum distance 
+        i_max_ind = get_max_sum_ind(indices_list,row_maxima_i, i, 0)
 
-    return maximum_combo
+        #########Loop 'm' (called loop 'k' in Ruano)############
+        m_max_ind = i_max_ind
+        #m starts at 1
+        m = 1
+
+        while m <= k_choices-i-1: 
+            m_ind = add_indices(m_max_ind, distance_matrix)
+
+            m_maxima = np.zeros(len(m_ind))
+
+            for n in range(0,len(m_ind)):
+                m_maxima[n] = sum_distances(m_ind[n], distance_matrix)  
+
+            m_max_ind = get_max_sum_ind(m_ind, m_maxima, i, m)
+
+            m += 1
+
+        tot_indices_list.append(m_max_ind)
+        tot_max_array[i-1] = sum_distances(m_max_ind, distance_matrix)
+
+    tot_max = get_max_sum_ind(tot_indices_list, tot_max_array, "tot", "tot")
+    return sorted(list(tot_max))
+
+
+
+def find_optimum_combination(input_sample, N, num_params, k_choices, groups=None, local_optimization=False):
+
+    if local_optimization is True:
+        maximum_combo = find_local_maximum(input_sample, N, num_params, k_choices, groups)
+
+    else:
+        scores = find_most_distant(input_sample,
+                                   N,
+                                   num_params,
+                                   k_choices,
+                                   groups)
+
+        #print(scores)
+
+        maximum_combo = find_maximum(scores, N, k_choices)
+
+        #print(maximum_combo)
 
+    return maximum_combo
diff --git a/SALib/tests/test_morris_util.py b/SALib/tests/test_morris_util.py
@@ -15,7 +15,9 @@
                                 compute_distance_matrix, \
                                 find_most_distant, find_maximum, \
                                 make_index_list, \
-                                check_input_sample
+                                check_input_sample, \
+                                find_local_maximum, \
+                                sum_distances, get_max_sum_ind, add_indices
 
 
 def setup():
@@ -134,6 +136,60 @@ def test_compute_distance_matrix():
     expected[5, :] = [7.52, 5.99, 5.52, 7.31, 5.77, 0]
     assert_allclose(output, expected, rtol=1e-2)
 
+def test_compute_distance_matrix_local():
+    '''
+    Tests that a distance matrix is computed correctly for the local distance optimization.
+    The only change is that the local method needs the upper triangle of 
+    the distance matrix instead of the lower one.
+    
+    This is for an input of six trajectories and two parameters
+    '''
+    sample_inputs = setup()
+    output = compute_distance_matrix(sample_inputs, 6, 2, local_optimization=True)
+    expected = np.zeros((6, 6), dtype=np.float32)
+    expected[0, :] = [0,    5.50, 6.18, 6.89, 6.18, 7.52]
+    expected[1, :] = [5.50, 0,    5.31, 6.18, 5.31, 5.99]
+    expected[2, :] = [6.18, 5.31, 0,    6.57, 5.41, 5.52]
+    expected[3, :] = [6.89, 6.18, 6.57, 0,    5.50, 7.31]
+    expected[4, :] = [6.18, 5.31, 5.41, 5.5,  0,    5.77]
+    expected[5, :] = [7.52, 5.99, 5.52, 7.31, 5.77, 0   ]
+    assert_allclose(output, expected, rtol=1e-2)
+
+def test_sum_distances():
+    '''
+    Tests whether the combinations are summed correctly. 
+    '''
+    sample_inputs = setup()
+    dist_matr = compute_distance_matrix(sample_inputs, 6, 2, local_optimization=True)
+    indices = (1,3,2)
+    distance = sum_distances(indices, dist_matr)
+
+    expected = 10.47
+    assert_allclose(distance, expected, rtol=1e-2)
+
+def test_get_max_sum_ind():
+    '''
+    Tests whether the right maximum indices are returned.
+    '''
+    indices = np.array([(1,2,4),(3,2,1),(4,2,1)])
+    distances = np.array([20, 40, 50])
+
+    output = get_max_sum_ind(indices, distances, 0, 0)
+    expected = (4,2,1)    
+
+    assert_equal(output, expected)
+
+def test_add_indices():
+    '''
+    Tests whether the right indices are added.
+    '''
+    indices = (1,3,4)
+    matr = np.zeros((6,6), dtype = np.int16)
+    ind_extra = add_indices(indices, matr)
+
+    expected = [(1,3,4,0),(1,3,4,2),(1,3,4,5)]
+
+    assert_equal(ind_extra, expected)
 
 def test_combo_from_find_most_distant():
     '''
@@ -150,6 +206,22 @@ def test_combo_from_find_most_distant():
     expected = [0, 2, 3, 5]  # trajectories 1, 3, 4, 6
     assert_equal(output, expected)
 
+def test_find_local_maximum_distance():
+    '''
+    Test whether finding the local maximum distance equals the global maximum distance
+    in a simple case. From Saltelli et al. 2008, in the solution to exercise 3a,
+    Chapter 3, page 134.
+    '''
+
+    sample_inputs = setup()
+    N=6
+    num_params = 2
+    k_choices = 4
+    scores_global = find_most_distant(sample_inputs, N, num_params, k_choices)
+    output_global = find_maximum(scores_global, N, k_choices)
+    output_local = find_local_maximum(sample_inputs, N, num_params, k_choices)
+    assert_equal(output_global, output_local)
+
 
 def test_scores_from_find_most_distant():
     '''
@@ -212,6 +284,12 @@ def test_make_index_list_with_groups():
     desired = [np.array([0, 1, 2]), np.array([3, 4, 5]), np.array([6, 7, 8]), np.array([9, 10, 11])]
     assert_equal(desired, actual)
 
+@raises(ValueError)
+def test_get_max_sum_ind_Error():
+    indices = [(1,2,4),(3,2,1),(4,2,1)]
+    distances_wrong = [20,40]
+
+    get_max_sum_ind(indices, distances_wrong, 0, 0)
 
 @raises(AssertionError)
 def test_check_input_sample_N():

diff --git a/result_images/test_plotting/covariance_plot-expected.png b/result_images/test_plotting/covariance_plot-expected.png
diff --git a/result_images/test_plotting/covariance_plot.png b/result_images/test_plotting/covariance_plot.png
diff --git a/result_images/test_plotting/covariance_plot_grouped-expected.png b/result_images/test_plotting/covariance_plot_grouped-expected.png
diff --git a/result_images/test_plotting/covariance_plot_grouped.png b/result_images/test_plotting/covariance_plot_grouped.png
diff --git a/result_images/test_plotting/horizontal_bar_plot-expected.png b/result_images/test_plotting/horizontal_bar_plot-expected.png
diff --git a/result_images/test_plotting/horizontal_bar_plot.png b/result_images/test_plotting/horizontal_bar_plot.png
diff --git a/result_images/test_plotting/sample_histograms-expected.png b/result_images/test_plotting/sample_histograms-expected.png
diff --git a/result_images/test_plotting/sample_histograms.png b/result_images/test_plotting/sample_histograms.png