# Evaluation measurements for a domain-oriented test suite

Efficiency: We define the efficiency criterion as the ability to discover the performance boundaries with the fewest samples submitted to the SUT. This is important given the limited number of samples available to characterize realistic, high-dimensional state spaces. We measure efficiency using the following metrics:

Precision - The percentage of samples which are within a distance D of the performance boundaries.

Convergence - The number of queries necessary to acquire samples on all performance boundaries.

Resolution - The distance of the samples from the nearest performance boundary.

In [1]:
def select_boundry_points(delta, test_suite_performance_modes, test_suite_performance_boundaries_distances, candidate_performance_mode):
    """
    Select the boundary distances of one performance mode that are at most delta.

    :param delta: largest distance (inclusive) for a sample to be kept
    :param test_suite_performance_modes: list of class labels, one per sample
    :param test_suite_performance_boundaries_distances: list of branch distance, one per sample
    :param candidate_performance_mode: only samples carrying this label are kept

    :return: tuple (boundry_points, fitness_call_no): the kept distances in their
        original order, and the total number of distances that were supplied
    """
    fitness_call_no = len(test_suite_performance_boundaries_distances)
    # Walk labels and distances in lockstep: one linear pass instead of testing
    # every index against a list of matching indices. A distance that has no
    # label at the same position is never selected.
    boundry_points = [
        distance
        for mode, distance in zip(test_suite_performance_modes, test_suite_performance_boundaries_distances)
        if distance <= delta and mode == candidate_performance_mode
    ]
    return boundry_points, fitness_call_no

In [2]:
# Precision - The percentage of samples which are within a distance D of the performance boundaries
def cal_precision(delta, test_suite_performance_modes, test_suite_performance_boundaries_distances, candidate_performance_mode=None):
    """
    cal_precision computes the share of test data of a performance mode that lie within
    delta of the boundary, relative to all test data sent to the fitness function.

    :param delta: describe a certain distance from the boundary
    :param test_suite_performance_modes: list of class labels, 0 for out-of-domain and 1 for in-domain
    :param test_suite_performance_boundaries_distances: list of branch distance
    :param candidate_performance_mode: precision will be computed for this performance_mode,
        None value means that all performance modes must be used

    :return: precision percentage for the candidate mode, or, when no candidate is
        given, a list holding one percentage per label from 0 up to the largest label
    """
    def precision_of(mode):
        # Percentage of all fitness calls that are near-boundary samples of `mode`.
        near_boundary, total_calls = select_boundry_points(
            delta,
            test_suite_performance_modes,
            test_suite_performance_boundaries_distances,
            mode,
        )
        return (len(near_boundary) / total_calls) * 100

    if candidate_performance_mode is not None:
        return precision_of(candidate_performance_mode)

    highest_mode = max(test_suite_performance_modes)
    # A label that never occurs in the suite is reported as 0 so that the
    # position in the result always equals the label.
    return [
        precision_of(mode) if mode in test_suite_performance_modes else 0
        for mode in range(highest_mode + 1)
    ]

In [3]:
# Convergence - The number of queries necessary to acquire samples on all performance boundaries
def cal_convergence(delta, test_suite_performance_modes, test_suite_performance_boundaries_distances, candidate_performance_mode=None):
    """
    cal_convergence computes total number of test data that were sent to the fitness function minus
    the number of test data of a performance mode that were within delta of the boundary.

    :param delta: describe a certain distance from the boundary
    :param test_suite_performance_modes: list of class labels, 0 for out-of-domain and 1 for in-domain
    :param test_suite_performance_boundaries_distances: list of branch distance
    :param candidate_performance_mode: convergence will be computed for this performance_mode,
        None value means that all performance modes must be used

    :return: convergence for the candidate mode, or, when no candidate is given,
        a list holding one convergence value per label from 0 up to the largest label
    """
    def convergence_of(mode):
        # Fitness calls that did not yield a near-boundary sample of `mode`.
        boundry_points, fitness_call_no = select_boundry_points(
            delta,
            test_suite_performance_modes,
            test_suite_performance_boundaries_distances,
            mode,
        )
        return fitness_call_no - len(boundry_points)

    if candidate_performance_mode is not None:
        return convergence_of(candidate_performance_mode)

    # A label that never occurs in the suite gets a 0 placeholder in the
    # convergence list itself, so position i always refers to label i.
    # NOTE(review): 0 mirrors the placeholder used by cal_precision; confirm
    # that it is the intended convergence value for an absent mode.
    return [
        convergence_of(mode) if mode in test_suite_performance_modes else 0
        for mode in range(max(test_suite_performance_modes) + 1)
    ]

In [4]:
#  Resolution - The distance of the samples from the nearest performance boundary
def cal_resolution(baseline_test_suite_performance_boundaries_distances, test_suite_performance_boundaries_distances):
    """
    cal_resolution averages, over the test data of the proposed method, the signed
    difference between each branch distance and the smallest branch distance of
    the baseline method.

    :param baseline_test_suite_performance_boundaries_distances : list of branch distance using baseline method
    :param test_suite_performance_boundaries_distances: list of branch distance using proposed method

    :return: resolution (negative when the proposed samples are on average
        below the smallest baseline distance)
    """
    # One reference value for the whole suite: the smallest baseline distance.
    closest_baseline = min(baseline_test_suite_performance_boundaries_distances)

    offsets = []
    for distance in test_suite_performance_boundaries_distances:
        offsets.append(distance - closest_baseline)

    return sum(offsets) / len(offsets)

Diversity: We define the diversity objective as the ability to find representative scenarios from all the performance boundaries of
the SUT. This means sampling uniformly across the entire region where a performance boundary occurs as well as achieving an even distribution amongst all performance boundaries. We measure this using the following metrics:

Coverage - The percentage of the performance boundary regions that have been sampled.

Class distribution - The distribution of samples from the different performance modes.

Boundary distribution - The distribution of samples from the different performance boundary types.


In [None]:
def train_cluster_model(test_suite_performance_boundaries_distances):
    """
    Clustering process with the data generated by the proposed method

    Placeholder: no model is trained yet, so the result is always None.

    :param test_suite_performance_boundaries_distances: list of branch distance (currently unused)

    :return: None
    """
    # TODO: train and return the cluster model, e.g.
    # ClusterModel(test_suite_performance_boundaries_distances)
    return None
  

In [None]:
def cluster_method(baseline_test_suite_performance_boundaries_distances, baseline_test_suite_performance_modes):
    """
    Calculation of clusters with generated data and their labels

    Placeholder: the clustering step is not implemented yet, so the returned
    list is always empty.

    :param baseline_test_suite_performance_boundaries_distances: list of branch distance using baseline method (currently unused)
    :param baseline_test_suite_performance_modes: list of class labels (currently unused)

    :return: a new, empty list of baseline clusters
    """
    baseline_cluster_list = []

    # TODO: process... fill baseline_cluster_list from the baseline data

    return baseline_cluster_list
    
    

In [2]:
# Coverage - The percentage of the performance boundary regions that have been sampled.
def cal_coverage(baseline_test_suite_performance_boundaries_distances, baseline_test_suite_performance_modes, test_suite_performance_boundaries_distances, candidate_performance_mode, delta=None):
    """
    cal_coverage computes the percentage of baseline clusters for which the cluster
    model trained on the proposed method's data predicts that same cluster from
    the baseline boundary points.

    :param baseline_test_suite_performance_boundaries_distances : list of branch distance using baseline method
    :param baseline_test_suite_performance_modes: list of class labels
    :param test_suite_performance_boundaries_distances: list of branch distance
    :param candidate_performance_mode: coverage will be computed for this performance_mode
    :param delta: describe a certain distance from the boundary; None value means
        that a module-level variable named ``delta`` is used instead

    :return: coverage percentage, 0.0 when there are no baseline clusters
    :raises NameError: if delta is None and no module-level ``delta`` exists
    """
    baseline_cluster_list = cluster_method(baseline_test_suite_performance_boundaries_distances, baseline_test_suite_performance_modes)
    clusterModel = train_cluster_model(test_suite_performance_boundaries_distances)

    # Without baseline clusters there is nothing to cover; this also avoids
    # dividing by zero below.
    if len(baseline_cluster_list) == 0:
        return 0.0

    if delta is None:
        # Backward compatibility: this function used to read a free variable
        # named ``delta`` from the enclosing module.
        if "delta" not in globals():
            raise NameError("name 'delta' is not defined")
        delta = globals()["delta"]

    # The selected boundary points do not depend on the cluster being checked,
    # so they are computed once instead of once per cluster.
    boundry_points, _ = select_boundry_points(delta, baseline_test_suite_performance_modes, baseline_test_suite_performance_boundaries_distances, candidate_performance_mode)

    correct_prediction_clusters = []
    for cluster in baseline_cluster_list:
        # NOTE(review): assumes clusterModel.predict returns a single value that
        # can be compared with a baseline cluster using == -- confirm once the
        # cluster model is implemented.
        cluster_predicted = clusterModel.predict(boundry_points)
        if cluster_predicted == cluster:
            correct_prediction_clusters.append(cluster_predicted)

    coverage = (len(correct_prediction_clusters) / len(baseline_cluster_list)) * 100
    return coverage
        
        
        

In [None]:
# Class distribution - The distribution of samples from the different performance modes.
def cal_class_distribution(test_suite_performance_modes):
    """
    cal_class_distribution prints and returns the percentage of samples labelled
    in-domain and out-of-domain.

    :param test_suite_performance_modes: list of class labels, 0 for out-of-domain and 1 for in-domain

    :return: class_distribution as a tuple (percent_in_domain, percent_out_of_domain)
    """
    out_of_domain_count, in_domain_count = (
        test_suite_performance_modes.count(label) for label in (0, 1)
    )
    sample_count = len(test_suite_performance_modes)

    # Both shares are relative to the whole suite, so labels other than 0 and 1
    # make the two percentages sum to less than 100.
    percent_out_of_domain, percent_in_domain = (
        (count / sample_count) * 100
        for count in (out_of_domain_count, in_domain_count)
    )

    print("Percentage of out of domain:", percent_out_of_domain)
    print("Percentage of in domain:", percent_in_domain)

    return percent_in_domain, percent_out_of_domain