## Main Evaluation: Seeding Strategies in Search-Based Unit Test Generation for Python

Provides the empirical evaluation for the Bachelorthesis of Lukas Steffens: Seeding Strategies in Search-Based Unit Test Generation for Python

In [2]:
# Do all necessary imports here
import itertools as it
import statistics

from bisect import bisect_left
from pathlib import Path
from typing import List, Tuple, Optional, Dict

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylatex
import scipy.stats as ss
import seaborn as sns
import glob

from pandas import Categorical

### Useful Functions for Evaluation

Implement the Vargha and Delaney (Â12) effect size statistics,
taken from a [GitHub Gist](https://gist.github.com/jacksonpradolima/f9b19d65b7f16603c837024d5f8c8a65).

In [3]:
def vd_a(treatment: List[float], control: List[float]) -> Tuple[float, str]:
    """Compute Vargha and Delaney A index

    A. Vargha and H. D. Delaney.  A critique and improvement of the CL common language
    effect size statistics of McGraw and Wong.  Journal of Educational and Behavioral
    Statistics, 25(2):101-132, 2000.

    The formula to compute A has been transformed to minimise accuracy errors, see
    https://mtorchiano.wordpress.com/2014/05/19/effect-size-of-r-precision/

    :param treatment: a list of numbers
    :param control: a list of numbers
    :return: the value estimate and the magnitude
    """
    m = len(treatment)
    n = len(control)

    #if m != n:
    #    raise ValueError("Parameter lists must have equal lengths")

    r = ss.rankdata(treatment + control)
    r1 = sum(r[0:m])

    # Compute the measure
    # A = (r1/m - (m+1)/2)/n  # formula (14) in Vargha and Delaney, 2000
    A = (2 * r1 - m * (m + 1)) / (2 * n * m)  # equivalent formula with better accuracy

    levels = [0.147, 0.33, 0.474]
    magnitudes = ["negligible", "small", "medium", "large"]
    scaled_A = (A - 0.5) * 2

    magnitude = magnitudes[bisect_left(levels, abs(scaled_A))]
    estimate = A

    return estimate, magnitude

Implement helper functions for calculating the Vargha and Delaney (Â12) effect size statistics and the Mann Whitney U Test for the different scenarios.

In [4]:
def calc_a12(projectname: str, strategy1: str, strategy2: str, cov_values: pd.DataFrame):
    treatment_row = cov_values.loc[(cov_values['ConfigurationId'] == strategy1) & (cov_values['Project'] == projectname)]
    control_row = cov_values.loc[(cov_values['ConfigurationId'] == strategy2) & (cov_values['Project'] == projectname)]
    treatment = treatment_row.iloc[0]['Coverage Values']
    control = control_row.iloc[0]['Coverage Values']
    return vd_a(treatment, control)

In [5]:
def calc_mwu(projectname: str, strategy1: str, strategy2: str, vals: pd.DataFrame):
    alpha = 0.05
    p_vals = []

    for _, row in cov_values_by_module.iterrows():
        if row[config] == strategy1 and row[project] == projectname:
            l_s2 = vals.loc[(vals[config] == strategy2) & (vals[project] == projectname) & (vals[cut] == row[cut])]
            try:
                mwu = ss.mannwhitneyu(row['Coverage Values'], l_s2.iloc[0]['Coverage Values'], alternative='two-sided')
                p_vals.append(mwu.pvalue)
            except ValueError:
                p_vals.append(1) # if all values for both strategies are equal
                print('All values are equal.')
            except IndexError:
                print('No suitable other config found') # if for one config the run failed
    
    count_sig = 0
    for val in p_vals:
        if val < alpha:
            count_sig += 1
            
    return float(count_sig) / len(p_vals)

In [6]:
def calc_mwu2(projectname: str, strategy1: str, strategy2: str, vals: pd.DataFrame):
    alpha = 0.05
    a12_vals = []
    p_vals = []

    for _, row in cov_values_by_module.iterrows():
        if row[config] == strategy1 and row[project] == projectname:
            l_s2 = vals.loc[(vals[config] == strategy2) & (vals[project] == projectname) & (vals[cut] == row[cut])]
            try:
                val, val_string = vd_a(row['Coverage Values'], l_s2.iloc[0]['Coverage Values'])
                #a12_vals.append(val)
                mwu = ss.mannwhitneyu([val], [0.5], alternative='two-sided')
                p_vals.append(mwu.pvalue)
            except IndexError:
                print('No suitable other config found') # if for one config the run failed
            except ValueError:
                print("All values are equal")
                p_vals.append(1) # if all values for both strategies are equal
    
            
   # try:
   #     mwu = ss.mannwhitneyu(a12_vals, [0.5 for x in range(0, len(a12_vals))], alternative='greater')
   #     p_vals.append(mwu.pvalue)
   # except ValueError:
   #     print("All values are equal")
   #     p_vals.append(1) # if all values for both strategies are equal

    count_sig = 0
    for val in p_vals:
        print(val)
        if val < alpha:
            count_sig += 1
            
    return float(count_sig) / len(p_vals)

### Load Data From CSV Files

In [7]:
def reset_orig_dir():
    os.chdir('/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation/eval_env/notebooks')

In [18]:
# Combines all csv files in the current directory and recursively to one csv file.
#adjust the below path on your machine
os.chdir("/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation/results/main_run/data")
extension = 'csv'
all_filenames = [i for i in glob.glob('./**/*.{}'.format(extension), recursive=True)]

#combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames ])
#export to csv
combined_csv.to_csv( "../results.csv", index=False, encoding='utf-8-sig')

In [17]:
reset_orig_dir()
# The names of the columns we are interested in
cut = "TargetModule"
project_name = "ProjectName"
config = "ConfigurationId"
coverage = "Coverage"

# How often every CUT was executed
runs = 30
os.getcwd()
# Adjust the following paths on your system if you want to rerun this sheet!
PAPER_EXPORT_PATH = Path("/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation")

results = pd.read_csv(Path("../..") / "results" / "main_run" / "results.csv")
project_information = pd.read_csv(Path("../..") / "results" / "main_run" / "projects.csv")

In [10]:
number_cuts = len(set(results[cut]))
print(f"I tested {number_cuts} unique classes, each being executed {runs} times per configuration")

We tested 106 unique classes, each being executed 30 times per configuration


In [11]:
f_config_names = list(set(results[config]))
config_names = [n for n in f_config_names if type(n) is str]
config_names.sort()
print("I used {} configurations, namely:\n - {}".format(
    len(config_names), "\n - ".join(config_names)
))

I used 5 configurations, namely:
 - All
 - Baseline
 - Initial
 - Static
 - Static_and_Dynamic


In [12]:
# Add a column containing the project name to the evaluation results.
projects = []
for _, row in results.iterrows():
    projects.append(row[cut].split(".")[0])

results.insert(1, "Project", projects, True)
project = "Project"

### Creating different datasets for the different tables

The results obtained from the Pynguin runs need to be adapted to my needs. I create different datasets which I use later to create tables.

In [13]:
table_data = results.groupby([config, project], as_index=False).agg(
    {
        coverage: "mean",
    }
)

cov_values = results.groupby([config, project])[coverage].apply(list).reset_index(name='Coverage Values')
cov_values_by_module = results.groupby([config, project, cut])[coverage].apply(list).reset_index(name='Coverage Values')


merged = pd.merge(table_data, table_data, on=project)
static_baseline = merged.loc[(merged['ConfigurationId_x'] == 'Baseline') & (merged['ConfigurationId_y'] == 'Static')]
static_baseline.insert(2, "# Modules", [1,6,2,6,9,13,34,6,4,23], True)

static_dynamic_baseline = merged.loc[(merged['ConfigurationId_x'] == 'Baseline') & (merged['ConfigurationId_y'] == 'Static_and_Dynamic')]
static_dynamic_baseline.insert(2, "# Modules", [1,6,2,6,9,13,34,6,4,23], True)

initial_baseline = merged.loc[(merged['ConfigurationId_x'] == 'Baseline') & (merged['ConfigurationId_y'] == 'Initial')]
initial_baseline.insert(2, "# Modules", [1,6,2,6,9,13,34,6,4,23], True)

static_dynamic_static = merged.loc[(merged['ConfigurationId_x'] == 'Static') & (merged['ConfigurationId_y'] == 'Static_and_Dynamic')]
static_dynamic_static.insert(2, "# Modules", [1,6,2,6,9,13,34,6,4,23], True)

all_baseline = merged.loc[(merged['ConfigurationId_x'] == 'Baseline') & (merged['ConfigurationId_y'] == 'All')]
all_baseline.insert(2, "# Modules", [1,6,2,6,9,13,34,6,4,23], True)

## Tables for the different strategies

In the following I create Latex tables to visualize my answers to the research questions.

In [14]:
# Table for evaluating the static constant seeding against the baseline
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|c|c|c|c|', booktabs=True)
tabular.add_row([
    "Project",
    "# Modules",
    "Baseline",
    "Static",
    pylatex.NoEscape(r"Â\textsubscript{12}"),
    "% > 0.5"
])

tabular.add_hline()
for _, row in static_baseline.iterrows():
    tabular.add_row([
        row[project],
        row['# Modules'],
        "{:.4f}".format(row['Coverage_x']),
        "{:.4f}".format(row['Coverage_y']),
        calc_a12(row[project], "Static", "Baseline", cov_values),
        calc_mwu2(row[project], "Static", "Baseline", cov_values_by_module)
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("Table showing the different values for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/base_static_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())

1.0
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1
1.0
1
1
1
1
All values are equal
1
1.0
All values are equal
All values are equal
All values are equal
1
1.0
1.0
1
1.0
1
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1
1
1.0
1
1
1.0
1
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1
1.0
1
1
1
1
1
1.0
1
1
1.0
1.0
1.0
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
A

In [15]:
#table for evaluating static and dynamic constant seeding against Baseline
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|c|c|c|c|', booktabs=True)
tabular.add_row([
    "Project",
    "# Modules",
    "Baseline",
    "Static + Dynamic",
    pylatex.NoEscape(r"Â\textsubscript{12}"),
    "% > 0.5"
])

tabular.add_hline()
for _, row in static_dynamic_baseline.iterrows():
    tabular.add_row([
        row[project],
        row['# Modules'],
        "{:.4f}".format(row['Coverage_x']),
        "{:.4f}".format(row['Coverage_y']),
        calc_a12(row[project], "Static_and_Dynamic", "Baseline", cov_values),
        calc_mwu2(row[project], "Static_and_Dynamic", "Baseline", cov_values_by_module)
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("Table showing the different values for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/base_static+dynamic_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())

1.0
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1
1.0
1
1
1
1
1.0
1.0
All values are equal
All values are equal
All values are equal
1
1.0
1.0
1
1.0
1
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1
1
1.0
1.0
1
1
1.0
1
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
1.0
1.0
1
1
1
1.0
1.0
1
1
1.0
1.0
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
All values are equal
No suitable other 

In [None]:
#table for evaluating static and dynamic constant seeding against static constant seeding
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|c|c|c|c|', booktabs=True)
tabular.add_row([
    "Project",
    "# Modules",
    "Static",
    "Static + Dynamic",
    pylatex.NoEscape(r"Â\textsubscript{12}"),
    "% > 0.5"
])

tabular.add_hline()
for _, row in static_dynamic_static.iterrows():
    tabular.add_row([
        row[project],
        row['# Modules'],
        "{:.4f}".format(row['Coverage_x']),
        "{:.4f}".format(row['Coverage_y']),
        calc_a12(row[project], "Static_and_Dynamic", "Static", cov_values),
        calc_mwu2(row[project], "Static_and_Dynamic", "Static", cov_values_by_module)
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("Table showing the different values for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/static_static+dynamic_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())

In [None]:
#table for evaluating initial population seeding against Baseline
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|c|c|c|c|', booktabs=True)
tabular.add_row([
    "Project",
    "# Modules",
    "Baseline",
    "Initial",
    pylatex.NoEscape(r"Â\textsubscript{12}"),
    "% > 0.5"
])

tabular.add_hline()
for _, row in initial_baseline.iterrows():
    tabular.add_row([
        row[project],
        row['# Modules'],
        "{:.4f}".format(row['Coverage_x']),
        "{:.4f}".format(row['Coverage_y']),
        calc_a12(row[project], "Initial", "Baseline", cov_values),
        calc_mwu2(row[project], "Initial", "Baseline", cov_values_by_module)
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("Table showing the different values for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/base_initial_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())

In [None]:
#table for evaluating all seeding strategies combined against Baseline
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|c|c|c|c|', booktabs=True)
tabular.add_row([
    "Project",
    "# Modules",
    "Baseline",
    "Static + Dynamic + Initial",
    pylatex.NoEscape(r"Â\textsubscript{12}"),
    "% > 0.5"
])

tabular.add_hline()
for _, row in all_baseline.iterrows():
    tabular.add_row([
        row[project],
        row['# Modules'],
        "{:.4f}".format(row['Coverage_x']),
        "{:.4f}".format(row['Coverage_y']),
        calc_a12(row[project], "All", "Baseline", cov_values),
        calc_mwu2(row[project], "All", "Baseline", cov_values_by_module)
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("Table showing the different values for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/base_all_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())