## Setup Evaluation: Seeding Strategies in Search-Based Unit Test Generation for Python

Provides a part of the setup for the empirical evaluation of the bachelor's thesis: Seeding Strategies in Search-Based Unit Test Generation for Python

**In this Notebook I determine the optimal value for the probability of seeding constants observed statically from the source code.**

In [1]:
# Do all necessary imports here
import itertools as it
import statistics

from bisect import bisect_left
from pathlib import Path
from typing import List, Tuple, Optional, Dict

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylatex
import scipy.stats as ss
import seaborn as sns
import glob

from pandas import Categorical

### Load Data From CSV Files

In [2]:
def reset_orig_dir():
    os.chdir('/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation/eval_env/notebooks')

In [3]:
# Combines all csv files in the current directory and recursively to one csv file.
#adjust the below path on your machine
os.chdir("/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation/results/opt_const/data")
extension = 'csv'
all_filenames = [i for i in glob.glob('./**/*.{}'.format(extension), recursive=True)]

#combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames ])
#export to csv
combined_csv.to_csv( "../results.csv", index=False, encoding='utf-8-sig')

In [17]:
reset_orig_dir()
# The names of the columns we are interested in
cut = "TargetModule"
project_name = "ProjectName"
config = "ConfigurationId"
coverage = "Coverage"
seconds = [f"CoverageTimeline_T{i}" for i in range(1, 601)]

# How often every CUT was executed
runs = 10
# Adjust the following path on your system if you want to rerun this sheet!
PAPER_EXPORT_PATH = Path("/home/l_pc1-l/ba/own_stuff/ba-thesis/evaluation")

results = pd.read_csv(Path("../..") / "results" / "opt_const" / "results.csv")
project_information = pd.read_csv(Path("../..") / "results" / "opt_const" / "projects.csv")

In [18]:
number_cuts = len(set(results[cut]))
print(f"I tested {number_cuts} unique classes, each being executed {runs} times per configuration")

I tested 106 unique classes, each being executed 10 times per configuration


In [19]:
f_config_names = list(set(results[config]))
config_names = [n for n in f_config_names if type(n) is str]
config_names.sort()
print("I used {} configurations, namely:\n - {}".format(
    len(config_names), "\n - ".join(config_names)
))

I used 10 configurations, namely:
 - Seed_Prob_0_0
 - Seed_Prob_0_1
 - Seed_Prob_0_2
 - Seed_Prob_0_3
 - Seed_Prob_0_4
 - Seed_Prob_0_5
 - Seed_Prob_0_6
 - Seed_Prob_0_7
 - Seed_Prob_0_8
 - Seed_Prob_0_9


### Create a Table for the Configurations and their achieved coverage

In [20]:
table = pylatex.Table(position="H")
tabular = pylatex.Tabular('|c|c|', booktabs=True)
tabular.add_row([
    pylatex.NoEscape(r"P\textsubscript{Constant}"),
    "Avg. Branch Coverage"
])



raw_table_data = results.groupby(config, as_index=False).agg(
    {
        coverage: "mean"
    }
)
raw_table_data.sort_values(by=coverage, ascending=False, inplace=True)
for _, row in raw_table_data.iterrows():
    tabular.add_row([
        row[config][-3] + "." + row[config][-1] ,
        "{:.4f}".format(row[coverage])        
    ])

tabular.add_hline()
table.append(pylatex.NoEscape(r'\centering'))
table.append(tabular)
table.add_caption("The probabilites for seeding " +
                  "constants and the corresponding achieved coverage.")
label = pylatex.Label("tabconstvalues")
table.append(label)

#adjust this path if you want to store the table on your machine
with open ("../../../Thesis/chapters/evaluation_tables/opt_const_table.tex", "w") as file:
    file.write(table.dumps())
print(table.dumps())

\begin{table}[H]%
\centering%
\begin{tabular}{@{}|c|c|@{}}%
\toprule%
P\textsubscript{Constant}&Avg. Branch Coverage\\%
0.4&0.6516\\%
0.5&0.6508\\%
0.9&0.6504\\%
0.6&0.6488\\%
0.1&0.6487\\%
0.3&0.6485\\%
0.2&0.6481\\%
0.7&0.6479\\%
0.0&0.6473\\%
0.8&0.6461\\%
\midrule\bottomrule%
%
\end{tabular}%
\caption{The probabilites for seeding constants and the corresponding achieved coverage.}%
\label{tabconstvalues}%
\end{table}
