# Mutating the Gaps

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the combined mutation-results table that every later cell reads from.
df_mutants = pd.read_csv(filepath_or_buffer="../project_data/all_projects.csv")

In [None]:
# overall
# One row per class with its PIT mutant counts added up.
# Only the count columns are summed. "project" is carried through with "first":
# if it holds text, summing it would concatenate the name once per input row.
# NOTE(review): assumes every class belongs to exactly one project -- confirm.
df_overall = (
    df_mutants[
        ["class", "project", "pit_total", "pit_killed", "pit_survived", "pit_uncovered"]
    ]
    .groupby("class")
    .agg(
        {
            "project": "first",
            "pit_total": "sum",
            "pit_killed": "sum",
            "pit_survived": "sum",
            "pit_uncovered": "sum",
        }
    )
)
# Per-class mutation score: pit_killed as a fraction of pit_total.
df_overall["pit_ms"] = df_overall["pit_killed"] / df_overall["pit_total"]
# Turn the "class" index back into an ordinary column for the later cells.
df_overall.reset_index(inplace=True)

In [None]:
# gap_ms
# For each tool, compute a per-class mutation score restricted to the rows that
# the tool's flag column marks as True, and store it in df_overall under the
# tool's name.
#
# Key: column created in df_overall. Value: flag column in df_mutants.
gap_flag_columns = {
    "Slicer4J": "covgap_on_slicer4j_slice",
    "PORBS": "covgap_on_porbs_slice",
    "PseudoSweep": "pseudotested_ps",
}

for tool, flag_column in gap_flag_columns.items():
    # Keep only rows whose flag is exactly True (missing values compare False).
    flagged_rows = df_mutants[df_mutants[flag_column].eq(True)]

    # One grouped pass per tool instead of re-filtering the frame per class.
    per_class = flagged_rows.groupby("class")[["pit_killed", "pit_total"]].sum()
    gap_scores = per_class["pit_killed"] / per_class["pit_total"]

    # Classes without any flagged row are absent from gap_scores and map to NaN.
    # The column is always created, so later cells can rely on it existing even
    # when a tool flags nothing at all.
    df_overall[tool] = df_overall["class"].map(gap_scores)

In [None]:

# Scatter plot of the three coverage-gap mutation scores for every class.

# Sort by project, then by class within a project. A single multi-key sort is
# used because two consecutive single-key sorts do not guarantee the class
# order inside a project (the default sort is not stable).
df_overall.sort_values(["project", "class"], inplace=True)

# One series per tool, each with its own marker so they can be told apart.
for tool, marker in (("Slicer4J", "o"), ("PORBS", "x"), ("PseudoSweep", "v")):
    sns.scatterplot(
        x=df_overall["class"], y=df_overall[tool], marker=marker, label=tool
    )

plt.ylabel("Coverage Gap Mutation Score")
plt.xlabel("Class Under Test")

plt.xticks(rotation=45, ha="right")
plt.title("Coverage Gap Mutation Scores for each Class Under Test")
# savefig requires an output path; calling it without one raises TypeError.
# NOTE(review): the original call gave no path -- adjust the file name/location
# to wherever the figures are expected.
plt.savefig(
    "coverage_gap_ms_per_class.png",
    bbox_inches="tight",
)

In [None]:
# Distribution of the coverage-gap mutation scores, one box per tool.
gaps = df_overall[["Slicer4J", "PORBS", "PseudoSweep"]]

# Min, quartiles and max of each tool's scores, printed as a table.
print(gaps.quantile(q=[0.0, 0.25, 0.5, 0.75, 1.0], axis=0, numeric_only=True))

bp = sns.boxplot(gaps, color="tab:blue")

plt.ylabel("Coverage Gap Mutation Score")
plt.xlabel("Tool used to calculate gap")
plt.title("Coverage Gap Mutation Scores for each Tool")

# savefig requires an output path; calling it without one raises TypeError.
# NOTE(review): the original call gave no path -- adjust the file name/location
# to wherever the figures are expected.
plt.savefig(
    "coverage_gap_ms_per_tool.png",
    bbox_inches="tight",
)