# RQ1: Prominence

## Are there coverage gaps present between statement coverage and the test-oracle adequacy metrics?

Answering this RQ:
For each project, identify whether there is a coverage gap > 0. Create a table that shows the proportion of elements on a given slice, the coverage, and the checked coverage.


In [None]:
# imports and helper methods
import csv
from pathlib import Path
import numpy as np
import pandas as pd

def read_project_list(path):
    """Return the names of the active projects listed in a project CSV.

    The file at ``path`` is parsed with ``csv.DictReader`` and must provide
    ``name`` and ``active`` columns. A project counts as active when its
    ``active`` field equals ``"true"`` once surrounding whitespace has been
    removed.

    Args:
        path: Location of the project-list CSV file.

    Returns:
        list: The ``name`` value of every active row, in file order.

    Raises:
        KeyError: If the CSV has no ``name`` or ``active`` column.
    """
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation recommends for files passed to a reader.
    with open(path, "r", newline="") as project_list_csv:
        return [
            row["name"]
            for row in csv.DictReader(project_list_csv)
            # Strip the field read from the file (not the "true" literal) so
            # padded values such as " true " still match. DictReader fills
            # the missing fields of short rows with None, which is treated
            # as "not active".
            if (row["active"] or "").strip() == "true"
        ]

In [None]:
# Load the combined CSV for all projects; the location is relative to the
# directory the notebook is run from.
all_projects_csv = Path("..") / "project_data" / "all_projects.csv"
df = pd.read_csv(all_projects_csv)

In [None]:
# Columns that are added up per class.
summed_columns = [
    "statement_ps",
    "covered_ps",
    "on_slicer4j_slice",
    "covered_slicer4J",
    "on_porbs_slice",
    "covered_porbs",
    "required_ps",
    "pseudotested_ps",
    "covgap_on_porbs_slice",
    "covgap_on_slicer4j_slice",
]

# One row per class. Only the count columns are summed; "project" is carried
# through with "first" instead of "sum", because summing it would join the
# project label of every row of a class into one long string.
# NOTE(review): assumes "project" is a string label and that each class
# belongs to exactly one project -- confirm against the data.
df_project_count = df.groupby("class").agg(
    {"project": "first", **{column: "sum" for column in summed_columns}}
)


In [None]:
# Work on a copy: `df_project_count` is read again by later cells, and adding
# the percentage column to it directly meant that re-running this cell folded
# a meaningless sum of percentages into the totals.
df_table = df_project_count.copy()

# Columns of the final table, in display order.
table_columns = [
    "statement_ps",
    "covered_ps",
    "pct_is_covered_ps",
    "on_slicer4j_slice",
    "covered_slicer4J",
    "covgap_on_slicer4j_slice",
    "on_porbs_slice",
    "covered_porbs",
    "covgap_on_porbs_slice",
    "required_ps",
    "pseudotested_ps",
]
table_count_columns = [
    column for column in table_columns if column != "pct_is_covered_ps"
]

# Percentage of statements that are covered, per row.
df_table["pct_is_covered_ps"] = df_table["covered_ps"] / df_table["statement_ps"] * 100

# "project" is only needed to order the rows; it is not part of the table.
# sort_values returns a new frame, so nothing is modified in place on a slice.
df_table = df_table.sort_values(by="project")[table_columns]

# Totals are sums of the count columns only. The overall percentage is
# recomputed from the summed counts, so the "Total" row gets a real value.
total = df_table[table_count_columns].sum()
total_row = total.to_frame().T
total_row["pct_is_covered_ps"] = total["covered_ps"] / total["statement_ps"] * 100
total_row.index = pd.Index(["Total"], name=df_table.index.name)
df_table = pd.concat([df_table, total_row[table_columns]])

print(df_table)

In [None]:
# Render the table as LaTeX. No `buf` is passed, so the LaTeX source is
# returned (and shown as the cell result) rather than written to a file.
# To write it out instead, add e.g.
#   buf="/Users/meganmaton/Repos/Papers/cc-orbs-ps-paper/table-data/coverage-gaps.tex"
# and, to restrict or reorder the output, a `columns=[...]` list.
latex_options = {
    "header": False,  # no column-header row
    "index": True,  # keep the row labels
    "na_rep": "-",  # placeholder for missing values
    "escape": True,  # escape LaTeX special characters
    "float_format": "{:.0f}".format,  # floats rendered without decimals
}
df_table.to_latex(**latex_options)

In [None]:
# Per-class consistency check: the required and pseudo-tested counts should
# add up to the covered count. Keep the rows where they do not.
accounted_for = df_project_count["required_ps"] + df_project_count["pseudotested_ps"]
mismatched_classes = df_project_count.loc[accounted_for != df_project_count["covered_ps"]]

if mismatched_classes.empty:
    print("For all classes, 'required_ps' + 'pseudotested_ps' equals 'covered_ps'.")
else:
    print("The following classes have a mismatch where 'required_ps' + 'pseudotested_ps' is not equal to 'covered_ps':")
    print(mismatched_classes)


In [None]:
# Line-level version of the consistency check: for every row of `df`,
# required_ps + pseudotested_ps must equal covered_ps. Each flag is reduced
# to a boolean first so that the comparison does not depend on how the flags
# are stored.
is_required = df["required_ps"].eq(True)
is_pseudotested = df["pseudotested_ps"].eq(True)
is_covered = df["covered_ps"].eq(True)

# A row is inconsistent when it is
#   * both required and pseudo-tested,
#   * covered but neither required nor pseudo-tested, or
#   * required or pseudo-tested but not covered.
# Comparing the sums catches all three cases; the last one was previously
# missed even though the success message claims equality for every line.
mismatched_lines = df[
    (is_required.astype(int) + is_pseudotested.astype(int)) != is_covered.astype(int)
]

if mismatched_lines.empty:
    print("For all lines, 'required_ps' + 'pseudotested_ps' equals 'covered_ps'.")
else:
    print(
        "The following lines have a mismatch where 'required_ps' + 'pseudotested_ps' is not equal to 'covered_ps':"
    )
    print(mismatched_lines)