# Calculate the coverage percentages

### Variable declarations

In [1]:
import os
from my_secrets import base_path

# Locations of every input and output file used by this notebook, all rooted
# at ``base_path``. Each entry maps a logical name to the path components that
# are joined onto ``base_path``.
paths_dict = {
    name: os.path.join(base_path, *components)
    for name, components in (
        (
            "data_file_path",
            ("DeepDiveBugReportsWithLogs", "data", "bug_reports_with_stack_traces_details.json"),
        ),
        (
            "gzoltar_data_file_path",
            ("DeepDiveBugReportsWithLogs", "data", "coverage_data_gzoltar.json"),
        ),
        (
            "projects_dir",
            ("open_source_repos_being_studied",),
        ),
        (
            "output_file_path",
            ("DeepDiveBugReportsWithLogs", "data", "coverage_percentages.csv"),
        ),
        (
            "tests_analysis_results",
            ("DeepDiveBugReportsWithLogs", "data", "rq1_results.json"),
        ),
    )
}

# Maps each project identifier (the prefix of a bug name such as "Cli_14")
# to its GitHub "owner/repository" slug. The repository part is used further
# down as the name of the local clone directory.
projects_github = dict(
    Cli="apache/commons-cli",
    Closure="google/closure-compiler",
    Codec="apache/commons-codec",
    Collections="apache/commons-collections",
    Compress="apache/commons-compress",
    Csv="apache/commons-csv",
    Gson="google/gson",
    JacksonCore="FasterXML/jackson-core",
    JacksonDatabind="FasterXML/jackson-databind",
    Jsoup="jhy/jsoup",
    JxPath="apache/commons-jxpath",
    Lang="apache/commons-lang",
    Math="apache/commons-math",
    Mockito="mockito/mockito",
    Time="JodaOrg/joda-time",
    fastjson="alibaba/fastjson",
    junit4="junit-team/junit4",
)

## Coverage computation

In [3]:
import os
import utils
import importlib
importlib.reload(utils)

# Computes, for every bug that has coverage data, the average line coverage of
# (a) the buggy files and methods, (b) the project-internal files and methods
# that appear in the stack trace, and (c) every file Gzoltar reports, plus the
# 1-based position of the first buggy method in the stack trace. The results
# are collected in ``coverage_obj`` and written to the CSV at
# ``paths_dict["output_file_path"]``.


def _flatten_method_lines(methods):
    """Concatenate the per-method line lists of one Gzoltar file entry."""
    covered = []
    for lines in methods.values():
        covered += lines
    return covered


def _covered_lines_for(gzoltar_bug_data, file_path):
    """Return the covered lines of the first Gzoltar file matching *file_path*.

    A Gzoltar entry matches when its key is a substring of *file_path*. Only
    the first match (in dict order) is used; an empty list is returned when
    nothing matches.
    """
    for gzoltar_file, methods in gzoltar_bug_data.items():
        if gzoltar_file in file_path:
            return _flatten_method_lines(methods)
    return []


def _first_buggy_method_position(stack_trace_files, stack_trace_methods, buggy_code_info):
    """Return the 1-based stack-trace index of the first buggy method, or None.

    A frame matches when its file name is a substring of a buggy file path and
    its method name (last dotted component, with any ``$...`` suffix removed)
    is one of that file's buggy methods.
    """
    for index, st_file in enumerate(stack_trace_files):
        for buggy_file, buggy_methods in buggy_code_info.items():
            if st_file not in buggy_file:
                continue
            st_method = stack_trace_methods[index].split(".")[-1].split("$")[0]
            if st_method in buggy_methods:
                return index + 1
    return None


tests_analysis_results = utils.json_file_to_dict(paths_dict["tests_analysis_results"])
bugs = utils.get_list_of_bugs_with_coverage(tests_analysis_results)

bugs_data = utils.json_file_to_dict(paths_dict["data_file_path"])
gzoltar_data = utils.json_file_to_dict(paths_dict["gzoltar_data_file_path"])
coverage_obj = {}

# Bugs that are deliberately excluded from the computation.
problematic_bugs = ["Mockito_17", "Mockito_22", "Mockito_25", "Mockito_30", "Mockito_31", "Mockito_35"]

for bug in bugs:
    if bug in problematic_bugs:
        continue
    # Bug names look like "<Project>_<id>"; split on the last underscore only.
    project, bug_id = bug.rsplit("_", 1)
    repo_name = projects_github[project].split("/")[1]

    print(bug)

    bug_info = bugs_data[project][bug_id]
    gzoltar_bug_data = gzoltar_data[project][bug_id]

    # Put the local clone at the commit the bug report refers to.
    bug_report_commit_hash = utils.get_bug_report_commit(bugs_data, project, bug_id)
    project_path = os.path.join(paths_dict["projects_dir"], repo_name)
    os.chdir(project_path)
    reset_status = os.system("git reset --hard --quiet && git clean --quiet -f -d")
    checkout_command = "git checkout  --quiet " + bug_report_commit_hash
    checkout_status = os.system(checkout_command)
    if reset_status != 0 or checkout_status != 0:
        # Coverage measured against the wrong working tree would be
        # meaningless, so skip the bug instead of continuing silently.
        print(f"{bug} - WARNING: could not check out commit {bug_report_commit_hash}; bug skipped")
        continue

    # Calculating buggy_files average coverage
    buggy_code_info = bug_info["buggyMethods"]
    sum_buggy_files_coverage = 0
    count_buggy_files = 0
    sum_buggy_methods_coverage = 0
    count_buggy_methods = 0
    for buggy_file, buggy_methods in buggy_code_info.items():
        b_file = buggy_file
        # The file-level "previousFileName" is read from the first method entry.
        first_method_info = next(iter(buggy_methods.values()))
        if "previousFileName" in first_method_info:
            b_file = first_method_info["previousFileName"].replace(project_path + "/", "")
        if not utils.find_file(b_file, project_path):
            continue
        buggy_file_covered_lines = _covered_lines_for(gzoltar_bug_data, b_file)

        buggy_file_lines_of_code = utils.count_lines_of_code_for_coverage(
            b_file, project_path, buggy_file_covered_lines
        )
        sum_buggy_files_coverage += 100 * len(buggy_file_covered_lines) / buggy_file_lines_of_code
        # Count only files that were actually measured, so that skipped files
        # do not drag the average down (same rule as the other two sections).
        count_buggy_files += 1

        for method_info in buggy_methods.values():
            try:
                start_line = int(method_info["bugReportCommitStartLine"])
                end_line = int(method_info["bugReportCommitEndLine"])
            except KeyError:
                # No line range recorded for this method at the bug-report commit.
                continue

            method_covered_lines = utils.get_method_covered_lines_list(
                buggy_file_covered_lines, start_line, end_line
            )
            buggy_method_lines_of_code = utils.count_lines_of_code_for_coverage(
                b_file, project_path, buggy_file_covered_lines, start_line, end_line
            )
            sum_buggy_methods_coverage += 100 * len(method_covered_lines) / buggy_method_lines_of_code
            count_buggy_methods += 1

    # "N/A" whenever nothing was measured; this also avoids dividing by zero
    # when every file was skipped or no method had a recorded line range.
    average_buggy_files_coverage = (
        sum_buggy_files_coverage / count_buggy_files if count_buggy_files else "N/A"
    )
    average_buggy_methods_coverage = (
        sum_buggy_methods_coverage / count_buggy_methods if count_buggy_methods else "N/A"
    )

    # Calculating stack trace files average coverage
    sum_st_files_coverage = 0
    internal_st_files_count = 0
    sum_st_methods_coverage = 0
    count_st_methods = 0

    st_methods_detailed_info = bug_info["stackTraceMethodsDetails"]
    for st_file_path, st_methods in st_methods_detailed_info.items():
        st_file = st_file_path
        first_method_info = next(iter(st_methods.values()))
        if "previousFileName" in first_method_info:
            st_file = first_method_info["previousFileName"]
        if not os.path.exists(st_file):
            # Not present in the checked-out tree: treated as an external file.
            continue
        # NOTE(review): the match uses st_file_path (current name) whereas the
        # buggy-file section matches on the previous name - confirm intended.
        st_files_covered_lines = _covered_lines_for(gzoltar_bug_data, st_file_path)
        st_file_lines_of_code = utils.count_lines_of_code_for_coverage(st_file, "", st_files_covered_lines)
        sum_st_files_coverage += 100 * len(st_files_covered_lines) / st_file_lines_of_code
        internal_st_files_count += 1

        for method_info in st_methods.values():
            start_line = int(method_info["bugReportCommitStartLine"])
            end_line = int(method_info["bugReportCommitEndLine"])
            st_method_covered_lines = utils.get_method_covered_lines_list(
                st_files_covered_lines, start_line, end_line
            )
            # NOTE(review): the method-level list is passed here, while the
            # buggy-method section passes the file-level list - confirm which
            # one count_lines_of_code_for_coverage expects.
            st_method_lines_of_code = utils.count_lines_of_code_for_coverage(
                st_file, "", st_method_covered_lines, start_line, end_line
            )
            sum_st_methods_coverage += 100 * len(st_method_covered_lines) / st_method_lines_of_code
            count_st_methods += 1

    # "N/A" when only external files appear in the stack trace.
    average_st_files_coverage = (
        sum_st_files_coverage / internal_st_files_count if internal_st_files_count else "N/A"
    )
    average_st_methods_coverage = (
        sum_st_methods_coverage / count_st_methods if count_st_methods else "N/A"
    )

    # Calculating all files average coverage - Obs: only considering files with coverage into the sum
    sum_all_files_coverage = 0
    all_files_count = 0
    file_not_found_count = 0
    for gzoltar_file, methods in gzoltar_bug_data.items():
        gzoltar_file_path = utils.find_file(gzoltar_file, project_path)
        if gzoltar_file_path is None:  # Gzoltar file not found.
            file_not_found_count += 1
            continue
        file_covered_lines = _flatten_method_lines(methods)
        file_lines_of_code = utils.count_lines_of_code_for_coverage(gzoltar_file_path, "", file_covered_lines)
        sum_all_files_coverage += 100 * len(file_covered_lines) / file_lines_of_code
        all_files_count += 1

    # Unlike the averages above, this one falls back to 0 rather than "N/A".
    average_all_files_coverage = sum_all_files_coverage / all_files_count if all_files_count else 0

    pos_first_buggy_method_in_stack_trace = _first_buggy_method_position(
        bug_info["stack_trace_files"],
        bug_info["stack_trace_methods"],
        buggy_code_info,
    )

    if file_not_found_count:
        print(f"{bug} - WARNING: {file_not_found_count} Gzoltar files were not found in the buggy commit")

    coverage_obj.setdefault(project, {})[bug_id] = {
        "average_coverage_buggy_files": average_buggy_files_coverage,
        "average_coverage_stack_trace_files": average_st_files_coverage,
        "average_all_files_coverage": average_all_files_coverage,
        "average_buggy_methods_coverage": average_buggy_methods_coverage,
        "average_st_methods_coverage": average_st_methods_coverage,
        "pos_first_buggy_method_in_stack_trace": pos_first_buggy_method_in_stack_trace,
    }

utils.create_coverage_percent_file(coverage_obj, paths_dict["output_file_path"])
print("Done")

Cli_14
Cli_5
Closure_106
Closure_125
Closure_143
Closure_152
Closure_2
Closure_34
Closure_37
Closure_55
Codec_8
Collections_28
Compress_1
Compress_12
Compress_14
Compress_17
Compress_18
Compress_23
Compress_24
Compress_27
Compress_31
Compress_32
Compress_34
Csv_12
Csv_4
Gson_12
Gson_5
Gson_8
JacksonCore_11
JacksonCore_4
JacksonCore_7
JacksonDatabind_14
JacksonDatabind_15
JacksonDatabind_25
JacksonDatabind_28
JacksonDatabind_32
JacksonDatabind_37
JacksonDatabind_59
JacksonDatabind_73
JacksonDatabind_9
Jsoup_27
Jsoup_34
Jsoup_54
Jsoup_78
Jsoup_80
Jsoup_82
Jsoup_84
Jsoup_90
JxPath_5
Lang_19
Lang_37
Lang_6
Lang_61
Math_79
Math_81
Math_97
Mockito_4
Mockito_5
Time_10
Time_14
Cli_7
JacksonCore_10
JacksonDatabind_3
Done
