# Modified Ochiai 4.2
Fake failing tests set: the tests that cover the most lines of each stack trace (ST) method. This process is run for each stack trace entry in the top 10, one at a time. Once the Ochiai scores have been computed for each entry, a final ranking is calculated as a weighted average of those scores, with weights decreasing from the top of the stack trace to the bottom.

In [1]:
import os
from my_secrets import base_path


# Every input/output location used by this notebook. All folder paths live under
# <base_path>/DeepDiveBugReportsWithLogs/data; the "*_file_name" entries are bare
# file names that get combined with a folder later on.
_data_folder = os.path.join(base_path, "DeepDiveBugReportsWithLogs", "data")

paths_dict = dict(
    gzoltar_files_path=os.path.join(_data_folder, "gzoltar_files"),
    output_file=os.path.join(_data_folder, "ochiaiScores", "modifiedOchiai4.2"),
    data_file_path=os.path.join(_data_folder, "bug_reports_with_stack_traces_details.json"),
    failing_tests_info_file_name="modifiedOchiai4.2_fake_failing_tests_info.json",
    tests_analysis_results=os.path.join(_data_folder, "rq1_results.json"),
    fake_test_results_file_name="fake_test_results_modifiedOchiai4.2.csv",
    tests_covering_stack_traces_folder=os.path.join(_data_folder, "tests_covering_stack_traces_details_per_bug"),
)

## Reading the coverage data

In [2]:
%run ../utils.py

# Load the bug metadata and, for every bug reported as having coverage, read its
# GZoltar files into coverage_data[project][bug_id]. Bugs whose GZoltar folder or
# files are missing are reported on stdout and skipped.
bugs_data = json_file_to_dict(paths_dict["data_file_path"])
tests_analysis_results = json_file_to_dict(paths_dict["tests_analysis_results"])
bugs = get_list_of_bugs_with_coverage(tests_analysis_results)
coverage_data = {}

for bug in bugs:
    # Bug identifiers have the form "<project>_<bug_id>".
    project, bug_id = bug.split("_")
    bug_label = project + "-" + bug_id

    project_dir = os.path.join(paths_dict["gzoltar_files_path"], project)
    if not os.path.exists(project_dir):
        print(f"Gzoltar folder not fount for the project {project}")
        print("Skipping!!!!! ")
        continue
    # The project gets an entry as soon as its folder exists, even if every
    # one of its bugs ends up being skipped below.
    coverage_data.setdefault(project, {})

    bug_dir = os.path.join(project_dir, bug_id)
    if not os.path.exists(bug_dir):
        print(f"Gzoltar folder not fount for the bugId {bug_label}")
        print("Skipping!!!!! ")
        continue

    try:
        methods_covered_per_test = read_methods_matrix_file(bug_dir)
        methods_obj_list = read_methods_spectra_file(bug_dir)
        test_names, test_results = read_tests_csv_to_lists(bug_dir)
    except FileNotFoundError:
        print(f"The bug {bug_label} does not contain one the the required files. Skipping it")
        continue

    print(f"Number of tests in bug {bug_label} - {len(test_names)}")
    coverage_data[project][bug_id] = {
        "methods_covered_per_test": methods_covered_per_test,
        "methods_obj_list": methods_obj_list,
        "test_names": test_names,
        # The real outcomes are deliberately discarded: every test starts as passing (True).
        "test_results": [True] * len(test_names),
    }

print("Done")

Number of tests in bug Cli-14 - 539
Number of tests in bug Cli-5 - 103
Number of tests in bug Closure-106 - 2554
Number of tests in bug Closure-125 - 8156
Number of tests in bug Closure-143 - 4616
Number of tests in bug Closure-152 - 6186
Number of tests in bug Closure-2 - 7830
Number of tests in bug Closure-34 - 7413
Number of tests in bug Closure-37 - 7373
Number of tests in bug Closure-55 - 7155
Number of tests in bug Codec-8 - 294
Number of tests in bug Collections-28 - 5280
Number of tests in bug Compress-1 - 69
Number of tests in bug Compress-12 - 315
Number of tests in bug Compress-14 - 317
Number of tests in bug Compress-17 - 350
Number of tests in bug Compress-18 - 351
Number of tests in bug Compress-23 - 466
Number of tests in bug Compress-24 - 935
Number of tests in bug Compress-27 - 1012
Number of tests in bug Compress-31 - 1106
Number of tests in bug Compress-32 - 613
Number of tests in bug Compress-34 - 1228
Number of tests in bug Csv-12 - 206
Number of tests in bug Csv-4

## Defining the fake failing tests and running Ochiai (once per stack trace entry, up to the top 10)

In [3]:
%run ../utils.py
import math

# For every bug with coverage data, build one set of "fake" failing tests per
# stack trace (ST) entry and compute the Ochiai score of every method against
# that fake outcome.
#
# For each of the first MAX_ST_ENTRIES entries of the stack trace:
#   1. the tests covering the most lines of the entry's method become the fake
#      failing tests (every other test is considered passing);
#   2. the fake outcome is persisted through store_fake_test_results;
#   3. the Ochiai scores are written to
#      <output_file>/<project>/<bug_id>/entry_<n>.json.
# The number of fake passing/failing tests is recorded per entry, together with
# the per-bug average, in fake_failing_tests_info, which is written once at the end.
MAX_ST_ENTRIES = 10

fake_failing_tests_info = {}
for project, project_coverage in coverage_data.items():
    for bug_id, coverage in project_coverage.items():
        bug_data = bugs_data[project][bug_id]

        print(project + " ---- " + bug_id)

        if bug_data["stackTraceMethodsDetails"] == {}:
            print("The bug does not contain stackTraceMethodsDetails. Skipping.")
            print()
            continue

        tests_covering_stack_traces_file_path = os.path.join(
            paths_dict["tests_covering_stack_traces_folder"], project, bug_id + ".json"
        )
        tests_covering_stack_traces_details = json_file_to_dict(tests_covering_stack_traces_file_path)

        test_names = coverage["test_names"]
        coverage_matrix = coverage["methods_covered_per_test"]

        fake_failing_tests_average_number = 0
        fake_passing_tests_average_number = 0
        entries_count = 0

        # Slicing also handles stack traces with fewer than MAX_ST_ENTRIES entries.
        for st_entry_number, stack_trace_method in enumerate(bug_data["stack_trace_methods"][:MAX_ST_ENTRIES]):
            entry_key = f"entry_{st_entry_number}"
            print(f"Executing the logic for ST entry {st_entry_number}")

            print("* Part 1 - defining the fake failing tests")

            stack_trace_file = bug_data["stack_trace_files"][st_entry_number]
            st_method_name = stack_trace_method.split(".")[-1]
            st_file_complete_name = find_file_complete_name(stack_trace_file, bug_data)

            # Number of lines of the ST method covered by each test. The lookups
            # use .get so that a file or method missing from either details dict
            # skips this entry instead of raising KeyError.
            num_lines_covered_per_test = {}
            if st_file_complete_name:
                known_methods = bug_data["stackTraceMethodsDetails"].get(st_file_complete_name, {})
                method_coverage = tests_covering_stack_traces_details.get(st_file_complete_name, {}).get(st_method_name, {})
                if st_method_name in known_methods and "tests_covering_the_method" in method_coverage:
                    num_lines_covered_per_test = {
                        test: len(covered_lines)
                        for test, covered_lines in method_coverage["tests_covering_the_method"].items()
                    }

            if not num_lines_covered_per_test:
                print("No tests found covering this entry of the stack trace. Skipping it")
                continue

            # Keep only the tests that cover the maximum number of lines.
            threshold_lines = max(num_lines_covered_per_test.values())
            print("Threshold_lines: " + str(threshold_lines))

            # A single ST method is analysed per entry and each test is listed once
            # for it, so every retained test covers exactly one ST method. The count
            # is kept so that the "Threshold_tests" report and the selection rule
            # stay the same.
            stack_traces_method_test_count_above_threshold_lines = {
                test: 1
                for test, num_lines in num_lines_covered_per_test.items()
                if num_lines >= threshold_lines
            }

            threshold_st_method = max(stack_traces_method_test_count_above_threshold_lines.values())
            print("Threshold_tests: " + str(threshold_st_method))
            # A set gives O(1) membership checks when scanning all test names below.
            selected_tests = {
                test
                for test, count in stack_traces_method_test_count_above_threshold_lines.items()
                if count >= threshold_st_method
            }

            print("* Part 2 - preparing the fake_tests_status")
            # Build a fresh list (False = fake failing, True = passing) rather than
            # aliasing coverage["test_results"], so the all-passing baseline is
            # never mutated.
            # NOTE(review): store_fake_test_results is assumed to read
            # coverage["fake_test_results"]; confirm it does not rely on
            # coverage["test_results"] having been overwritten.
            fake_test_results = [test not in selected_tests for test in test_names]
            coverage["fake_test_results"] = fake_test_results
            number_of_failing_tests = fake_test_results.count(False)

            if number_of_failing_tests == 0:
                print(f"The bug {project}_{bug_id} does not contain fake failing tests for the st_entry {st_entry_number}. Skipping it")
                continue

            print("Storing the fake test results from this implementation in a file for future uses")
            file_name = paths_dict["fake_test_results_file_name"].replace(".csv", f"st_entry_{st_entry_number}.csv")
            store_fake_test_results(coverage, project, bug_id, paths_dict["gzoltar_files_path"], file_name)

            print("* Part 4 - Executing Ochiai")
            methods_ochiai_scores = {}
            for index_m, method_name in enumerate(coverage["methods_obj_list"]):
                n11 = 0  # tests that cover the method and fail
                n10 = 0  # tests that cover the method and pass
                n01 = 0  # tests that do not cover the method and fail
                for index_t, test_passed in enumerate(fake_test_results):
                    if str(coverage_matrix[index_t][index_m]) == "1":
                        if test_passed:
                            n10 += 1
                        else:
                            n11 += 1
                    elif not test_passed:
                        n01 += 1
                try:
                    s_o = n11 / math.sqrt((n11 + n01) * (n11 + n10))
                except ZeroDivisionError:
                    # The method is not covered by any test.
                    s_o = 0
                methods_ochiai_scores[method_name] = s_o

            number_of_passing_tests = len(fake_test_results) - number_of_failing_tests
            bug_info = fake_failing_tests_info.setdefault(project, {}).setdefault(bug_id, {})
            bug_info[entry_key] = {
                "fake_passing_tests_number": number_of_passing_tests,
                "fake_failing_tests_number": number_of_failing_tests,
            }
            fake_failing_tests_average_number += number_of_failing_tests
            fake_passing_tests_average_number += number_of_passing_tests
            entries_count += 1

            print("Number of fake passing tests: " + str(number_of_passing_tests))
            print("Number of fake failing tests: " + str(number_of_failing_tests) + "\n")
            ochiai_score_file = os.path.join(paths_dict["output_file"], project, bug_id, entry_key + ".json")
            dict_to_json_file(ochiai_score_file, methods_ochiai_scores)

        # entries_count != 0 guarantees that the bug already has an entry in
        # fake_failing_tests_info.
        if entries_count != 0:
            fake_failing_tests_average_number = fake_failing_tests_average_number / entries_count
            fake_passing_tests_average_number = fake_passing_tests_average_number / entries_count
            fake_failing_tests_info[project][bug_id]["fake_passing_tests_number"] = fake_passing_tests_average_number
            fake_failing_tests_info[project][bug_id]["fake_failing_tests_number"] = fake_failing_tests_average_number


dict_to_json_file(os.path.join(paths_dict["output_file"], paths_dict["failing_tests_info_file_name"]), fake_failing_tests_info)
print("Execution completed")

Cli ---- 14
Executing the logic for ST entry 0
* Part 1 - defining the fake failing tests
Threshold_lines: 11
Threshold_tests: 1
* Part 2 - preparing the fake_tests_status
Storing the fake test results from this implementation in a file for future uses
* Part 4 - Executing Ochiai
Number of fake passing tests: 532
Number of fake failing tests: 7

Executing the logic for ST entry 1
* Part 1 - defining the fake failing tests
Threshold_lines: 8
Threshold_tests: 1
* Part 2 - preparing the fake_tests_status
Storing the fake test results from this implementation in a file for future uses
* Part 4 - Executing Ochiai
Number of fake passing tests: 538
Number of fake failing tests: 1

Executing the logic for ST entry 2
* Part 1 - defining the fake failing tests
Threshold_lines: 6
Threshold_tests: 1
* Part 2 - preparing the fake_tests_status
Storing the fake test results from this implementation in a file for future uses
* Part 4 - Executing Ochiai
Number of fake passing tests: 537
Number of fake 

## Getting the final Ochiai scores file with the weighted average

In [4]:
%run ../utils.py

import re

# Combine the per-entry Ochiai score files of each bug into one weighted average
# per method, written to <output_file>/<project>/<bug_id>.json.
#
# Weights decrease with the position of the entry file: the first available
# entry gets 10, the next one 9, and so on. The files are therefore ordered by
# the stack trace entry number embedded in their name ("entry_<n>.json") so that
# the weights never depend on the order in which the folder happens to be listed.
weights_list = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

_ENTRY_FILE_PATTERN = re.compile(r"entry_(\d+)\.json$")


def _st_entry_sort_key(entry_file):
    """Return a sort key that orders score files by their stack trace entry number.

    Files whose name does not look like "entry_<n>.json" are placed after the
    recognised ones; the sort is stable, so they keep their relative order.
    """
    match = _ENTRY_FILE_PATTERN.search(os.path.basename(entry_file))
    if match is None:
        return (1, 0)
    return (0, int(match.group(1)))


for project in coverage_data.keys():
    for bug_id in coverage_data[project].keys():
        ochiai_scores_folder = os.path.join(paths_dict["output_file"], project, bug_id)
        entries_file_scores = sorted(get_json_files(ochiai_scores_folder), key=_st_entry_sort_key)

        ochiai_scores_data = {}
        sum_weights = 0
        for entry_number, entry_file in enumerate(entries_file_scores):
            entry_data = json_file_to_dict(entry_file)
            weight = weights_list[entry_number]
            sum_weights += weight
            for key, score in entry_data.items():
                ochiai_scores_data[key] = ochiai_scores_data.get(key, 0) + weight * score

        # With no entry files the dict is empty, so sum_weights == 0 is never
        # used as a divisor.
        for key in ochiai_scores_data:
            ochiai_scores_data[key] = ochiai_scores_data[key] / sum_weights

        dict_to_json_file(os.path.join(paths_dict["output_file"], project, bug_id + ".json"), ochiai_scores_data)

print("Execution completed")

Execution completed
