## Var declarations

In [1]:
# Root of the BugReportsMining checkout. Every location below is derived from
# it, so relocating the project only requires editing this single line.
project_root_path = "/Users/lorenapacheco/Concordia/Masters/BugReportsMining"

# Input folder, read as <gzoltar_files_path>/<project>/<bug_id>/ and expected to
# hold matrix.txt, spectra.csv and tests.csv for each bug.
gzoltar_files_path = project_root_path + "/gzoltar_files"
# Input JSON; its top-level keys are used as project names and the keys one
# level below as bug ids.
data_file_path = project_root_path + "/data/merged_data_production_bug_reports.json"
# Output JSON with the covered lines per project / bug / file / method.
gzoltar_file_path = project_root_path + "/data/coverage_data_gzoltar.json"
# Output folder for the per-test coverage files. The trailing slash matters:
# the project name is concatenated directly onto this string.
gzoltar_per_test_path = project_root_path + "/data/coverage_data_per_test_gzoltar/"

## General methods

In [2]:
import json
import os

def json_file_to_dict(file):
    """Load a JSON file and return its decoded content.

    Args:
        file: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file. The callers in this
        notebook treat the result as a dict, but any top-level JSON value is
        returned unchanged.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8, so decode it explicitly instead of relying on the
    # platform's default encoding. The ``with`` block closes the file on exit,
    # which makes a separate close() call unnecessary.
    with open(file, 'r', encoding='utf-8') as fp:
        return json.load(fp)

def dict_to_json_file(file, dic):
    """Write ``dic`` to ``file`` as indented JSON with sorted keys.

    Missing parent folders of ``file`` are created first. An existing file is
    overwritten.

    Args:
        file: Destination path. May be a bare file name, in which case the
            file is written to the current working directory.
        dic: JSON-serializable object to dump.

    Raises:
        OSError: If the folder or the file cannot be created.
        TypeError: If ``dic`` contains values ``json.dump`` cannot serialize.
    """
    folder = os.path.dirname(file)
    # dirname() is "" for a bare file name and os.makedirs("") raises, so only
    # create a folder when the path actually has one. exist_ok=True also
    # removes the gap between checking for the folder and creating it.
    if folder:
        os.makedirs(folder, exist_ok=True)
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(file, 'w') as fp:
        json.dump(dic, fp, sort_keys=True, indent=4)

## Reading the data

In [3]:
import csv
import re

def read_matrix_file(file_path):
    """Read ``matrix.txt`` from a folder and return its rows as lists of ints.

    Every non-blank line is expected to hold whitespace-separated integers
    followed by one trailing marker character. That last character is dropped
    and not returned. Blank lines are skipped.

    Args:
        file_path: Folder that contains ``matrix.txt``.

    Returns:
        A list with one entry per non-blank line, each entry being the list of
        integers found on that line (in file order).

    Raises:
        OSError: If ``matrix.txt`` cannot be opened.
        ValueError: If a token before the trailing marker is not an integer.
    """
    statements_covered_per_test = []

    with open(file_path + "/matrix.txt", 'r') as f:
        for line in f:
            stripped = line.strip()
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no row; skip it instead of failing on it.
            if not stripped:
                continue
            # Drop the final marker character, then parse what is left.
            row = [int(num) for num in stripped[:-1].split()]
            statements_covered_per_test.append(row)
    return statements_covered_per_test

def read_spectra_file(file_path):
    """Parse ``spectra.csv`` from a folder into a list of statement records.

    The first line of the file is skipped. Every following line must look like
    ``<class>#<method>(<parameters>):<line number>``. Lines that do not match
    are reported on stdout and left out of the result.

    NOTE(review): other cells index the returned list by matrix column, so a
    skipped line shifts every later position - confirm such lines never occur
    in real data.

    Args:
        file_path: Folder that contains ``spectra.csv``.

    Returns:
        A list of dicts with the keys ``class_name``, ``method_name``,
        ``method_parameters`` (all str) and ``line_number`` (int), in file
        order.
    """
    entry_regex = re.compile(r'^(.*?)#(.*?)\((.*?)\):(\d+)$')
    parsed_entries = []
    with open(file_path + "/spectra.csv", 'r') as spectra:
        # Discard the first line; next() with a default is a no-op on an
        # empty file.
        next(spectra, None)
        for raw_line in spectra:
            found = entry_regex.search(raw_line)
            if found is None:
                print("match not found")
                print(raw_line)
                continue
            owner, method, parameters, line_text = found.groups()
            parsed_entries.append({
                "class_name": owner,
                "method_name": method,
                "method_parameters": parameters,
                "line_number": int(line_text),
            })
    return parsed_entries

def read_tests_file(file_path):
    """Read ``tests.csv`` from a folder and return test names and outcomes.

    The first CSV record is treated as a header and skipped. For every other
    non-empty record, column 0 is the test name and column 1 the outcome; an
    outcome equal to ``"PASS"`` counts as passed, anything else as failed.
    Empty records (blank lines) are ignored.

    Args:
        file_path: Folder that contains ``tests.csv``.

    Returns:
        A tuple ``(test_names, test_results)`` of two parallel lists: the
        names (str) and whether each test passed (bool).

    Raises:
        OSError: If ``tests.csv`` cannot be opened.
        IndexError: If a non-empty record has fewer than two columns.
    """
    test_names = []
    test_results = []
    # newline='' is what the csv module asks for, so that quoted fields
    # spanning several lines are parsed as a single record.
    with open(file_path + "/tests.csv", 'r', newline='') as file:
        csv_reader = csv.reader(file)
        # Skip the header record; the default keeps an empty file from raising.
        next(csv_reader, None)
        for row in csv_reader:
            # csv.reader yields [] for a blank line; there is nothing to read.
            if not row:
                continue
            test_names.append(row[0])
            test_results.append(row[1] == "PASS")
    return test_names, test_results


# Build coverage_data[project][bug_id] from the per-bug GZoltar folders.
# Projects and bugs whose folder is missing are reported and left out. Each
# entry ends up with four keys: "statements_covered_per_test",
# "lines_of_code_obj_list", "test_names" and "test_results".
bugs_data = json_file_to_dict(data_file_path)
coverage_data = {}
for project, project_bugs in bugs_data.items():
    project_gzoltar_folder = gzoltar_files_path + "/" + project
    if not os.path.exists(project_gzoltar_folder):
        print("Gzoltar folder not fount for the project "+ project)
        print("Skipping!!!!! ")
        continue
    # The project is registered as soon as its folder exists, even if none of
    # its bugs turn out to have a folder.
    coverage_data.setdefault(project, {})
    for bug_id in project_bugs.keys():
        bug_gzoltar_folder = project_gzoltar_folder + "/" + bug_id
        if not os.path.exists(bug_gzoltar_folder):
            print("Gzoltar folder not fount for the bugId "+ project+ "-" + bug_id)
            print("Skipping!!!!! ")
            continue
        # Register the (still empty) entry before reading, then fill it in.
        bug_entry = coverage_data[project][bug_id] = {}
        bug_entry["statements_covered_per_test"] = read_matrix_file(bug_gzoltar_folder)
        print("Number of tests in bug "+ project+ "-" + bug_id + " - " + str(len(bug_entry["statements_covered_per_test"])))
        bug_entry["lines_of_code_obj_list"] = read_spectra_file(bug_gzoltar_folder)
        bug_entry["test_names"], bug_entry["test_results"] = read_tests_file(bug_gzoltar_folder)
print("Done")

Number of tests in bug Cli-14 - 544
Number of tests in bug Cli-5 - 102
Number of tests in bug Closure-106 - 2595
Number of tests in bug Closure-125 - 8158
Number of tests in bug Closure-143 - 0
Number of tests in bug Closure-152 - 0
Number of tests in bug Closure-2 - 7828
Number of tests in bug Closure-27 - 7482
Number of tests in bug Closure-34 - 7414
Number of tests in bug Closure-37 - 7373
Number of tests in bug Closure-55 - 7168
Number of tests in bug Codec-8 - 304
Number of tests in bug Collections-28 - 5284
Number of tests in bug Compress-1 - 70
Number of tests in bug Compress-12 - 316
Number of tests in bug Compress-14 - 320
Number of tests in bug Compress-17 - 351
Number of tests in bug Compress-18 - 357
Number of tests in bug Compress-23 - 467
Number of tests in bug Compress-32 - 614
Number of tests in bug Csv-12 - 198
Number of tests in bug Csv-4 - 178
Number of tests in bug Gson-12 - 1017
Number of tests in bug Gson-8 - 992
Number of tests in bug JacksonCore-4 - 239
Number o

## Removing the failing test results

In [4]:
# Drop every failing test from the three per-test lists of each bug
# ("test_names", "test_results", "statements_covered_per_test"). The lists are
# edited in place, so coverage_data only holds passing tests afterwards.
for project, project_bugs in coverage_data.items():
    print(project)
    for bug_id, bug_entry in project_bugs.items():
        print(bug_id)
        names = bug_entry["test_names"]
        outcomes = bug_entry["test_results"]
        print(len(names))
        # A falsy outcome marks a failing test.
        failing_tests_indexes = [
            position for position in range(len(names)) if not outcomes[position]
        ]
        print("----")
        print(failing_tests_indexes)
        print(str(len(failing_tests_indexes)) + " tests to be deleted")
        # Delete from the highest index down so the remaining indexes stay valid.
        for position in reversed(failing_tests_indexes):
            del names[position]
            del outcomes[position]
            del bug_entry["statements_covered_per_test"][position]
        print(len(names))

print("Done")

Cli
14
544
----
[196]
1 tests to be deleted
543
5
102
----
[0, 17, 57]
3 tests to be deleted
99
Closure
106
2595
----
[547, 548, 646, 1400]
4 tests to be deleted
2591
125
8158
----
[674]
1 tests to be deleted
8157
143
0
----
[]
0 tests to be deleted
0
152
0
----
[]
0 tests to be deleted
0
2
7828
----
[1239]
1 tests to be deleted
7827
27
7482
----
[7243, 7244, 7245]
3 tests to be deleted
7479
34
7414
----
[4087]
1 tests to be deleted
7413
37
7373
----
[6227]
1 tests to be deleted
7372
55
7168
----
[4625]
1 tests to be deleted
7167
Codec
8
304
----
[292]
1 tests to be deleted
303
Collections
28
5284
----
[1018]
1 tests to be deleted
5283
Compress
1
70
----
[16]
1 tests to be deleted
69
12
316
----
[46]
1 tests to be deleted
315
14
320
----
[251]
1 tests to be deleted
319
17
351
----
[262]
1 tests to be deleted
350
18
357
----
[255]
1 tests to be deleted
356
23
467
----
[92, 190]
2 tests to be deleted
465
32
614
----
[273, 453]
2 tests to be deleted
612
Csv
12
198
----
[137]
1 tests to be

# Getting the general coverage information - covered lines per file and method - stored in data/coverage_data_gzoltar.json
This information is later used to calculate the coverage percentage of the files and methods being analysed

In [5]:
# Collect, for every bug, the lines covered by at least one remaining test,
# grouped as file -> method -> [line numbers], and dump everything to
# gzoltar_file_path.
covered_lines_gzoltar = {}
for project, project_bugs in coverage_data.items():
    print(project)
    for bug_id, bug_entry in project_bugs.items():
        statements = bug_entry["lines_of_code_obj_list"]
        coverage_info = {}
        for test_coverage in bug_entry["statements_covered_per_test"]:
            for index_s, statement_instance in enumerate(test_coverage):
                if str(statement_instance) != "1":  # 1 = covered, 0 = not covered
                    continue
                # Column index_s of the matrix row maps to statement index_s.
                statement = statements[index_s]
                # NOTE(review): "$" is replaced as well as ".", so a class name
                # containing "$" becomes a nested path - confirm that consumers
                # of this file expect that.
                file_name = statement["class_name"].replace(".", "/").replace("$", "/") + ".java"
                method_name = statement["method_name"] + " - " + statement["method_parameters"]
                line_number = statement["line_number"]
                method_lines = coverage_info.setdefault(file_name, {}).setdefault(method_name, [])
                # Keep each line once, in order of first appearance.
                if line_number not in method_lines:
                    method_lines.append(line_number)
        covered_lines_gzoltar.setdefault(project, {})[bug_id] = coverage_info
dict_to_json_file(gzoltar_file_path, covered_lines_gzoltar)
print("Collected info added to the file "+ gzoltar_file_path)

Cli
Closure
Codec
Collections
Compress
Csv
Gson
JacksonCore
JacksonDatabind
Jsoup
JxPath
Mockito
Time
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_gzoltar.json


# Getting the coverage information per test - covered lines per test per file and method - stored in one file per bug: `data/coverage_data_per_test_gzoltar/<project>/<bug_id>.json`
This information is later used to select the tests that best cover the bug, in order to apply spectrum-based fault localization techniques

In [None]:
# Write one JSON file per bug (<gzoltar_per_test_path>/<project>/<bug_id>.json)
# holding, for each remaining test, the lines it covers grouped as
# test -> file -> method -> [line numbers].
covered_lines_per_test_gzoltar = {}
for project, project_bugs in coverage_data.items():
    print(project)
    folder_name = gzoltar_per_test_path + project + "/"
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    for bug_id, bug_entry in project_bugs.items():
        file_path = folder_name + bug_id + ".json"
        names = bug_entry["test_names"]
        statements = bug_entry["lines_of_code_obj_list"]
        coverage_info_per_test = {}
        for index_t, test_coverage in enumerate(bug_entry["statements_covered_per_test"]):
            # Row index_t of the matrix belongs to test index_t. A test gets an
            # entry even when it covers nothing.
            test_name = names[index_t]
            covered_by_test = coverage_info_per_test[test_name] = {}
            for index_s, statement_instance in enumerate(test_coverage):
                if str(statement_instance) != "1":  # 1 = covered, 0 = not covered
                    continue
                statement = statements[index_s]
                file_name = statement["class_name"].replace(".", "/").replace("$", "/") + ".java"
                method_name = statement["method_name"] + " - " + statement["method_parameters"]
                line_number = statement["line_number"]
                method_lines = covered_by_test.setdefault(file_name, {}).setdefault(method_name, [])
                # Keep each line once, in order of first appearance.
                if line_number not in method_lines:
                    method_lines.append(line_number)
        dict_to_json_file(file_path, coverage_info_per_test)
        print("Collected info added to the file "+ file_path)
print("Done")

Cli
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Cli/14.json
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Cli/5.json
Closure
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Closure/106.json
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Closure/125.json
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Closure/143.json
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Closure/152.json
Collected info added to the file /Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_per_test_gzoltar/Closure/2.json
Collect