In [16]:
# Notebook configuration: every location used below is derived from base_path
# by plain string concatenation.
# NOTE(review): base_path is a hard-coded, user-specific absolute Windows path;
# the notebook cannot run on another machine without editing it. The previous
# (macOS-style) values are kept as commented-out lines.
#base_path = "/Users/lorenapacheco/Concordia/Masters/"
base_path = r"C:\Users\loren\Concordia\Masters\BugReportsMining\/"
# Folders scanned (one sub-folder per project, one JSON file per bug) for the
# raw Ochiai score files.
#ochiai_scores_paths_list = [base_path + "BugReportsMining/ochiaiScores/", base_path + "BugReportsMining/ochiaiScores2/"]
ochiai_scores_paths_list = [base_path + r"ochiaiScores2.0\Ochiai1/", base_path + r"ochiaiScores2.0\Ochiai2/"]
# Root folder under which the computed ranking JSON files are written and read back.
#out = base_path + "BugReportsMining/Rankings/"
out = base_path + r"Rankings\/"
# JSON file holding the per-project / per-bug data (buggy methods, stack traces).
data_file_path = base_path + r"data\merged_data_production_bug_reports.json"

In [58]:
import glob
import os
import json

def json_file_to_dict(file):
    """Read a JSON file and return its parsed content.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file content.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; relying on the platform default encoding
    # (e.g. cp1252 on Windows) can corrupt or reject non-ASCII text.
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(file, 'r', encoding='utf-8') as fp:
        return json.load(fp)

def dict_to_json_file(file, dic, folder):
    """Write ``dic`` as pretty-printed JSON to ``<folder>/<file>.json``.

    Args:
        file: File name without the ``.json`` extension.
        dic: JSON-serialisable object to store.
        folder: Destination directory; created (with parents) when missing.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(folder, exist_ok=True)
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(os.path.join(folder, file + '.json'), 'w', encoding='utf-8') as fp:
        json.dump(dic, fp, sort_keys=True, indent=4)

def sort_dict (dictionary):
    """Return a new dict with the entries of ``dictionary`` ordered by value, highest first.

    Entries with equal values keep their original relative order.
    """
    entries = list(dictionary.items())
    entries.sort(key=lambda entry: entry[1], reverse=True)
    return dict(entries)

def get_number_of_buggy_methods_in_top_n(ranking_data, n, buggy_methods_list):
    """Count the buggy methods whose rank is within the top ``n``.

    Args:
        ranking_data: Mapping method id -> rank.
        n: Rank cut-off (inclusive).
        buggy_methods_list: Method ids to look up.

    Returns:
        Number of entries of ``buggy_methods_list`` that are present in
        ``ranking_data`` with a rank ``<= n``. Methods missing from the ranking
        are ignored.
    """
    # An explicit membership test replaces the former bare ``except:``, which
    # also silenced unrelated errors (including KeyboardInterrupt).
    return sum(
        1
        for method in buggy_methods_list
        if method in ranking_data and ranking_data[method] <= n
    )

def get_first_buggy_method_in_stack_trace(buggy_methods_list, stack_trace_methods):
    """Return the 1-based position of the first stack-trace frame that is a buggy method.

    Each frame ``pkg.Class.method`` is rewritten as ``pkg.Class#method`` (frames
    without a dot are used unchanged) and matches a buggy method when the buggy
    method id ends with that text.

    Returns:
        The smallest matching position, or the string ``"not found"``.
    """
    if not buggy_methods_list:
        return "not found"
    # Scanning frames from the top means the first hit is already the best position.
    for position, frame in enumerate(stack_trace_methods, start=1):
        owner, dot, method_name = frame.rpartition('.')
        frame_id = owner + '#' + method_name if dot else frame
        if any(buggy.endswith(frame_id) for buggy in buggy_methods_list):
            return position
    return "not found"


def get_best_classified_buggy_method(ranking_data, buggy_methods_list):
    """Return the best (lowest) rank held by any of the buggy methods.

    Args:
        ranking_data: Mapping method id -> rank.
        buggy_methods_list: Method ids to look up.

    Returns:
        The smallest rank among the buggy methods present in ``ranking_data``,
        or the string ``"not found"`` when none of them is ranked.
    """
    # An explicit membership test replaces the former bare ``except:``, which
    # also silenced unrelated errors (including KeyboardInterrupt).
    ranks = [ranking_data[method] for method in buggy_methods_list if method in ranking_data]
    return min(ranks) if ranks else "not found"

def get_precision_top_n(ranking_data, n, buggy_methods_list):
    """Compute precision@n: the share of the top ``n`` ranks occupied by buggy methods.

    Args:
        ranking_data: Mapping method id -> rank.
        n: Rank cut-off.
        buggy_methods_list: Ids of the buggy methods.

    Returns:
        ``(buggy methods ranked <= n) / n``, or ``0.0`` when ``n`` is not
        positive (e.g. the cut-off comes from an empty stack trace).
    """
    # Guard first: dividing by n == 0 would raise ZeroDivisionError.
    if n <= 0:
        return 0.0
    buggy_methods_in_top_n = get_number_of_buggy_methods_in_top_n(ranking_data, n, buggy_methods_list)
    return buggy_methods_in_top_n / n

def extract_buggy_methods_list(ranking_data, buggyMethods):
    """Build the list of buggy method ids, aligned with the ids used by a ranking.

    Every ``(file, method)`` pair of ``buggyMethods`` is turned into
    ``<class id>#<method>``, where the class id is the file path without
    ``.java`` and with path separators replaced by dots. When a key of
    ``ranking_data`` is a suffix of that id, the ranking key is used instead
    (the first such key in iteration order); otherwise the full id is kept.

    Args:
        ranking_data: Mapping whose keys are the method ids of a ranking.
        buggyMethods: Mapping file path -> iterable of buggy method names.

    Returns:
        List with one method id per ``(file, method)`` pair, in input order.
    """
    candidate_ids = []
    for file, method_names in buggyMethods.items():
        # The former ``replace("\/", ".")`` used an invalid escape sequence and
        # could never match because every "/" had already been replaced.
        # Backslash separators (Windows-style paths) are normalised instead.
        class_id = file.replace(".java", "").replace("/", ".").replace("\\", ".")
        candidate_ids.extend(class_id + "#" + method_name for method_name in method_names)

    buggy_methods_list = []
    for candidate in candidate_ids:
        matching_key = next(
            (ranked for ranked in ranking_data if candidate.endswith(ranked)),
            candidate,  # no ranking key matches: keep the full id
        )
        buggy_methods_list.append(matching_key)
    return buggy_methods_list


def get_average_precision_top_n(n, project, buggyMethods):
    """Average precision@n over every bug of ``project``.

    For each bug id the ranking file
    ``out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"`` is
    loaded (``out`` and ``ochiai_identificator`` are notebook-level variables).

    Args:
        n: Rank cut-off.
        project: Project key inside ``buggyMethods``.
        buggyMethods: Mapping project -> bug id -> buggy methods.
            NOTE(review): assumes each ``buggyMethods[project][bug_id]`` is the
            file -> method-names mapping accepted by
            ``extract_buggy_methods_list`` -- confirm against the caller.

    Returns:
        Mean of the per-bug precision@n, or ``0.0`` when the project has no bugs.
    """
    project_bugs = buggyMethods[project]
    if not project_bugs:
        return 0.0
    total = 0
    for bug_id, bug_methods in project_bugs.items():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        # get_precision_top_n takes (ranking, n, buggy method list); the former
        # four-argument call always raised TypeError.
        buggy_methods_list = extract_buggy_methods_list(ranking_info, bug_methods)
        total += get_precision_top_n(ranking_info, n, buggy_methods_list)
    return total / len(project_bugs)


def get_recall_top_n(ranking_data, n, buggy_methods_list):
    """Compute recall@n: the share of buggy methods ranked within the top ``n``.

    Args:
        ranking_data: Mapping method id -> rank.
        n: Rank cut-off.
        buggy_methods_list: Ids of the buggy methods.

    Returns:
        ``(buggy methods ranked <= n) / len(buggy_methods_list)``, or ``0.0``
        when ``buggy_methods_list`` is empty.
    """
    # Guard first: an empty list would make the division raise ZeroDivisionError.
    if not buggy_methods_list:
        return 0.0
    buggy_methods_in_top_n = get_number_of_buggy_methods_in_top_n(ranking_data, n, buggy_methods_list)
    return buggy_methods_in_top_n / len(buggy_methods_list)

def get_average_recall_top_n(n, project, buggyMethods=None):
    """Average recall@n over every bug of ``project``.

    For each bug id the ranking file
    ``out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"`` is
    loaded (``out`` and ``ochiai_identificator`` are notebook-level variables).

    Args:
        n: Rank cut-off.
        project: Project key inside ``buggyMethods``.
        buggyMethods: Optional mapping project -> bug id -> buggy methods. When
            omitted, the notebook-level ``buggyMethods`` variable is used, as
            the original two-argument form did.
            NOTE(review): assumes each ``buggyMethods[project][bug_id]`` is the
            file -> method-names mapping accepted by
            ``extract_buggy_methods_list`` -- confirm against the caller.

    Returns:
        Mean of the per-bug recall@n, or ``0.0`` when the project has no bugs.
    """
    if buggyMethods is None:
        # Backward compatibility with the two-argument call.
        buggyMethods = globals()["buggyMethods"]
    project_bugs = buggyMethods[project]
    if not project_bugs:
        return 0.0
    total = 0
    for bug_id, bug_methods in project_bugs.items():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        # get_recall_top_n takes (ranking, n, buggy method list); the former
        # four-argument call always raised TypeError.
        buggy_methods_list = extract_buggy_methods_list(ranking_info, bug_methods)
        total += get_recall_top_n(ranking_info, n, buggy_methods_list)
    return total / len(project_bugs)

def get_f1_top_n(ranking_data, n, buggyMethods):
    """Compute F1@n, the harmonic mean of precision@n and recall@n.

    Args:
        ranking_data: Mapping method id -> rank.
        n: Rank cut-off.
        buggyMethods: List of buggy method ids (despite its name, it is passed
            on as the ``buggy_methods_list`` of the precision/recall helpers).

    Returns:
        ``2 * P * R / (P + R)``; ``0.0`` when ``P + R`` is zero, when ``n`` is
        not positive, or when there are no buggy methods.
    """
    # Precision divides by n and recall by the number of buggy methods; both
    # are computed outside any error handling, so degenerate inputs are
    # rejected up front instead of raising ZeroDivisionError.
    if n <= 0 or not buggyMethods:
        return 0.0
    precision = get_precision_top_n(ranking_data, n, buggyMethods)
    recall = get_recall_top_n(ranking_data, n, buggyMethods)
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

def get_average_f1_top_n(n, project, buggyMethods):
    """Average F1@n over every bug of ``project``.

    For each bug id the ranking file
    ``out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"`` is
    loaded (``out`` and ``ochiai_identificator`` are notebook-level variables).

    Args:
        n: Rank cut-off.
        project: Project key inside ``buggyMethods``.
        buggyMethods: Mapping project -> bug id -> buggy methods.
            NOTE(review): assumes each ``buggyMethods[project][bug_id]`` is the
            file -> method-names mapping accepted by
            ``extract_buggy_methods_list`` -- confirm against the caller.

    Returns:
        Mean of the per-bug F1@n, or ``0.0`` when the project has no bugs.
    """
    project_bugs = buggyMethods[project]
    if not project_bugs:
        return 0.0
    total = 0
    for bug_id, bug_methods in project_bugs.items():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        # get_f1_top_n takes (ranking, n, buggy method list); the former
        # four-argument call always raised TypeError.
        buggy_methods_list = extract_buggy_methods_list(ranking_info, bug_methods)
        total += get_f1_top_n(ranking_info, n, buggy_methods_list)
    return total / len(project_bugs)

def get_method_rank(ranking_info, method):
    """Return the rank of ``method``, or one past the ranking size when it is not ranked."""
    if method in ranking_info:
        return ranking_info[method]
    # Unranked methods are placed right after the last ranked entry.
    return len(ranking_info) + 1

def get_map(project, ochiai_identificator, project_bugs_data):
    """Compute the Mean Average Precision (MAP) of a ranking source for one project.

    Args:
        project: Project name, used to locate the ranking files.
        ochiai_identificator: Name of the ranking folder under ``out``, or the
            literal ``"stackTraces"`` to rank by stack-trace order instead.
        project_bugs_data: Mapping bug id -> bug record. Each record is read
            for ``"buggyMethods"`` and, for ``"stackTraces"``,
            ``"stack_trace_methods"``.

    Returns:
        Mean over the counted bugs of the average precision, where AP is the
        mean of ``k / rank_k`` over the buggy methods sorted by rank (unranked
        methods get rank ``len(ranking) + 1``). ``0.0`` when no bug is counted.
    """
    sum_for_map = 0
    number_of_bugs = len(project_bugs_data)
    for bug_id, bug_record in project_bugs_data.items():
        if ochiai_identificator == "stackTraces":
            ranking_info = get_st_raking_dict(bug_record["stack_trace_methods"])
        else:
            ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
            try:
                ranking_info = json_file_to_dict(ranking_file)
            except FileNotFoundError:
                # NOTE(review): a missing ranking file keeps the bug in the
                # denominator (AP = 0), while an empty ranking below removes
                # it -- confirm this asymmetry is intended.
                continue
        if len(ranking_info) == 0:  # No ranking info
            if ochiai_identificator != "stackTraces":  # Due to the absence of gzoltar files
                number_of_bugs -= 1
            continue
        # Align the buggy method ids with THIS bug's ranking. The notebook-level
        # ``ranking`` variable used before held whatever bug the main loop
        # processed last.
        buggy_methods_list = extract_buggy_methods_list(ranking_info, bug_record.get("buggyMethods", {}))
        if not buggy_methods_list:
            continue  # nothing relevant: AP is 0, the bug is still counted
        ranks = sorted(get_method_rank(ranking_info, method) for method in buggy_methods_list)
        # Precision at the k-th buggy method is k / rank_k; AP averages these
        # over the number of buggy methods (the sum alone is not an average).
        average_precision = sum(k / rank for k, rank in enumerate(ranks, start=1)) / len(ranks)
        sum_for_map += average_precision
    if number_of_bugs <= 0:
        return 0.0
    return sum_for_map / number_of_bugs

def get_mrr(project, ochiai_identificator, project_bugs_data):
    """Compute the Mean Reciprocal Rank (MRR) of a ranking source for one project.

    Args:
        project: Project name, used to locate the ranking files.
        ochiai_identificator: Name of the ranking folder under ``out``, or the
            literal ``"stackTraces"`` to rank by stack-trace order instead.
        project_bugs_data: Mapping bug id -> bug record. Each record is read
            for ``"buggyMethods"`` and, for ``"stackTraces"``,
            ``"stack_trace_methods"``.

    Returns:
        Mean over the counted bugs of ``1 / best rank`` of any buggy method.
        ``0.0`` when no bug is counted.
    """
    sum_for_mrr = 0
    number_of_bugs = len(project_bugs_data)
    for bug_id, bug_record in project_bugs_data.items():
        if ochiai_identificator == "stackTraces":
            ranking_info = get_st_raking_dict(bug_record["stack_trace_methods"])
        else:
            ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
            try:
                ranking_info = json_file_to_dict(ranking_file)
            except FileNotFoundError:
                # NOTE(review): a missing ranking file keeps the bug in the
                # denominator (contribution 0), while an empty ranking below
                # removes it -- confirm this asymmetry is intended.
                continue
        if len(ranking_info) == 0:  # No ranking info
            if ochiai_identificator != "stackTraces":  # Due to the absence of gzoltar files
                number_of_bugs -= 1
            continue
        # Align the buggy method ids with THIS bug's ranking. The notebook-level
        # ``ranking`` variable used before held whatever bug the main loop
        # processed last.
        buggy_methods_list = extract_buggy_methods_list(ranking_info, bug_record.get("buggyMethods", {}))
        # The best rank is capped at the ranking size, so a bug whose buggy
        # methods are all unranked contributes 1 / len(ranking_info).
        best_rank_found = min(
            [len(ranking_info)]
            + [get_method_rank(ranking_info, method) for method in buggy_methods_list]
        )
        sum_for_mrr += 1 / best_rank_found
    if number_of_bugs <= 0:
        return 0.0
    return sum_for_mrr / number_of_bugs

def get_st_raking_dict(stack_trace_methods):
    """Turn a stack trace into a ranking dict (method id -> 1-based position).

    Each frame ``pkg.Class.method`` becomes ``pkg.Class#method``; frames
    without a dot are dropped before positions are assigned. A method that
    appears several times (e.g. recursion) keeps the position of its first
    occurrence.

    Args:
        stack_trace_methods: Iterable of frame strings, top of the stack first.

    Returns:
        Dict mapping each formatted method id to its position.
    """
    formatted_frames = []
    for st_method in stack_trace_methods:
        owner, dot, method_name = st_method.rpartition('.')
        if dot:
            formatted_frames.append(owner + '#' + method_name)

    rank_by_frame = {}
    for position, frame_id in enumerate(formatted_frames, start=1):
        # setdefault keeps the first (best) position; a plain assignment would
        # let a later duplicate frame overwrite it with a worse rank.
        rank_by_frame.setdefault(frame_id, position)
    return rank_by_frame

def _rank_by_score(sorted_scores):
    """Rank the methods of a mapping already ordered by descending score.

    Methods sharing a score share the rank of the first method of the tie
    group, and the ranks consumed by the group are skipped afterwards
    (scores s, s, s, t -> ranks 1, 1, 1, 4).
    """
    ranks = {}
    previous_score = None
    current_rank = 0
    for position, (method, score) in enumerate(sorted_scores.items(), start=1):
        if position == 1 or score != previous_score:
            current_rank = position
            previous_score = score
        ranks[method] = current_rank
    return ranks


# For every Ochiai score file: build and store the ranking, then print the
# per-bug Top-N metrics for the Ochiai ranking and for the stack-trace order.
# After each score folder, print MAP / MRR per project.
bugs_data = json_file_to_dict(data_file_path)
for ochiai_output_folder in ochiai_scores_paths_list:
    # Default taken from the folder itself so that the summary below is well
    # defined (and not left over from the previous folder) when no file matches.
    ochiai_identificator = os.path.basename(os.path.normpath(ochiai_output_folder))
    for bug_report_analysis_file in glob.glob(ochiai_output_folder + "*" + os.sep + "*.json"):
        # Expected layout: <ochiai folder>/<project>/<bug id>.json
        path_parts = bug_report_analysis_file.split(os.sep)
        project = path_parts[-2]
        bug_id = path_parts[-1].replace(".json","")
        ochiai_identificator = path_parts[-3]
        print("---------------")
        print(ochiai_identificator + " - " + project + " - " + bug_id)
        bug_record = bugs_data[project][bug_id]
        if "buggyMethods" not in bug_record.keys() or bug_record["buggyMethods"] == {}:
            print("No buggy methods. Skipping it")
            continue
        buggyMethods = bug_record["buggyMethods"]
        # Not used below; kept because the part of the notebook after this cell is not visible here.
        stack_trace_files = bug_record["stack_trace_files"]
        stack_trace_methods = bug_record["stack_trace_methods"]
        ochiai_scores_data = json_file_to_dict(bug_report_analysis_file)
        ochiai_scores_data = sort_dict(ochiai_scores_data)
        if len(ochiai_scores_data) == 0:
            print("No Ochiai scores for this bug due to gzoltar files problem. Skipping it")
            continue

        # The previous inline loop gave the first method of a tie rank p and
        # every other tied method rank p + 1 (1, 2, 2, 4 for three equal scores).
        ranking = _rank_by_score(ochiai_scores_data)

        dict_to_json_file(bug_id, ranking, out + ochiai_identificator + "/" + project + "/")

        buggy_methods_list = extract_buggy_methods_list (ranking, buggyMethods)
        st_ranking = get_st_raking_dict(stack_trace_methods)
        N = len(stack_trace_methods)
        # buggyMethods maps files to method lists, so its length is a file
        # count; the method count is the length of the extracted list.
        print("Number of buggy methods - " + str(len(buggy_methods_list)))
        print("Stack trace size - " + str(N))
        print("Position of the first buggy method in the stack trace - " + str(get_first_buggy_method_in_stack_trace(buggy_methods_list, stack_trace_methods)))
        print("Position of the first buggy method into the Ochiai classification - " + str(get_best_classified_buggy_method (ranking, buggy_methods_list)))
        if N == 0 or not buggy_methods_list:
            # Precision divides by N and recall by the number of buggy methods.
            print("Empty stack trace or no buggy methods. Skipping Top-N metrics")
            continue
        print("Precision Ochiai Top " + str(N) + " - " + str(get_precision_top_n (ranking, N, buggy_methods_list )))
        print("Recall Ochiai Top " + str(N) + " - " + str(get_recall_top_n (ranking, N, buggy_methods_list)))
        print("F1 Ochiai Top " + str(N) + " - " + str(get_f1_top_n (ranking, N, buggy_methods_list)))
        print("Precision Stack Trace Top " + str(N) + " - " + str(get_precision_top_n (st_ranking, N, buggy_methods_list )))
        print("Recall Stack Trace Top " + str(N) + " - " + str(get_recall_top_n (st_ranking, N, buggy_methods_list)))
        print("F1 Stack Trace Top " + str(N) + " - " + str(get_f1_top_n (st_ranking, N, buggy_methods_list)))

    print("")
    print("")
    print("======= " + ochiai_identificator + " =======")
    for project in bugs_data.keys():
        # NOTE(review): the reason for excluding these two projects is not visible here.
        if project in ("Lang", "Math"):
            continue
        print("")
        print("----")
        print(project)
        print("Map Ochiai - " + str(get_map (project, ochiai_identificator, bugs_data[project])))
        print("Map stack traces - " + str(get_map (project, "stackTraces",bugs_data[project])))
        print("MRR Ochiai- " + str(get_mrr (project, ochiai_identificator,bugs_data[project])))
        print("MRR Stack Traces- " + str(get_mrr (project, "stackTraces",bugs_data[project])))
    print("==============")
    print("")



---------------
Ochiai1 - Cli - 14
Number of buggy methods - 1
Stack trace size - 8
Position of the first buggy method in the stack trace - 5
Position of the first buggy method into the Ochiai classification - 8
Precision Ochiai Top 8 - 0.125
Recall Ochiai Top 8 - 1.0
F1 Ochiai Top 8 - 0.2222222222222222
Precision Stack Trace Top 8 - 0.125
Recall Stack Trace Top 8 - 1.0
F1 Stack Trace Top 8 - 0.2222222222222222
---------------
Ochiai1 - Cli - 5
Number of buggy methods - 1
Stack trace size - 3
Position of the first buggy method in the stack trace - 1
Position of the first buggy method into the Ochiai classification - 34
Precision Ochiai Top 3 - 0.0
Recall Ochiai Top 3 - 0.0
F1 Ochiai Top 3 - 0.0
Precision Stack Trace Top 3 - 0.3333333333333333
Recall Stack Trace Top 3 - 1.0
F1 Stack Trace Top 3 - 0.5
---------------
Ochiai1 - Closure - 125
Number of buggy methods - 1
Stack trace size - 40
Position of the first buggy method in the stack trace - 9
Position of the first buggy method into t