In [46]:
base_path = "/Users/lorenapacheco/Concordia/Masters/"
ochiai_scores_paths_list = [base_path + "BugReportsMining/ochiaiScores/", base_path + "BugReportsMining/ochiaiScores2/"]
out = base_path + "BugReportsMining/Rankings/"

N = 6

buggy_methods = {
    "fastjson": {
        "2351": [
            "JSONLexerBase.scanString",
            "ASMDeserializerFactory._deserialzeArrayMapping"
        ],
        "3119": [
            "ASMSerializerFactory.createJavaBeanSerializer"
        ],
        "3280": [
            "JavaBeanDeserializer.createInstance",
            "TypeUtils.castToLocalDateTime"
        ],
        "3637": [
            "TypeUtils.castToSqlTime"
        ]
    },
    "commons-csv": {
        "CSV-100": [
            "CSVParser.getHeaderMap"
        ]
    },
    "jsoup": {
        #"1098": [
        #    "W3CDom.fromJsoup",
        #    "DocumentType.DocumentType"
        #],
        "1218": [
            "CharacterReader.bufferUp",
            "CharacterReader.mark",
            "CharacterReader.rewindToMark",
            "Tokeniser.read",
            "Tokeniser.consumeCharacterReference",
            "TokeniserState.read"
        ],
        "1251": [
            "TokeniserState.read"
        ],
        "1274": [
            "QueryParser.QueryParser"
        ],
        "1324": [
            "CharacterReader.bufferUp",
            "Token.toString"
        ],
        "740": [
            "Document.charset",
            "Document.encoder"
        ],
        "968": [
            "ConstrainableInputStream.expired"
        ],
        "980": [
            "UncheckedIOException.UncheckedIOException",
            "DataUtil.parseInputStream"
        ],
        "990":  [
            "DataUtil.parseInputStream"
        ]
    },
    #"junit4": {
    #    "1178": [
    #        "ArrayComparisonFailure.ArrayComparisonFailure"
    #    ]
    #}
}

In [47]:
import glob
import os
import json

def json_file_to_dict(file):
    data = {}
    with open(os.path.join(file), 'r') as fp:
        data = json.load(fp)
    fp.close()
    return data

def dict_to_json_file(file, dic, folder):
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(os.path.join(folder, file+'.json'), 'w') as fp:
        json.dump(dic, fp, sort_keys=True, indent=4)
    fp.close()

def sort_dict (dictionary):
    return dict(sorted(dictionary.items(), key=lambda item: item[1], reverse = True))

def get_number_of_buggy_methods_in_top_n (ranking_data, n, buggy_methods_list):
    buggy_methods_in_top_n = 0
    for method in buggy_methods_list:
        try:
            if ranking_data[method] <= n:
                buggy_methods_in_top_n +=1
        except:
            continue
    return buggy_methods_in_top_n

def get_precision_top_n (ranking_data, n, project, bug_id):
    buggy_methods_list = buggy_methods [project] [bug_id]
    buggy_methods_in_top_n = get_number_of_buggy_methods_in_top_n(ranking_data, n, buggy_methods_list)
    precision =  buggy_methods_in_top_n / n
    return precision

def get_average_precision_top_n(n, project):
    sum = 0
    for bug_id in buggy_methods[project].keys():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        sum += get_precision_top_n (ranking_info, n, project, bug_id)
    return sum/len(buggy_methods[project].keys())


def get_recall_top_n (ranking_data, n, project, bug_id):
    buggy_methods_list = buggy_methods [project] [bug_id]
    buggy_methods_in_top_n = get_number_of_buggy_methods_in_top_n(ranking_data, n, buggy_methods_list)
    recall = buggy_methods_in_top_n / len(buggy_methods_list)
    return recall

def get_average_recall_top_n(n, project):
    sum = 0
    for bug_id in buggy_methods[project].keys():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        sum += get_recall_top_n (ranking_info, n, project, bug_id)
    return sum/len(buggy_methods[project].keys())

def get_f1_top_n (ranking_data, n, project, bug_id):
    precision = get_precision_top_n(ranking_data, n, project, bug_id)
    recall = get_recall_top_n(ranking_data, n, project, bug_id)
    try:
        f1 = precision * recall / (precision + recall)
    except ZeroDivisionError:
        return 0.0
    return f1

def get_average_f1_top_n(n, project):
    sum = 0
    for bug_id in buggy_methods[project].keys():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        sum += get_f1_top_n (ranking_info, n, project, bug_id)
    return sum/len(buggy_methods[project].keys())

def get_method_rank(ranking_info, method):
    try:
        rank = ranking_info[method]
    except KeyError:
        rank = len(ranking_info) + 1
    return rank

def get_map (project, ochiai_identificator):
    sum_for_map = 0
    for bug_id in buggy_methods[project].keys():
        sum_for_ap = 0
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        buggy_methods_sorted_list = sorted(buggy_methods[project][bug_id], key = lambda method: get_method_rank(ranking_info, method))
        count = 1
        for buggy_method in buggy_methods_sorted_list:
            sum_for_ap += count /get_method_rank(ranking_info, buggy_method)
            count+=1
        sum_for_map += sum_for_ap
    map = sum_for_map/len(buggy_methods[project].keys())
    return map

def get_mrr (project, ochiai_identificator):
    sum_for_mrr = 0
    for bug_id in buggy_methods[project].keys():
        ranking_file = out + ochiai_identificator + "/" + project + "/" + bug_id + ".json"
        ranking_info = json_file_to_dict(ranking_file)
        best_rank_found = len(ranking_info.keys())
        for buggy_method in  buggy_methods[project][bug_id]:
            if get_method_rank(ranking_info, buggy_method) < best_rank_found:
                best_rank_found = get_method_rank(ranking_info, buggy_method)
        sum_for_mrr += 1/best_rank_found
    mrr = sum_for_mrr/len(buggy_methods[project].keys())
    return mrr


for ochiai_output_folder in ochiai_scores_paths_list:
    for bug_report_analysis_file in glob.glob(ochiai_output_folder + "*/*.json"):
        project = bug_report_analysis_file.split("/")[-2]
        bug_id = bug_report_analysis_file.split("/")[-1].replace(".json","")
        ochiai_identificator = bug_report_analysis_file.split("/")[-3]
        print(ochiai_identificator)
        if project not in buggy_methods.keys():
            continue
        if bug_id not in buggy_methods[project].keys():
            continue
        print("---------------")
        print(ochiai_identificator + " - " + project + " - " + bug_id)
        coverage_file = base_path + "BugReportsMining/coverageMining/" + project + "/" +  bug_id + ".json"
        coverage_info = json_file_to_dict(coverage_file)
        ochiai_scores_data = json_file_to_dict(bug_report_analysis_file)
        ochiai_scores_data = sort_dict(ochiai_scores_data)

        ranking = {}
        actual_pos = 1
        repetition_count = 0
        previous_score = -1
        for test in ochiai_scores_data.keys():
            if ochiai_scores_data[test] != previous_score:
                actual_pos = actual_pos + repetition_count
                repetition_count = 0
                previous_score = ochiai_scores_data[test]
                ranking [test] = actual_pos
                actual_pos += 1
            else:
                repetition_count +=1
                ranking [test] = actual_pos

        print("Precision Top 10 - " + str(get_precision_top_n (ranking, N, project, bug_id)))
        print("Recall Top 10 - " + str(get_recall_top_n (ranking, N, project, bug_id)))
        print("F1 Top 10 - " + str(get_f1_top_n (ranking, N, project, bug_id)))
        dict_to_json_file(bug_id, ranking, out + ochiai_identificator + "/" + project + "/")

    print("")
    print("")
    print("======= " + ochiai_identificator + " =======")
    for project in buggy_methods.keys():
        print("")
        print("----")
        print(project)
        print("Map - " + str(get_map (project, ochiai_identificator)))
        print("MRR- " + str(get_mrr (project, ochiai_identificator)))
    print("==============")
    print("")


for bug_report_analysis_file in glob.glob("/Users/lorenapacheco/Concordia/Masters/BugReportsMining/Rankings/stackTraces/*/*.json"):
    project = bug_report_analysis_file.split("/")[-2]
    bug_id = bug_report_analysis_file.split("/")[-1].replace(".json","")
    ochiai_identificator = bug_report_analysis_file.split("/")[-3]
    print("---------------")
    print(ochiai_identificator + " - " + project + " - " + bug_id)
    ranking_file = base_path + "BugReportsMining/Rankings/stackTraces/" + project + "/" +  bug_id + ".json"
    ranking_info = json_file_to_dict(ranking_file)
    print("Precision Top 10 - " + str(get_precision_top_n (ranking_info, N, project, bug_id)))
    print("Recall Top 10 - " + str(get_recall_top_n (ranking_info, N, project, bug_id)))
    print("F1 Top 10 - " + str(get_f1_top_n (ranking_info, N, project, bug_id)))

print("")
print("")
print("======= stackTraces =======")
for project in buggy_methods.keys():
    print("")
    print("----")
    print(project)
    print("Map - " + str(get_map (project, "stackTraces")))
    print("MRR- " + str(get_mrr (project, "stackTraces")))
print("==============")
print("")



ochiaiScores
---------------
ochiaiScores - fastjson - 3280
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - fastjson - 2351
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - fastjson - 3637
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - fastjson - 3119
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - commons-csv - CSV-100
Precision Top 10 - 0.16666666666666666
Recall Top 10 - 1.0
F1 Top 10 - 0.14285714285714285
ochiaiScores
---------------
ochiaiScores - jsoup - 990
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - jsoup - 968
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------------
ochiaiScores - jsoup - 980
Precision Top 10 - 0.0
Recall Top 10 - 0.0
F1 Top 10 - 0.0
ochiaiScores
---------