## Var declarations

In [6]:
data_file_path = "/Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/merged_data_production_bug_reports.json"
gzoltar_file_path = "/Users/lorenapacheco/Concordia/Masters/BugReportsMining/data/coverage_data_gzoltar.json"
projects_dir = "/Users/lorenapacheco/Concordia/Masters/open_source_repos_being_studied/"



projects_github = {
    "Cli": "apache/commons-cli",
    "Closure": "google/closure-compiler",
    "Codec": "apache/commons-codec",
    "Collections": "apache/commons-collections",
    "Compress": "apache/commons-compress",
    "Csv": "apache/commons-csv",
    "Gson": "google/gson",
    "JacksonCore": "FasterXML/jackson-core",
    "JacksonDatabind": "FasterXML/jackson-databind",
    "Jsoup": "jhy/jsoup",
    "JxPath": "apache/commons-jxpath",
    "Mockito": "mockito/mockito",
    "Time" : "JodaOrg/joda-time",
    "fastjson": "alibaba/fastjson",
    "junit4": "junit-team/junit4"
}

## General methods

In [11]:
import json
import os

def json_file_to_dict(file):
    data = {}
    with open(os.path.join(file), 'r') as fp:
        data = json.load(fp)
    fp.close()
    return data

def dict_to_json_file(file, dic):
    folder = os.path.dirname(file)
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(os.path.join(file), 'w') as fp:
        json.dump(dic, fp, sort_keys=True, indent=4)
    fp.close()

def read_file_lines(file_path, project_path):
    with open(project_path+file_path, 'r', encoding='utf-8', errors='ignore') as file:
        lines =file.readlines()
    file.close()
    return lines


In [19]:
def count_lines_of_code_for_coverage(file_path, project_path, covered_lines, begin_line=0, end_line=-1):

    lines = read_file_lines(file_path, project_path)
    if end_line==-1:
        end_line=len(lines)
    count = 0
    multiline_comment_active = False
    begin_pos = begin_line - 1
    end_pos = end_line - 1

    for i in range(begin_pos, end_pos + 1):
        line = lines[i]

        # If it is covered, it is a code line
        if i + 1 in covered_lines:
            count +=1
            continue

        # Ignore blocks inside multiline comments
        if line.strip().startswith("/*"):
            multiline_comment_active = True
            continue
        if multiline_comment_active:
            if line.strip().endswith("*/"):
                multiline_comment_active = False
            continue
        # Ignore single line comments
        if line.strip().startswith("//"):
            continue

        # Remove end-ine comments
        line = line.split("//")[0]
        # Count non-empty lines
        if line.strip() != "":
            # Ignoring closing braces only lines
            if line.strip() == "}":
                continue

            # Ignoring conditional lines (branches)
            if line.strip().startswith("if ") or line.strip().startswith("else ") or line.strip().startswith("else:"):
                continue

            # Ignoring loop definition lines
            if line.strip().startswith("for ") or line.strip().startswith("while "):
                continue

            # Treating statements with line break
            if line.strip().endswith("{") or line.strip().endswith("}") or line.strip().endswith(";") or line.strip().endswith(","):
                count = count + 1

    return count

bugs_data = json_file_to_dict(data_file_path)
gzoltar_data = json_file_to_dict(gzoltar_file_path)

for project in gzoltar_data.keys():
    print(project)
    repo_name = projects_github[project].split("/")[1]
    for bug_id in gzoltar_data[project].keys():

        # Skipping bugs in which the gzoltar run failed
        # TODO: Run it again in case I have time
        if project=="Closure":
            if bug_id == "143" or bug_id == "152":
                continue

        if project=="Jsoup":
            if bug_id == "6" or bug_id == "34" or bug_id == "5":
                continue
        print(bug_id)
        buggy_code_info = bugs_data[project][bug_id]["buggyMethods"]
        buggy_files = buggy_code_info.keys()

        commit_hash = bugs_data[project][bug_id]["buggy_commit"]
        project_path = projects_dir + repo_name + "/"
        os.chdir(project_path)
        checkout_command = "git checkout  --quiet " + commit_hash
        os.system(checkout_command)

        buggy_files_covered_lines = {}
        buggy_methods_covered_lines = {} # TODO: Implement method level
        for buggy_file in buggy_files:
            buggy_files_covered_lines[buggy_file] = []
            for gzoltar_file in gzoltar_data[project][bug_id].keys():
                if gzoltar_file in buggy_file:
                    for method in gzoltar_data[project][bug_id][gzoltar_file].keys():
                        buggy_files_covered_lines[buggy_file] += gzoltar_data[project][bug_id][gzoltar_file] [method]
                    break
            buggy_file_lines_of_code = count_lines_of_code_for_coverage(buggy_file, project_path, buggy_files_covered_lines[buggy_file])
            buggy_file_coverage_percentage = 100 * len(buggy_files_covered_lines[buggy_file])/buggy_file_lines_of_code
            print(buggy_file + " -  " + str(buggy_file_coverage_percentage))
    # TODO : Check the results


Cli
14
src/java/org/apache/commons/cli2/option/GroupImpl.java -  62.67123287671233
5
src/java/org/apache/commons/cli/Util.java -  62.5
Closure
106
src/com/google/javascript/jscomp/GlobalNamespace.java -  9.722222222222221
src/com/google/javascript/rhino/JSDocInfoBuilder.java -  37.5
125
src/com/google/javascript/jscomp/TypeCheck.java -  63.25411334552103
2
src/com/google/javascript/jscomp/TypeCheck.java -  60.81818181818182
27
src/com/google/javascript/rhino/IR.java -  36.56957928802589
34
src/com/google/javascript/jscomp/CodeGenerator.java -  68.26196473551637
src/com/google/javascript/jscomp/CodePrinter.java -  4.332129963898917
37
src/com/google/javascript/jscomp/NodeTraversal.java -  65.03496503496504
src/com/google/javascript/jscomp/parsing/IRFactory.java -  24.63186077643909
55
src/com/google/javascript/jscomp/FunctionRewriter.java -  15.508021390374331
Codec
8
src/java/org/apache/commons/codec/binary/Base64.java -  57.622739018087856
src/java/org/apache/commons/codec/binary/Base