In [1]:
import pandas as pd 
import os
import json
import glob
import re
import sys
import concurrent.futures
from junitparser import JUnitXml, Failure, Error, Skipped
# Raise Python's recursion limit for the whole kernel session.
# NOTE(review): presumably required by deeply recursive code such as CommitGraph — confirm.
sys.setrecursionlimit(20000) # Current limit = 999

from CommitGraph import CommitGraph

In [2]:
def createDirIfNotExists(folder_name):
    if not os.path.isdir(folder_name): 
        os.makedirs(folder_name)

In [3]:
# Base directory of the workspace; analyzeBug builds its input paths from it.
root="/home/jovyan/work"
# Directory under which analyzeBug stores its per-bug results; created here if missing.
analysis_results_path = root + "/analysis/results/"
createDirIfNotExists(analysis_results_path)

In [4]:
def searchRegression(graph, init_node):
    """Walk the commit graph backwards from ``init_node`` looking for a regression point.

    Nodes are processed depth-first (the most recently scheduled node is
    taken first). From each node only the parents whose ``State`` is not
    ``"TestSuccess"`` are followed. A node that is itself not
    ``"TestSuccess"`` while every one of its parents is present in ``graph``
    and is ``"TestSuccess"`` ends a path:

    * if the node is ``"TestFail"`` it is returned at once as ``[node]``;
    * otherwise the node is added to the candidates collected so far, and
      that list is returned when nothing else is pending or remembered as a
      possible path when other nodes are still pending.

    Args:
        graph: mapping from commit hash to node dict. Nodes are read for the
            keys ``'commit'``, ``'State'`` and ``'parents'``.
        init_node: node dict the search starts from.

    Returns:
        A list of node dicts: the candidates found as described above, the
        first remembered path if the search runs out of nodes, or ``[]``
        when no path was found.
    """
    paths = []
    candidates = []
    # Hashes of every node already scheduled. Only hashes are stored so the
    # start node is tracked the same way as its ancestors.
    visited = {init_node['commit']}
    pending = [init_node]

    while pending:
        node = pending.pop()

        successParents = True
        parents = graph[node['commit']]['parents']

        # A failing test restarts the candidate collection at this node.
        if node['State'] == "TestFail":
            candidates = []

        for parent_hash in parents:
            if parent_hash not in graph: # Reach first commit
                successParents = False
                if not pending:
                    break
                else:
                    continue # Check other branches
            parent = graph[parent_hash]
            successParent = parent['State'] == "TestSuccess"
            successParents = successParents and successParent
            if not successParent:
                # The parent could not be built, so this node cannot be ruled out.
                if parent['State'] in ["BuildError", "TestBuildError"]:
                    # NOTE(review): appends in place, so a list already stored in
                    # `paths` can still grow here — confirm this is intended.
                    candidates.append(node)
                if parent_hash not in visited:
                    visited.add(parent_hash)
                    pending.append(parent)

        if successParents and node['State'] != "TestSuccess":

            if node['State'] == 'TestFail':
                return [node]
            else:
                candidates = candidates + [node]
                if not pending:
                    return candidates
                else:
                    paths.append(candidates)

    if len(paths) > 0: return paths[0]
    return []

In [5]:
def getExecutionsOnPastSequentially(graph, init_node):
    """Count the ancestors with a test report reachable from ``init_node``.

    The walk follows parent links starting at ``init_node`` and only
    continues through parents whose ``'HasTestReport'`` value is truthy, so
    commits behind a parent without a report are not reached. Each such
    parent is counted once, even when it is reachable through several
    children. ``init_node`` itself is not counted.

    Args:
        graph: mapping from commit hash to node dict. Nodes are read for the
            keys ``'commit'``, ``'parents'`` and ``'HasTestReport'``.
        init_node: node dict the walk starts from.

    Returns:
        The number of counted ancestors (``0`` when there are none).
    """
    executionsOnPastSequentially = 0
    pending = [init_node]
    # Hashes of the parents already counted; a set keeps the lookup constant-time.
    visited = set()

    while pending:
        node = pending.pop()

        for parent_hash in graph[node['commit']]['parents']:
            # Skip parents outside the graph and parents counted before.
            if parent_hash not in graph or parent_hash in visited:
                continue
            parent = graph[parent_hash]
            if parent['HasTestReport']:
                executionsOnPastSequentially += 1
                visited.add(parent_hash)
                pending.append(parent)
    return executionsOnPastSequentially

In [6]:
def analyzeTestReport(test_method, report_path):
    """Summarise the outcome of one test case found in a JUnit XML report.

    Intended to be applied to each commit with a failed test.

    Args:
        test_method: name compared against the ``name`` of each entry
            obtained by iterating the parsed report.
        report_path: path passed to ``JUnitXml.fromfile``.

    Returns:
        A dict with the keys ``'name'``, ``'success'``, ``'failure'``,
        ``'failure_msg'``, ``'error'``, ``'error_msg'`` and ``'skipped'``.
        ``'success'`` stays ``True`` only when iterating the test case
        yields no elements.

    Raises:
        Exception: if no entry named ``test_method`` is found.
    """
    xml = JUnitXml.fromfile(report_path)
    # First entry whose name matches, or None when there is no match.
    test_case = next((tc for tc in xml if test_method == tc.name), None)
    if test_case is None: raise Exception("Test case not found")

    result = {
        'name': test_method,
        'success': True,
        'failure': False,
        'failure_msg': None,
        'error': False,
        'error_msg': None,
        'skipped': False
    }
    # Iterate the matched test case explicitly rather than a leftover loop variable.
    for elem in test_case:
        result['success'] = False
        if elem.__class__ is Failure:
            result['failure'] = True
            # NOTE(review): stores the failure *type* here but the error
            # *message* below — confirm the asymmetry is intended.
            result['failure_msg'] = elem.type
        if elem.__class__ is Error:
            result['error'] = True
            result['error_msg'] = elem.message
        if elem.__class__ is Skipped:
            result['skipped'] = True

    return result



In [7]:
def getTestName(cmd):
    """Extract the test selector from a Maven or Ant test command line.

    Args:
        cmd: command string; only commands starting with ``mvn`` or ``ant``
            are recognised.

    Returns:
        The text captured after ``-Dtest=`` (Maven) or
        ``-Dtest.entry.method=`` (Ant), or ``None`` when ``cmd`` starts with
        neither tool name.

    Raises:
        AttributeError: if the command starts with a recognised tool name
            but does not contain the expected option.
    """
    selectors = (
        ("mvn", r"-Dtest=(.*) test"),
        ("ant", r"-Dtest.entry.method=(.*) run"),
    )
    for tool, pattern in selectors:
        if cmd.startswith(tool):
            return re.search(pattern, cmd).group(1)
    return None

In [8]:
def analyzeBug(project, bug_id, force=False):
    """Classify one bug and return its analysis record.

    A ``bug_result.json`` already stored in the bug's analysis folder is
    loaded and returned unless ``force`` is true. Otherwise the bug's
    configuration JSON is read, its commit graph is loaded and the bug
    receives one of these categories:

    * ``"Other error"``: the bug's results folder has no
      ``commit_history.csv`` (returned without being saved);
    * ``"No results - Error at performing experiment"``: building the
      ``CommitGraph`` raised ``IndexError`` (returned without being saved);
    * ``"Test fails in the fix commit"``: the fix commit's ``'ExecuteTest'``
      value is falsy; ``sub_category`` names the step that failed;
    * ``"A regression is detected"`` / ``"No regression is detected"``:
      depending on whether ``searchRegression`` returns any candidate.

    Fully analysed results also carry ``executionsOnPastSequentially``,
    ``executionsOnPast`` and ``numCommits`` and are written to
    ``bug_result.json``.

    Args:
        project: project name used to build the input and output paths.
        bug_id: bug number; turned into ``"Bug_<bug_id>"``.
        force: when true, ignore a stored result and analyse again.

    Returns:
        The result dict for the bug.
    """
    bug_name = "Bug_"+str(bug_id)
    bug_path = "{root}/results/{project}/{bug_name}/".format(root=root, project=project, bug_name=bug_name)
    results_dir = analysis_results_path+"{project}/{bug_name}/".format(project=project, bug_name=bug_name)
    result_file = results_dir+"bug_result.json"

    # Reuse the stored result unless a fresh analysis was requested.
    use_stored = not force and os.path.isfile(result_file)
    if use_stored:
        with open(result_file) as stored:
            return json.load(stored)

    config_file = "{root}/configFiles/{project}/bugs/{bug_name}.json".format(root=root,project=project, bug_name=bug_name)
    with open(config_file) as config:
        bug_info = json.load(config)

    test_name = getTestName(bug_info['test_command'])

    # Defaults; the branches below overwrite what applies to this bug.
    bug_result = {
        'id': project + "_" + bug_name,
        'bug': bug_name,
        'project': project,
        "fix_pass": True,
        "prev_fails": True,
        "category": None,
        "sub_category": "-",
        "test_name": test_name,
        "bug_report": bug_info['bug_report'],
        "fix_commit": bug_info['fix_commit'],
        "BIC_candidates": []
    }

    if not os.path.isfile(bug_path+'commit_history.csv'):
        bug_result['category'] = "Other error"
        return bug_result

    createDirIfNotExists(results_dir)

    try:
        commit_graph = CommitGraph(project, bug_id, bug_path, results_dir, restore=True)
    except IndexError:
        bug_result['category'] = "No results - Error at performing experiment"
        return bug_result

    graph = commit_graph.graph
    fix_commit = graph[bug_info['fix_commit']]

    if fix_commit['ExecuteTest']:
        # The test passes on the fix commit: look for the regression point.
        candidates = searchRegression(graph, fix_commit)
        if not candidates:
            bug_result['category'] = "No regression is detected"
            bug_result['sub_category'] = "-"
        else:
            bug_result['BIC_candidates'] = [(c['id'], c['commit']) for c in candidates]
            bug_result['category'] = "A regression is detected"
            if len(candidates) == 1:
                bug_result['sub_category'] = "Unique candidates" 
            else:
                bug_result['sub_category'] = "Multiple candidates" 

        bug_result['executionsOnPastSequentially'] = getExecutionsOnPastSequentially(graph, fix_commit)
    else:
        # The test does not pass on the fix commit: record which step failed.
        bug_result['category'] = "Test fails in the fix commit"
        bug_result['fix_pass'] = False
        bug_result['prev_fails'] = None

        if not fix_commit['Build']:
            failed_step = "Failure in source build"
        elif not fix_commit['BuildTest']:
            failed_step = "Failure in test build"
        elif not fix_commit['HasTestReport']:
            failed_step = "The test was not executed"
        else:
            failed_step = "Test execution fails"
        bug_result['sub_category'] = failed_step

        bug_result['executionsOnPastSequentially'] = 0

    # Commits of the whole graph that have a test report, and the graph size.
    bug_result['executionsOnPast'] = sum(1 for node in graph.values() if node['HasTestReport'])
    bug_result['numCommits'] = len(graph.values())

    # Save bug result
    with open(result_file,'w+') as json_file:
        json.dump(bug_result, json_file, indent=4)

    return bug_result

In [9]:
# CASE 1: Regression
# The first reported candidate must be the commit with id 162
# (each BIC candidate is an (id, commit) pair).
candidates = analyzeBug("JacksonCore", 11)['BIC_candidates']
assert candidates[0][0] == 162

In [10]:
# CASE 1.1: Regression - the search reaches the first commit and still has to check the nodes remaining in the queue
candidates = analyzeBug("JacksonCore", 10)['BIC_candidates']
assert candidates[0][0] == 99

In [11]:
# CASE 1.2: Regression - there may be more candidates, but commit id 146 must be among them
candidates = analyzeBug("JacksonDatabind", 59)['BIC_candidates']
assert {146} <= {candidate[0] for candidate in candidates}

In [12]:
# CASE 2: No Regression - No candidate commit
# force=True (third argument): analyse again instead of loading a stored result.
candidates = analyzeBug("JacksonDatabind", 86, True)['BIC_candidates']
assert len(candidates) == 0

In [13]:
# CASE 2.1: No Regression - No candidate commit
# force is left at its default, so a stored result is reused when present.
candidates = analyzeBug("Collections", 28)['BIC_candidates']
assert len(candidates) == 0

In [14]:
# CASE 3: Regression, but with buildability limitations - all of these commit ids must be among the candidates
candidates = analyzeBug("JacksonDatabind", 52)['BIC_candidates']
assert {905, 906, 907, 908, 909, 910, 911, 912, 915, 919, 920, 921, 922, 923, 924, 925, 926, 927} <= {candidate[0] for candidate in candidates}

In [15]:
# CASE 3.1: Regression, but with buildability limitations - commit id 70 must be among the candidates
candidates = analyzeBug("JacksonDatabind", 38)['BIC_candidates']
assert {70} <= {candidate[0] for candidate in candidates}

In [16]:
# CASE 4: Use Ant
# NOTE(review): the empty set is a subset of every set, so this assertion can
# never fail; the cell only checks that the analysis runs. Add the expected
# candidate ids if a real check is wanted.
candidates = analyzeBug("Lang", 1)['BIC_candidates']
assert set() <= set(map(lambda c: c[0],candidates))

In [17]:
# CASE 4.1: Use Ant - commit id 1 must be among the candidates
candidates = analyzeBug("Mockito", 1)['BIC_candidates']
assert {1} <= {candidate[0] for candidate in candidates}

In [18]:
# Analyse every bug of every project in a thread pool and collect the results.
projects = [
    "JacksonXml", "Time", "Collections", "Compress", "Csv", "JacksonCore", "JacksonDatabind", "Gson", "Jsoup",
    "Lang", "Math", "Closure", "Mockito"
]
future_results = []
# Remembers which results folder each future belongs to, for error reporting.
future_bug_paths = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    for project in projects:
        # FOR EACH BUG
        for bug_path in glob.glob("{root}/results/{project}/Bug_*/".format(root=root, project=project)):
            bug = re.search(r"Bug_(\d+)", bug_path).group(1)
            future = executor.submit(analyzeBug, project, bug, False)
            future_results.append(future)
            future_bug_paths[future] = bug_path

# An exception raised inside analyzeBug is re-raised by future.result(), not by
# submit(), so failures are handled here: the bug is reported and skipped and
# the remaining results are still collected (in submission order).
bug_results = []
for future in future_results:
    try:
        bug_results.append(future.result())
    except Exception as e:
        print(future_bug_paths[future])
        print(e)

In [19]:
# Category of every analysed bug: total number of bugs, then bugs per category.
df = pd.DataFrame(bug_results)[['id', 'category', 'sub_category']]
print(len(df))
df['category'].value_counts()

730


No regression is detected       570
Test fails in the fix commit     85
A regression is detected         75
Name: category, dtype: int64

In [20]:
# Number of bugs for each (category, sub_category) pair.
df.groupby(['category', "sub_category"])['id'].count()

category                      sub_category             
A regression is detected      Multiple candidates           26
                              Unique candidates             49
No regression is detected     -                            570
Test fails in the fix commit  Failure in source build       60
                              Failure in test build          3
                              Test execution fails           6
                              The test was not executed     16
Name: id, dtype: int64

In [21]:
# List the bugs whose sub-category is 'Test execution fails' (show up to 100 rows).
pd.set_option('display.max_rows', 100)
df.loc[df['sub_category'] == 'Test execution fails']

Unnamed: 0,id,category,sub_category
12,Time_Bug_9,Test fails in the fix commit,Test execution fails
16,Time_Bug_8,Test fails in the fix commit,Test execution fails
23,Time_Bug_7,Test fails in the fix commit,Test execution fails
26,Time_Bug_16,Test fails in the fix commit,Test execution fails
257,Jsoup_Bug_67,Test fails in the fix commit,Test execution fails
334,Jsoup_Bug_78,Test fails in the fix commit,Test execution fails


In [33]:
# Execution counts per bug, expressed as a percentage of the bug's commits.
df = pd.DataFrame(bug_results)[['id', 'executionsOnPastSequentially', 'executionsOnPast', 'numCommits']]
# Keep only bugs with more than one commit. The explicit copy makes the
# filtered frame independent, so the column assignments below do not write
# into a slice of the original frame.
df = df[df['numCommits'] > 1].copy()
df['executionsOnPastSequentially_rate'] = df['executionsOnPastSequentially'] * 100 / df['numCommits']
df['executionsOnPast_rate'] = df['executionsOnPast'] * 100 / df['numCommits']
df.round(decimals=2)

Unnamed: 0,id,executionsOnPastSequentially,executionsOnPast,numCommits,executionsOnPastSequentially_rate,executionsOnPast_rate
2,JacksonXml_Bug_4,88,313,651,13.52,48.08
3,JacksonXml_Bug_2,5,173,603,0.83,28.69
4,JacksonXml_Bug_3,43,218,602,7.14,36.21
7,Time_Bug_14,6,7,1573,0.38,0.45
9,Time_Bug_1,150,151,1717,8.74,8.79
...,...,...,...,...,...,...
725,Mockito_Bug_36,35,175,1190,2.94,14.71
726,Mockito_Bug_25,243,509,1833,13.26,27.77
727,Mockito_Bug_12,3,15,1386,0.22,1.08
728,Mockito_Bug_27,21,33,1581,1.33,2.09


In [35]:
# Mean, over the rows of df, of executionsOnPastSequentially as a percentage of numCommits.
df['executionsOnPastSequentially_rate'].mean()

10.202383375125324

In [34]:
# Mean, over the rows of df, of executionsOnPast as a percentage of numCommits.
df['executionsOnPast_rate'].mean()

21.075428838804818

In [25]:
# Mean number of commits per bug over the rows of df.
df['numCommits'].mean()

1588.4068493150685