In [5]:
import numpy as np
import pandas as pd
from sbfl.base import SBFL

# TODO: add functionality to convert gcov result files to X, y
# TODO: add functionality to convert Cobertura result files to X, y

# Coverage matrix: one row per test (t0..t2), one column per program
# element (e1..e4). A truthy cell means the test covered that element.
X = np.array(
    [
        # e1 e2 e3 e4
        [1, 0, 1, 0],  # t0
        [0, 0, 1, 1],  # t1
        [1, 1, 0, 0],  # t2
    ],
    dtype=bool,
)

# Test outcomes, aligned with the rows of X: True = PASS, False = FAIL.
y = np.array([1, 0, 1], dtype=bool)  # t0: PASS, t1: FAIL, t2: PASS

# Display both arrays as the cell output.
X, y

(array([[ True, False,  True, False],
        [False, False,  True,  True],
        [ True,  True, False, False]]),
 array([ True, False,  True]))

In [4]:
# Calculate the suspiciousness scores
# Uses the Ochiai formula on the coverage matrix X and the outcome vector y
# built in the previous cell. After fitting, `scores_` holds one value per
# column of X (four scores are printed for the four elements e1..e4).
sbfl = SBFL(formula='Ochiai')
sbfl.fit(X, y)
# print() shows the plain-text array; `sbfl` is reused by the following cell.
print(sbfl.scores_)

[0.         0.         0.70710678 1.        ]


In [5]:
# Labels for the two index levels of the resulting score frame.
names = ['file', 'method']

# One (file, method) identifier per scored element, listed in the same order
# as the scores; built by pairing the file column with the method column.
elements = list(zip(
    ['file1.py', 'file2.py', 'file2.py', 'file2.py'],
    ['method1', 'method2', 'method3', 'method4'],
))

# Attach the identifiers to the fitted scores and display the frame.
df = sbfl.to_frame(elements=elements, names=names)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,score
file,method,Unnamed: 2_level_1
file1.py,method1,0.0
file2.py,method2,0.0
file2.py,method3,0.707107
file2.py,method4,1.0


In [6]:
# Highest suspiciousness score per file.
# `DataFrame.max(level=...)` was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the index level is the documented replacement. `sort=False`
# keeps the files in order of first appearance, as the old call did.
df.groupby(level='file', sort=False).max()

Unnamed: 0_level_0,score
file,Unnamed: 1_level_1
file1.py,0.0
file2.py,1.0


# gcov integration

In [6]:
from sbfl.utils import gcov_files_to_frame, get_sbfl_scores_from_frame

# Directory that holds one gcov report per test of the sample subject.
gcov_dir = 'sample/f1_f5'

# Ids of the tests that have a gcov report in `gcov_dir`.
test_ids = [
    't1', 't3', 't6', 't7', 't9', 't15', 't16',
    't17', 't19', 't20', 't21', 't22', 't24', 't30',
    't35', 't37', 't38', 't39', 't40', 't41', 't42',
]

# Map each test id to the list of gcov report paths for that test (a single
# file per test here). Deriving the path from the id avoids the copy-paste
# mismatches that a hand-written 21-entry dict invites.
coverage_files = {
    test_id: ['{}/{}.gcov'.format(gcov_dir, test_id)]
    for test_id in test_ids
}

# Combine the per-test gcov reports into one frame. The displayed result is
# indexed by (source, line) and has one column per test id.
# NOTE(review): the cell values look like per-line execution counts, and
# only_covered=True presumably drops lines that no test executed; confirm
# both against sbfl.utils.gcov_files_to_frame.
cov_df = gcov_files_to_frame(coverage_files, only_covered=True)
cov_df

Unnamed: 0_level_0,Unnamed: 1_level_0,t1,t3,t6,t7,t9,t15,t16,t17,t19,t20,...,t22,t24,t30,t35,t37,t38,t39,t40,t41,t42
source,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
flex.c,122,6,1,2,1,1,4,6,2,3,2,...,1,1,1,2,1,1,1,3,1,1
flex.c,130,6,1,2,1,1,4,6,2,3,2,...,1,1,1,2,1,1,1,3,1,1
flex.c,131,6,1,2,1,1,4,6,2,3,2,...,1,1,1,2,1,1,1,3,1,1
flex.c,132,6,1,2,1,1,4,6,2,3,2,...,1,1,1,2,1,1,1,3,1,1
flex.c,137,6,1,2,1,1,4,6,2,3,2,...,1,1,1,2,1,1,1,3,1,1
flex.c,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
flex.c,12412,59,0,0,0,56,0,0,0,0,0,...,0,0,0,2,0,0,0,0,0,0
flex.c,12416,59,0,0,0,56,0,0,0,0,0,...,0,0,0,2,0,0,0,0,0,0
flex.c,12419,8,2,2,2,2,4,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
flex.c,12422,8,2,2,2,2,4,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [7]:
# Ids of the tests that failed; they match column names of `cov_df`.
failing_tests = [
    't1', 't3', 't6', 't7', 't9', 't15',
    't17', 't20', 't21', 't22', 't24', 't30',
    't37', 't38', 't40', 't41', 't42',
]

# Score every (source, line) entry of the coverage frame, then show the most
# suspicious lines first.
score_df = get_sbfl_scores_from_frame(cov_df, failing_tests)
score_df.sort_values(by='score', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,score
source,line,Unnamed: 2_level_1
flex.c,7229,0.866025
flex.c,3153,0.866025
flex.c,7262,0.866025
flex.c,3147,0.866025
flex.c,915,0.866025
flex.c,...,...
flex.c,9113,0.000000
flex.c,9114,0.000000
flex.c,9117,0.000000
flex.c,9129,0.000000
