In [10]:
import os

import numpy as np
import pandas as pd
from scipy.stats import rankdata

from sbfl.base import SBFL
from sbfl.utils import gcov_files_to_frame, get_sbfl_scores_from_frame

# Boolean coverage matrix: one row per test (t0..t4), one column per program
# element (e1..e6). A True cell means the test executed that element.
X = np.array([
    # e1 e2 e3 e4 e5 e6
    [1, 1, 0, 0, 1, 1],  # t0
    [1, 1, 0, 0, 1, 1],  # t1
    [0, 1, 1, 0, 1, 0],  # t2
    [1, 0, 0, 1, 0, 0],  # t3
    [0, 1, 0, 1, 1, 0],  # t4
]).astype(bool)

# One outcome flag per test, aligned with the rows of X.
# NOTE(review): judging by the Ochiai scores printed further down, True appears
# to mark a *passing* test - confirm against the SBFL documentation.
y = np.array([True, True, False, False, False])

(X, y)

(array([[ True,  True, False, False,  True,  True],
        [ True,  True, False, False,  True,  True],
        [False,  True,  True, False,  True, False],
        [ True, False, False,  True, False, False],
        [False,  True, False,  True,  True, False]]),
 array([ True,  True, False, False, False]))

In [11]:
# Score every program element (column of X) with the Ochiai formula.
# The returned array holds one score per element, in column order.
sbfl = SBFL(formula='Ochiai')
sbfl.fit_predict(X, y)

array([0.33333333, 0.57735027, 0.57735027, 0.81649658, 0.57735027,
       0.        ])

In [12]:
# Turn the scores into ranks (rank 1 = highest score in the output shown).
# With method='max', tied elements all get the largest rank of their tie group.
sbfl.ranks(method='max')

array([5, 4, 4, 1, 4, 6])

In [13]:
# Label each scored element with a (file, method) pair so the scores can be
# looked up by name; the pairs are listed in the same order as the columns of X.
names = ['file', 'method']
elements = [('file1.py', 'method1')]
elements += [('file2.py', f'method{k}') for k in range(2, 7)]

# Build a score table indexed by the (file, method) labels.
df = sbfl.to_frame(elements=elements, names=names)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,score
file,method,Unnamed: 2_level_1
file1.py,method1,0.333333
file2.py,method2,0.57735
file2.py,method3,0.57735
file2.py,method4,0.816497
file2.py,method5,0.57735
file2.py,method6,0.0


In [14]:
# Highest score per file. Passing `level=` directly to DataFrame.max was
# deprecated in pandas 1.3 and removed in pandas 2.0; grouping by the index
# level is the supported equivalent. sort=False keeps the files in their order
# of first appearance in df.
df.groupby(level='file', sort=False).max()

Unnamed: 0_level_0,score
file,Unnamed: 1_level_1
file1.py,0.333333
file2.py,0.816497


# Read GCOV files

In [15]:
# Directory holding the .gcov reports produced by each test run.
gcov_dir = {
    'test100': 'resources/yara-buggy#3-100',
    'test101': 'resources/yara-buggy#3-101',
    'test102': 'resources/yara-buggy#3-102',
    'test103': 'resources/yara-buggy#3-103',
    'test104': 'resources/yara-buggy#3-104',
}

# Map each test name to the paths of the .gcov files found in its directory.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the result identical from run to run.
gcov_files = {}
for test, directory in gcov_dir.items():
    gcov_files[test] = [
        os.path.join(directory, entry)
        for entry in sorted(os.listdir(directory))
        if entry.endswith('.gcov')
    ]
    print(f"{test}: {len(gcov_files[test])} gcov files are found.")

test100: 48 gcov files are found.
test101: 48 gcov files are found.
test102: 48 gcov files are found.
test103: 48 gcov files are found.
test104: 48 gcov files are found.


In [16]:
# Combine the per-test gcov reports into one frame: one row per (file, line)
# and one column per test; the values look like per-line execution counts.
# NOTE(review): only_covered=True presumably drops lines that no test
# executed - confirm against the sbfl.utils documentation.
cov_df = gcov_files_to_frame(gcov_files, only_covered=True)
cov_df

Unnamed: 0_level_0,Unnamed: 1_level_0,test100,test101,test102,test103,test104
file,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
threading.c,172,12,12,12,12,12
threading.c,175,12,12,12,12,12
threading.c,178,12,12,12,12,12
threading.c,182,8,8,8,8,8
threading.c,185,8,8,8,8,8
...,...,...,...,...,...,...
hex_lexer.l,252,1,1,1,0,0
hex_lexer.l,253,1,1,1,0,0
hex_lexer.l,254,1,1,1,0,0
hex_lexer.l,256,1,1,1,0,0


In [17]:
# Score every (file, line) row of cov_df, declaring test102 as the failing test.
# NOTE(review): the other test columns are presumably treated as passing, and
# the scores shown are consistent with the Ochiai formula - confirm the default.
get_sbfl_scores_from_frame(cov_df, failing_tests=['test102'])

Unnamed: 0_level_0,Unnamed: 1_level_0,score
file,line,Unnamed: 2_level_1
threading.c,172,0.447214
threading.c,175,0.447214
threading.c,178,0.447214
threading.c,182,0.447214
threading.c,185,0.447214
...,...,...
hex_lexer.l,252,0.577350
hex_lexer.l,253,0.577350
hex_lexer.l,254,0.577350
hex_lexer.l,256,0.577350
