In [10]:
import numpy as np
from sbfl.base import SBFL
from failure_clustering.base import FailureDistance
from failure_clustering.clustering import Agglomerative

In [11]:
# Toy input for the walkthrough: five tests (T1..T5) and a 0/1 coverage
# matrix with one row per test and six columns.
# NOTE(review): columns presumably correspond to program elements and 1 means
# "covered by this test" -- confirm against the sbfl package documentation.
test_names = np.array(["T1", "T2", "T3", "T4", "T5"])
X = [
    [0, 1, 1, 0, 1, 0], # Coverage of T1
    [1, 0, 0, 1, 0, 0], # Coverage of T2
    [1, 1, 0, 0, 1, 1], # Coverage of T3
    [0, 1, 0, 1, 1, 0], # Coverage of T4
    [1, 1, 0, 0, 1, 1], # Coverage of T5
]
# One outcome per test, in the same order as the rows of X: T1, T2 and T4
# fail (0) while T3 and T5 pass (1).
y = [0, 0, 1, 0, 1] # Result of T1..5 (FAIL: 0, PASS:1)

# Score the coverage columns with the Tarantula formula; the printed result
# holds one suspiciousness value per column of X. These scores are reused
# later in the notebook as weights for the failure-distance computation.
sbfl = SBFL(formula='Tarantula')
suspiciousness = sbfl.fit_predict(X, y)
print(suspiciousness)

[0.25 0.4  1.   1.   0.4  0.  ]


In [12]:
# Build the pairwise distance matrix with the 'hdist' measure, passing the
# suspiciousness scores as weights. With return_index=True the call also
# returns the indices that the following cells use to look up test names.
fd = FailureDistance(measure='hdist')
distance_matrix, failure_indices = fd.get_distance_matrix(
    X,
    y,
    weights=suspiciousness,
    return_index=True,
)
print(distance_matrix)

[[0.         1.         0.77380952]
 [1.         0.         0.21428572]
 [0.77380952 0.21428572 0.        ]]


In [13]:
# Cluster over the precomputed distance matrix using complete linkage; the
# stopping point is chosen by the 'min_intercluster_distance_elbow' criterion.
aggl = Agglomerative(linkage='complete')
clustering = aggl.run(
    distance_matrix,
    stopping_criterion='min_intercluster_distance_elbow',
)

# Names of the tests selected by failure_indices, looked up once and reused.
failing_test_names = test_names[failure_indices]
print(failing_test_names)
# Optional debugging output, intentionally left disabled:
# print(aggl.labels)
# print(aggl.mdist)
print("===========Clustering Done=============")

# Report the cluster label assigned to each of those tests.
for test_name, cluster_id in zip(failing_test_names, clustering):
    print(f"Cluster of {test_name}: {cluster_id}")

['T1' 'T2' 'T4']
===========Clustering Done=============
Cluster of T1: 0
Cluster of T2: 1
Cluster of T4: 1


In [14]:
# Thresholding example: rerun the same clusterer, this time passing a number
# as the stopping criterion instead of a named heuristic.
# NOTE(review): 0.5 is presumably a distance cut-off -- confirm against the
# failure_clustering documentation.
clustering = aggl.run(distance_matrix, stopping_criterion=0.5)
print(clustering)

print("===========Clustering Done=============")
# Pair every selected test index with the cluster label it received.
for test_index, cluster_id in zip(failure_indices, clustering):
    test_name = test_names[test_index]
    print(f"Cluster of {test_name}: {cluster_id}")

[0 1 1]
===========Clustering Done=============
Cluster of T1: 0
Cluster of T2: 1
Cluster of T4: 1
