In [8]:
import pandas as pd
import numpy as np
import math
import os
import time
import re

In [32]:
class Tarantula(object):

    def __init__(self, directory):

        self.__M = []  # boolean. coverage matrix
        self.__MT = []
        self.__F = []  # boolean. failing test cases
        self.__C = []
        self.__numOrigTests = 0
        self.__numStmts = 0
        self.__totalOrigFail = 0
        self.__totalOrigPass = 0
        self.__passOnStmt = []  # int. p(s), for every s and considering liveness of test cases
        self.__failOnStmt = []  # int. p(s), for every s and considering liveness of test cases
        self.__passRatio = []
        self.__failRatio = []
        self.__suspiciousness = []
        self.__directory = directory  # to store the coverage matrix file
        self.__ranking = []
        self.__fileName = []
        self.__rankData = []


    def main(self):
        
        # read file iterately under the root directory
        list = os.listdir(self.__directory) #list out all documents under the current directory
       #  print(len(list))
    
        for i in range(0,len(list)):
            start = time.time()
            self.__suspiciousness = []
            self.__fileName.insert(i, list[i])
            print("\n ========================= {0} The Result ========================= \n".format(list[i]))
            path = os.path.join(self.__directory,list[i]) 
            if os.path.isfile(path):
                self.__M = pd.read_csv(path, sep=',', index_col=0)
                # initialize all case are live
                self.TarantulaSuspiciousnessCalculation(self.__M)

                # initialize the fail and pass case number
                self.setF(self.__M.r)

                # main computation
                self.mainCompute()
                
                # create csv file to store the result to calculate the exam score
                self.__MT.loc[:,['Suspiciousness','Rank']].to_csv("result_{0}".format(list[i]))
                
            end = time.time()
            print("The time comsume {0} s".format(round((end-start), 4)))

            
    def TarantulaSuspiciousnessCalculation(self, M):
        self.__M = M

        self.__numOrigTests = self.__M.shape[0]
        # print(self.__numOrigTests)
        self.__numStmts = self.__M.shape[1] - 1
        # print(self.__numStmts)

    def setF(self, F):
        self.__F = F.tolist()
        self.calculateOrigFailAndPass()

    def mainCompute(self):
        self.calculatePassOnStmtAndFailOnStmt()
        self.calculatePassRatioAndFailRatio()
        self.calculateSuspiciousness()
        self.calculateRanking()
    
    def calculatePassOnStmtAndFailOnStmt(self):
        
        for i in range(self.__numStmts):
            self.__C = self.__M.iloc[:, i]
            self.__failOnStmt.insert(i, self.__C.tolist().count(0))
            self.__passOnStmt.insert(i, self.__C.tolist().count(1))

    def calculatePassRatioAndFailRatio(self):

        for i in range(self.__numStmts):
            if self.__totalOrigPass == 0:
                self.__passRatio.insert(i, 0)
            else:
                self.__passRatio.insert(i, self.__passOnStmt[i] / self.__totalOrigPass)

            if self.__totalOrigPass == 0:
                self.__failRatio.insert(i, 0)
            else:

                self.__failRatio.insert(i, self.__failOnStmt[i] / self.__totalOrigFail)

    def calculateSuspiciousness(self):
        
        for i in range(self.__numStmts):
            if (self.__totalOrigFail == 0) & (self.__totalOrigPass == 0):
                self.__suspiciousness.insert(i, -1)
            elif (self.__failRatio[i] == 0) & (self.__passRatio[i] == 0):
                self.__suspiciousness.insert(i, -1)
            else:
                self.__suspiciousness.insert(i, round(self.__failRatio[i] / (self.__failRatio[i] + self.__passRatio[i]), 2))

    def calculateOrigFailAndPass(self):
        
        for i in range(self.__numOrigTests):
            if self.__F[i]:

                self.__totalOrigPass += 1
            else:

                self.__totalOrigFail += 1

    def calculateRanking(self):
        # print whole list with suspiciousness and rank
        self.__MT = self.__M.T
        self.__suspiciousness.append(np.nan)
        self.__suspiciousness = pd.Series(self.__suspiciousness, index=[var for var in self.__M.columns])
        self.__ranking = self.__suspiciousness.rank(ascending=False, method="min")

        self.__MT["Suspiciousness"] = self.__suspiciousness
        self.__MT['Rank'] = self.__ranking
        self.__MT.sort_values("Rank", inplace=True)
        self.__MT.drop('r',inplace=True)
        
        # self.__rankData.append(self.__MT.head(5).index.tolist())
        

        print(self.__MT.loc[:,['Suspiciousness','Rank']])
        
        
        
    def getExamScore(self, s):
        rank = self.__MT['Rank'].loc[s]
        
        examScore = round(rank-1/self.__numStmts,2) * 100
        
        return examScore

In [37]:
if __name__ == "__main__":
    
    rootDirSort100 = r"C:\Users\Chicago Lam\jupyter\sort_csv\sort_100"
    rootDirSort300 = r"C:\Users\Chicago Lam\jupyter\sort_csv\sort_300"
    rootDirSortMultiple = r"C:\Users\Chicago Lam\jupyter\sort_csv\sort_100_multiple_bugs"

    sort100 = Tarantula(rootDirSort100)
    sort100.main()
    
    sort300 = Tarantula(rootDirSort300)
    sort300.main()
    
    sortMultiple = Tarantula(rootDirSortMultiple)
    sortMultiple.main()

    rootDirEncrypt100 = r"C:\Users\Chicago Lam\jupyter\encrypt_csv\encrypt_100"
    rootDirEncrypt300 = r"C:\Users\Chicago Lam\jupyter\encrypt_csv\encrypt_300"
    rootDirEncryptMultiple = r"C:\Users\Chicago Lam\jupyter\encrypt_csv\encrypt_100_multiple_bugs"
    
    encrypt100 = Tarantula(rootDirEncrypt100)
    encrypt100.main()
    
    encrypt300 = Tarantula(rootDirEncrypt300)
    encrypt300.main()
    
    encryptMultiple = Tarantula(rootDirEncryptMultiple)
    encryptMultiple.main()
    



     Suspiciousness  Rank
s45            1.00   1.0
s17            0.92   2.0
s33            0.86   3.0
s30            0.80   4.0
s52            0.80   4.0
s50            0.80   4.0
s63            0.80   4.0
s35            0.80   4.0
s8             0.80   4.0
s58            0.80   4.0
s27            0.80   4.0
s66            0.80   4.0
s60            0.75  13.0
s39            0.75  13.0
s68            0.75  13.0
s12            0.70  16.0
s42            0.70  16.0
s34            0.70  16.0
s43            0.70  16.0
s32            0.70  16.0
s44            0.70  16.0
s49            0.66  22.0
s41            0.66  22.0
s19            0.66  22.0
s55            0.66  22.0
s51            0.66  22.0
s10            0.66  22.0
s59            0.62  28.0
s65            0.62  28.0
s25            0.62  28.0
..              ...   ...
s13            0.17  63.0
s37            0.14  64.0
s36            0.12  65.0
s4             0.09  66.0
s83            0.02  67.0
s85            0.02  67.0
s84       

     Suspiciousness  Rank
s6             0.65   1.0
s33            0.56   2.0
s35            0.56   2.0
s17            0.53   4.0
s41            0.50   5.0
s52            0.47   6.0
s8             0.47   6.0
s12            0.45   8.0
s43            0.43   9.0
s63            0.41  10.0
s19            0.41  10.0
s25            0.41  10.0
s66            0.41  10.0
s58            0.41  10.0
s50            0.41  10.0
s30            0.41  10.0
s39            0.39  17.0
s10            0.39  17.0
s34            0.39  17.0
s85            0.39  17.0
s22            0.39  17.0
s32            0.39  17.0
s68            0.37  23.0
s60            0.35  24.0
s27            0.34  25.0
s55            0.30  26.0
s45            0.30  26.0
s16            0.29  28.0
s18            0.29  28.0
s49            0.27  30.0
..              ...   ...
s13            0.06  61.0
s47            0.05  62.0
s46            0.04  63.0
s4             0.04  63.0
s36            0.02  65.0
s37            0.02  65.0
s79         

The time comsume 0.0299 s


     Suspiciousness  Rank
s45            1.00   1.0
s17            0.91   2.0
s33            0.83   3.0
s27            0.76   4.0
s30            0.76   4.0
s35            0.76   4.0
s63            0.76   4.0
s58            0.76   4.0
s50            0.76   4.0
s8             0.76   4.0
s66            0.76   4.0
s52            0.76   4.0
s60            0.70  13.0
s39            0.70  13.0
s68            0.70  13.0
s42            0.65  16.0
s44            0.65  16.0
s34            0.65  16.0
s32            0.65  16.0
s43            0.65  16.0
s12            0.65  16.0
s41            0.61  22.0
s55            0.61  22.0
s49            0.61  22.0
s51            0.61  22.0
s19            0.61  22.0
s10            0.61  22.0
s57            0.57  28.0
s22            0.57  28.0
s67            0.57  28.0
..              ...   ...
s46            0.19  60.0
s14            0.17  62.0
s13            0.14  63.0
s37            0.11  64.0
s36            0.10  65.0
s4        

      Suspiciousness  Rank
s59             1.00   1.0
s120            1.00   1.0
s82             1.00   1.0
s9              0.58   4.0
s102            0.58   4.0
s100            0.54   6.0
s37             0.54   6.0
s51             0.54   6.0
s128            0.54   6.0
s84             0.52  10.0
s13             0.52  10.0
s106            0.52  10.0
s15             0.52  10.0
s80             0.52  10.0
s96             0.52  10.0
s19             0.52  10.0
s94             0.52  10.0
s21             0.52  10.0
s35             0.52  10.0
s112            0.50  20.0
s11             0.50  20.0
s76             0.50  20.0
s23             0.50  20.0
s72             0.48  24.0
s33             0.48  24.0
s49             0.48  24.0
s70             0.48  24.0
s117            0.48  24.0
s53             0.48  24.0
s118            0.48  24.0
...              ...   ...
s10             0.00  83.0
s8              0.00  83.0
s6              0.00  83.0
s5              0.00  83.0
s4              0.00  83.0
s

      Suspiciousness  Rank
s120            1.00   1.0
s59             1.00   1.0
s82             1.00   1.0
s110            0.60   4.0
s55             0.60   4.0
s31             0.57   6.0
s74             0.57   6.0
s53             0.53   8.0
s13             0.53   8.0
s70             0.53   8.0
s96             0.53   8.0
s21             0.52  12.0
s84             0.52  12.0
s128            0.51  14.0
s9              0.51  14.0
s102            0.51  14.0
s119            0.51  14.0
s41             0.51  14.0
s117            0.51  14.0
s17             0.51  14.0
s37             0.51  14.0
s88             0.51  14.0
s118            0.51  14.0
s47             0.50  24.0
s104            0.50  24.0
s80             0.50  24.0
s35             0.50  24.0
s57             0.49  28.0
s72             0.49  28.0
s56             0.49  28.0
...              ...   ...
s14             0.00  84.0
s10             0.00  84.0
s8              0.00  84.0
s6              0.00  84.0
s5              0.00  84.0
s

      Suspiciousness  Rank
s59             1.00   1.0
s88             1.00   1.0
s82             1.00   1.0
s120            1.00   1.0
s102            0.78   5.0
s9              0.78   5.0
s51             0.75   7.0
s100            0.75   7.0
s37             0.75   7.0
s106            0.73  10.0
s35             0.73  10.0
s21             0.73  10.0
s19             0.73  10.0
s84             0.73  10.0
s80             0.73  10.0
s94             0.73  10.0
s13             0.73  10.0
s96             0.73  10.0
s15             0.73  10.0
s112            0.71  20.0
s11             0.71  20.0
s23             0.71  20.0
s76             0.71  20.0
s72             0.70  24.0
s53             0.70  24.0
s78             0.70  24.0
s70             0.70  24.0
s33             0.70  24.0
s17             0.70  24.0
s117            0.70  24.0
...              ...   ...
s14             0.00  84.0
s10             0.00  84.0
s8              0.00  84.0
s6              0.00  84.0
s5              0.00  84.0
s