This file implements Network Mutation Testing.

This was designed by Adam Abdalla, but the results lacked the promise necessary to continue working on it over CPMT.

If you would like to pick it back up, I recommend starting out with the following:

-No level of precision makes the current time taken worth it, so determine what part takes longest and find a way to speed it up. (Consider libraries?)

-Include all characteristics for the connections.

-Use hyperopt to find optimal weights for all characteristics

-Remove characteristics with insignificant weights

-Find a way to determine the optimal threshold. It may require a complex calculation as it could be dependent on a lot of things.

In [None]:
import math
import numpy as np
from os import path
import os
from random import sample, seed
import csv
import pandas as pd
import kmeans1d
from hyperopt import hp, fmin, tpe
from sklearn.preprocessing import LabelEncoder
import time
import warnings

In [None]:
# Source: tutorialspoint.com/file-searching-using-python
def find_files(filename, search_path):
    """Return the paths of every file called ``filename`` below ``search_path``.

    The tree is walked top-down with ``os.walk``; each directory that
    directly contains ``filename`` contributes one joined path. An empty
    list is returned when nothing matches.
    """
    return [
        os.path.join(directory, filename)
        for directory, _subdirs, entries in os.walk(search_path)
        if filename in entries
    ]

# Source: https://stackoverflow.com/a/29651514
def normalize(df: pd.DataFrame, featureIndices: list) -> pd.DataFrame:
    """Min-max scale the columns at positions ``featureIndices``.

    Works on a copy, so ``df`` itself is left untouched. Each selected
    column is mapped to ``(value - min) / (max - min)``; all other columns
    are returned unchanged.

    Note: a column whose minimum equals its maximum has a zero range, so
    the division produces NaN for that column.
    """
    scaled = df.copy()

    for column in df.columns[featureIndices]:
        values = df[column]
        lowest = values.min()
        span = values.max() - lowest
        scaled[column] = (values - lowest) / span

    return scaled

# Gets characteristics and result status data from project file
# created by pitest clustering plugin.
def getProjectDfs(project: str) -> tuple[pd.DataFrame, pd.DataFrame] | int:
    """Load the characteristics and kill results of one project.

    Both CSV files are searched for anywhere below ``projects/<project>``;
    the first hit of each is read, skipping its first row and applying the
    column names given here.

    Returns:
        ``(data, results)`` where ``data`` holds the mutant characteristics
        and ``results`` holds the ``id`` and ``killed`` columns
        (``numTests`` is dropped), or ``-1`` after printing a message when
        either file cannot be found.
    """
    projectDir = "projects/" + project

    characteristicsHits = find_files("characteristics.csv", projectDir)
    if not characteristicsHits:
        print("Could not find characteristics.csv for project: " + project)
        return -1

    characteristicColumns = [
        "id", "mutOperator", "opcode", "returnType",
        "localVarsCount", "isInTryCatch", "isInFinalBlock",
        "className", "methodName", "blockNumber", "lineNumber",
    ]
    data = pd.read_csv(characteristicsHits[0], names=characteristicColumns, skiprows=1)

    killedHits = find_files("killed.csv", projectDir)
    if not killedHits:
        print("Could not find killed.csv for project: " + project)
        return -1

    killedFrame = pd.read_csv(killedHits[0], names=["id", "killed", "numTests"], skiprows=1)
    results = killedFrame.drop(columns=["numTests"])

    return data, results


# Uses label encoding and clustering to change numerical data to categorical.
# Also merges data with results.
def dfToCategorical(data: pd.DataFrame, results: pd.DataFrame, parameters: list) -> pd.DataFrame:
    """Turn raw mutant characteristics into the frame the graph is built from.

    Steps:
      1. Keep the characteristics used downstream (``isInFinalBlock`` and
         ``blockNumber`` are not selected) and label-encode the string
         columns.
      2. Combine class, method and line number into one integer "locality"
         key and cluster it with 1-D k-means.
      3. Cluster ``localVarsCount`` with 1-D k-means.
      4. Inner-join the kill results on ``id`` and min-max scale the
         columns at positions 3-5 of the merged frame.

    Args:
        data: Characteristics frame with the columns selected below.
        results: Frame with at least the ``id`` column plus the result
            column(s) to merge in.
        parameters: ``[localityReduction, n_localVarsClusters]``. The
            number of locality clusters is
            ``ceil(len(data) * localityReduction)``.

    Returns:
        The merged frame with columns ``id``, ``mutOperator``, ``opcode``,
        ``returnType``, ``isInTryCatch``, ``localityCluster_id``,
        ``varsCluster_id`` followed by the columns of ``results``.
    """
    localityReduction, n_localVarsClusters = parameters
    # define ordinal encoding
    encoder = LabelEncoder()
    # Explicit copy: the assignments below must write to an independent
    # frame, not to a slice of `data` (avoids chained-assignment warnings).
    newData = data[["id", "mutOperator", "opcode", "returnType", "localVarsCount",
                    "isInTryCatch", "className", "methodName", "lineNumber"]].copy()
    for col in ["mutOperator", "returnType", "className", "methodName"]:
        newData[col] = encoder.fit_transform(newData[col])

    # Categorical locality variable creation: the class code is scaled by
    # 100000 and the method code by 1000 before the line number is added,
    # so the three parts are combined into one integer key.
    newData["className"] = newData["className"] * 100000
    newData["methodName"] = newData["methodName"] * 1000
    # kmeans1d expects a flat sequence of numbers, so use 1-D arrays.
    localityKey = (newData["className"].to_numpy(dtype="int64")
                   + newData["methodName"].to_numpy(dtype="int64")
                   + newData["lineNumber"].to_numpy(dtype="int64"))
    n_localityClusters = int(math.ceil(len(data) * localityReduction))
    # cluster() returns (cluster labels, centroids); only the labels are used.
    localityClustering = kmeans1d.cluster(localityKey, n_localityClusters)[0]

    varsClustering = kmeans1d.cluster(newData["localVarsCount"].to_numpy(dtype="int64"),
                                      n_localVarsClusters)[0]

    training = newData[["id", "mutOperator", "opcode", "returnType", "isInTryCatch"]].copy()
    training["localityCluster_id"] = localityClustering
    training["varsCluster_id"] = varsClustering
    training = training.merge(results, how="inner", on="id")
    # Positions 3-5 are returnType, isInTryCatch and localityCluster_id;
    # varsCluster_id (position 6) is left unscaled.
    training = normalize(training, range(3, 6))

    return training


# Creates a dictionary where each mutant has a dictionary as its value.
# In this dictionary are all the mutants connected to it as keys
# with the corresponding total weight of the connection as the value.
def graphCreator(fullDF: pd.DataFrame, weights: list) -> dict:
    """Build the weighted mutant graph as a dict of dicts.

    Two rows are connected when they hold the same value in one of the four
    connection columns. ``graph[a][b]`` starts at the weight of the first
    column the pair shares and is multiplied by the weight of every further
    shared column. Every row that matches at least itself on some column
    gets an entry, possibly an empty one.

    Args:
        fullDF: Frame containing the four connection columns; its index
            labels are used as node ids.
        weights: One weight per connection column, in the order
            ``returnType``, ``isInTryCatch``, ``localityCluster_id``,
            ``varsCluster_id``.

    Returns:
        ``{node: {neighbour: weight}}``.
    """
    connectionColumns = ("returnType", "isInTryCatch", "localityCluster_id", "varsCluster_id")
    graph = {}

    for position, column in enumerate(connectionColumns):
        columnValues = fullDF[column]
        # dict.fromkeys keeps first-seen order while dropping repeats.
        for value in dict.fromkeys(columnValues.tolist()):
            members = fullDF.index[(columnValues == value).to_numpy()].tolist()
            for node in members:
                neighbours = graph.setdefault(node, {})
                # Everyone sharing the value except (the first occurrence of) the node itself.
                own = members.index(node)
                others = members[:own] + members[own + 1:]
                for other in others:
                    if other not in neighbours:
                        neighbours[other] = weights[position]
                    else:
                        neighbours[other] = neighbours[other] * weights[position]

    return graph


# Selects which mutants should be executed.
def sampleSelector(graph: dict, fullDF: pd.DataFrame, add: float, reduction: float) -> tuple[list, float]:
    """Choose which mutants to execute.

    Every node starts with a total of ten times the sum of its connection
    weights. Each round takes the ``ceil(len(fullDF) * add)`` nodes with the
    smallest totals, then lowers the totals of the nodes adjacent to the
    covered set (the picks and their neighbours) by nine times the
    connecting weight. Re-ranking only once per batch keeps the cost down;
    ``add`` controls the batch size.

    NOTE(review): the ranking is ascending, so the *smallest* totals are
    picked, whereas the original description spoke of the largest. The
    order is left as it was; confirm which one is intended.

    Args:
        graph: ``{node: {neighbour: weight}}``.
        fullDF: Frame of all mutants; only its length is used.
        add: Fraction of all mutants picked per round.
        reduction: Target fraction of mutants to select.

    Returns:
        ``(inSelection, achieved)``: the selected nodes in pick order and
        the fraction actually selected. That fraction can exceed
        ``reduction`` because whole batches are added, and is lower when
        the candidates run out first.
    """
    inSelection = []
    currentRed = 0
    batchSize = math.ceil(len(fullDF) * add)

    # The totalweight is the (scaled) sum of the weight of each connection a mutant has.
    totalWeights = {
        idx: sum(weight * 10 for weight in neighbours.values())
        for idx, neighbours in graph.items()
    }

    while currentRed < reduction:
        ranked = sorted(totalWeights.items(), key=lambda item: item[1])
        selectedList = [idx for idx, _ in ranked[:batchSize]]
        if not selectedList:
            # Nothing left to pick (or a zero batch size): stop instead of
            # spinning forever or dividing by an empty frame's length.
            break

        # Covered set for this round: the picks plus all their neighbours,
        # de-duplicated in first-seen order.
        added = list(selectedList)
        for selected in selectedList:
            added.extend(graph[selected])
            del totalWeights[selected]
        added = list(dict.fromkeys(added))

        inSelection.extend(selectedList)

        for idx2 in added:
            for idx in graph[idx2]:
                if idx in totalWeights:
                    totalWeights[idx] -= graph[idx][idx2] * 9

        currentRed = len(inSelection) / len(fullDF)

    return inSelection, currentRed


# Predictors store how much of the weight of the executed mutants
# connected to each mutant was killed.
# Predictions stores the results of the executed mutants in the predictions dict
# since we no longer need to calculate those.
def predictorScores(fullDF: pd.DataFrame, inSelection: list, graph: dict) -> tuple[dict, dict]:
    """Collect the evidence the executed mutants give about their neighbours.

    Args:
        fullDF: Frame with a ``killed`` column, indexed by node id.
        inSelection: Nodes that were executed.
        graph: ``{node: {neighbour: weight}}``.

    Returns:
        ``(predictions, predictors)``. ``predictions`` maps each executed
        node to its ``killed`` value. ``predictors`` maps each neighbour of
        an executed node to ``[killedWeight, totalWeight]``: the summed
        weight of its connections to killed executed nodes, and to all
        executed nodes.
    """
    predictions = {}
    predictors = {}

    for executed in inSelection:
        wasKilled = fullDF.loc[executed, "killed"]
        predictions[executed] = wasKilled

        for neighbour, weight in graph[executed].items():
            killedShare = weight if wasKilled else 0
            tally = predictors.get(neighbour)

            if tally is None:
                predictors[neighbour] = [killedShare, weight]
            else:
                tally[0] += killedShare
                tally[1] += weight

    return predictions, predictors


# Returns the predictions for all mutants of given project.
def own_predict(project: str, reduction: float=0.1, add: float=0.01, parameters: list=[0.05, 5], weights: list=[2, 5, 5, 4]) -> tuple[dict, float] | int:
    """Run the whole prediction pipeline for one project.

    Loads the project data, builds the mutant graph, selects the mutants to
    execute, and predicts the remaining mutants from the executed ones.

    Args:
        project: Project name, forwarded to ``getProjectDfs``.
        reduction: Target fraction of mutants to execute.
        add: Fraction of mutants selected per selection round.
        parameters: ``[localityReduction, n_localVarsClusters]`` for
            ``dfToCategorical``. The default list is never mutated here.
        weights: One weight per connection column for ``graphCreator``.
            The default list is never mutated here.

    Returns:
        ``(predictions, reduction)``: ``predictions`` maps node id to the
        known result (executed mutants) or to the 0/1 prediction made with
        the last threshold tried; ``reduction`` is the value returned by
        ``sampleSelector``. Returns ``-1`` when the project data cannot be
        loaded.
    """
    print("Starting prediction project: " + project)
    dataframes = getProjectDfs(project)
    if isinstance(dataframes, int):
        # getProjectDfs signals a missing input file with -1.
        return -1
    data, results = dataframes

    # Transformation from raw characteristic data to NMT-ready data.
    fullDF = dfToCategorical(data, results, parameters)
    graph = graphCreator(fullDF, weights)
    inSelection, reduction = sampleSelector(graph, fullDF, add, reduction)
    print("Reduction = " + str(reduction))
    predictions, predictors = predictorScores(fullDF, inSelection, graph)

    # Executed mutants already carry their real result in `predictions`;
    # they must not be overwritten by a prediction below.
    executed = set(inSelection)

    # The algorithm predicts a mutant as killed if more than a certain share
    # of its connected executed weight comes from killed mutants. This sweep
    # was used to look for the optimal threshold, which should probably
    # differ per project; how to calculate it is still open. The share of
    # executed mutants that were killed may be a useful input (unverified).
    for i in range(100):
        threshold = 0.90 + i * 0.001
        print(threshold)
        for idx, (killedWeight, totalWeight) in predictors.items():
            if idx in executed:
                continue
            predictions[idx] = 1 if killedWeight > totalWeight * threshold else 0

        # Predictions are handed over ordered by node id.
        precisionCalc(project, [prediction for _, prediction in sorted(predictions.items(), key=lambda item: item[0])])

    return predictions, reduction

# Calculates the precision: percentage of correct predictions in decimals.
def precisionCalc(project: str, predictions: np.ndarray) -> float:
    """Score ``predictions`` against the recorded kill results of ``project``.

    The predictions are attached to the characteristics rows in order,
    joined with the results on ``id``, and written to
    ``projects/<project>/predictions.csv``. The score is the fraction of
    joined rows whose prediction equals ``killed``; it is printed and
    returned. Returns ``-1`` when the project data cannot be loaded.
    """
    loaded = getProjectDfs(project)
    if loaded == -1:
        return -1
    characteristics, outcomes = loaded

    characteristics["prediction"] = predictions
    comparison = characteristics[["id", "prediction"]].merge(outcomes, how="inner", on="id")
    comparison.to_csv("projects/" + project + "/predictions.csv", sep=",", index=False)

    actual = comparison["killed"].tolist()
    guessed = comparison["prediction"].tolist()
    hits = sum(1 for real, guess in zip(actual, guessed) if real == guess)
    precision = hits / len(actual)

    print("Precision = " + str(precision))
    return precision

In [None]:
# Fixed list of integer seeds. Not referenced by any code visible in this
# file; presumably meant for seeding repeated randomised runs (TODO confirm).
seeds = [
    66304, 16389, 14706, 91254, 49890, 86054, 55284, 77324, 36147, 13506, 73920, 80157, 43981, 75358, 33399, 56134,
    13388, 81617, 90957, 52113, 20428, 26482, 56340, 31018, 32067, 13067, 8339, 49008, 125894, 68282, ]
# Project names that are not referenced by any code visible in this file;
# presumably the set used for tuning weights/parameters (TODO confirm).
trainingProjects = ["google-auto-service", "scribejava-core", "commons-cli",
                    "google-auto-value","gson", "commons-io", "commons-codec"]
# Projects that tryOut() iterates over. Each name is looked up as a
# directory below "projects/".
projects = [ "google-auto-factory", "google-auto-common", "commons-csv", "commons-text"]

In [None]:
# Suppress every Python warning from here on.
# NOTE(review): this is a blanket filter, so library deprecation and
# chained-assignment warnings are hidden as well; consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
def tryOut(args=None):
    """Run the prediction for every entry of ``projects`` and print averages.

    For each project the elapsed prediction time is printed, the precision
    of the returned predictions is computed, and finally the mean precision
    and mean reduction over all evaluated projects are printed.

    Projects for which ``own_predict`` returns ``-1`` (input files missing)
    are skipped instead of aborting the whole run.

    Args:
        args: Unused. Presumably a placeholder so the function can serve as
            an optimiser objective later (TODO confirm).
    """
    precisions = []
    reductions = []
    for project in projects:
        start_time = time.time()
        outcome = own_predict(project)
        if isinstance(outcome, int):
            # own_predict already printed which file was missing.
            continue
        results, reduction = outcome
        print(time.time() - start_time)

        # Predictions are handed over ordered by node id.
        ordered = [prediction for _, prediction in sorted(results.items(), key=lambda item: item[0])]
        precisions.append(precisionCalc(project, ordered))
        reductions.append(reduction)

    if not precisions:
        # Avoid averaging empty lists, which would only yield NaN.
        print("No project could be evaluated.")
        return

    performance = np.mean(precisions)
    reduction = np.mean(reductions)
    print(performance, reduction)

tryOut()