# 1. Find flaky mutants and save the list of non-flaky mutants

In [None]:
import numpy as np
import json
from pitest_log_parser import project_list, round_number, seed_list, mutant_choice, test_choice, TIMED_OUT
# Cell configuration. The two boolean flags are used as keys into the
# `mutant_choice` / `test_choice` lookups imported from pitest_log_parser;
# the combined `choice` string is the sub-directory name used in every data
# path built in this cell.
seed_num = len(seed_list)
random_test = False
random_mutant = True
choice = f'{mutant_choice[random_mutant]}_{test_choice[random_test]}'


def get_info(project, seed):
    """Load the three JSON files stored for one project/seed combination.

    The files are read from ``log-parsed-data/<choice>/<project>_<seed>``,
    where ``choice`` is the module-level configuration string.

    Returns:
        A 3-tuple ``(mutantId_mutantTuple, mutantId_runtimeList, test_testId)``
        holding the decoded content of the correspondingly named JSON files.
    """
    path = f'log-parsed-data/{choice}/{project}_{seed}'

    def load(json_path):
        # Read and decode a single JSON file.
        with open(json_path, 'r') as handle:
            return json.load(handle)

    mutants_by_id = load(f'{path}/mutantId_mutantTuple.json')
    runtimes_by_id = load(f'{path}/mutantId_runtimeList.json')
    test_ids = load(f'{path}/test_testId.json')
    return mutants_by_id, runtimes_by_id, test_ids


# For every project, collect the mutants that are non-flaky under every seed
# and save them as JSON.  A mutant is skipped for a seed when any numeric
# entry of its runtime list is NaN; only mutants that survive that filter for
# all seeds (set intersection) end up in the output list.
for project in project_list:
    # None marks "no seed processed yet".  An empty container must not be used
    # as that marker: an empty first-seed set or an empty intersection would
    # then be replaced by the next seed's mutants instead of staying empty.
    nonFlakyMutant_set = None
    for seed in seed_list:
        per_seed_set = set()
        mutantId_mutant_dict, mutantId_runtimeList_dict, test_testId_dict = get_info(project, seed)
        for mutant_id, runtime_list in mutantId_runtimeList_dict.items():
            mutant = mutantId_mutant_dict[mutant_id]
            # Non-numeric entries (e.g. strings) are ignored by the NaN check.
            if any(np.isnan(runtime) for runtime in runtime_list if isinstance(runtime, (float, int))):
                continue
            # Deduplicate the last element and put it into a deterministic
            # order.  Plain tuple(set(...)) follows set iteration order, which
            # can differ between two sets holding the same items, so equal
            # mutants from different seeds could fail to compare equal.
            # key=repr keeps the sort total even for mixed JSON value types.
            mutant[-1] = tuple(sorted(set(mutant[-1]), key=repr))
            per_seed_set.add(tuple(mutant))
        if nonFlakyMutant_set is None:
            nonFlakyMutant_set = per_seed_set
        else:
            nonFlakyMutant_set &= per_seed_set
    # With an empty seed_list nothing was processed; write an empty list.
    nonFlakyMutant_list = list(nonFlakyMutant_set) if nonFlakyMutant_set is not None else []
    with open(f'analyzed-data/{choice}/nonFlakyMutant-list/{project}.json', 'w') as file:
        json.dump(nonFlakyMutant_list, file, indent=4)

# 2. Calculate the t-test and Mann-Whitney U test for each non-flaky mutant between seed pairs

In [None]:
import pandas as pd
import numpy as np
import warnings
import json
import os
from scipy.stats import ttest_ind, mannwhitneyu
from pitest_log_parser import project_list, round_number, seed_list, mutant_choice, test_choice, TIMED_OUT
# Cell configuration. The filter below hides only the RuntimeWarning whose
# message matches 'Precision loss occurred in moment calculation.'.
warnings.filterwarnings('ignore', category=RuntimeWarning, message='Precision loss occurred in moment calculation.')
seed_num = len(seed_list)
random_test = False
random_mutant = True
# `choice` is the sub-directory name used in every data path of this cell.
choice = f'{mutant_choice[random_mutant]}_{test_choice[random_test]}'


def get_info(project, seed):
    """Load the mutant-tuple and runtime-list JSON files of one project/seed.

    Both files are read from ``log-parsed-data/<choice>/<project>_<seed>``,
    where ``choice`` is the module-level configuration string.

    Returns:
        A pair ``(mutantId_mutantTuple, mutantId_runtimeList)`` with the
        decoded content of the two JSON files.
    """
    path = f'log-parsed-data/{choice}/{project}_{seed}'
    tuple_path = f'{path}/mutantId_mutantTuple.json'
    runtime_path = f'{path}/mutantId_runtimeList.json'
    with open(tuple_path, 'r') as handle:
        tuples_by_id = json.load(handle)
    with open(runtime_path, 'r') as handle:
        runtimes_by_id = json.load(handle)
    return tuples_by_id, runtimes_by_id


def get_runtimeList(mutant, mutantId_mutantTuple_dict, mutantId_runtimeList_dict):
    """Return the integer runtimes recorded for ``mutant``.

    The mutant is located by comparing everything except its last element
    with the entries of ``mutantId_mutantTuple_dict``; only the first
    matching entry is used.

    Args:
        mutant: Sequence describing the mutant; its last element is ignored
            when matching.
        mutantId_mutantTuple_dict: Mapping of mutant id to mutant sequence.
        mutantId_runtimeList_dict: Mapping of mutant id to its raw runtimes.

    Returns:
        A list with one int per raw runtime, where entries equal to
        ``TIMED_OUT`` are replaced by 6000, or an empty list when no entry
        matches.
    """
    wanted = mutant[:-1]
    not_found = object()
    matched_id = next(
        (key for key, candidate in mutantId_mutantTuple_dict.items() if wanted == candidate[:-1]),
        not_found,
    )
    if matched_id is not_found:
        return []
    return [
        6000 if raw == TIMED_OUT else int(raw)
        for raw in mutantId_runtimeList_dict[matched_id]
    ]


# Pairwise significance detection.  For every project and every unordered
# pair of seeds, run Welch-less `ttest_ind` and `mannwhitneyu` on the runtimes
# of each non-flaky mutant, count the mutants for which either p-value is
# below `alpha`, and write the per-mutant p-values to one CSV per seed pair.
alpha = 0.05  # p-value threshold used for the printed "significant" count
significance_dir = f'analyzed-data/{choice}/significance-detection'
# Create the output directory up front so to_csv cannot fail on a missing path.
os.makedirs(significance_dir, exist_ok=True)
for project in project_list:
    with open(f'analyzed-data/{choice}/nonFlakyMutant-list/{project}.json', 'r') as file:
        nonFlakyMutant_list = json.load(file)
    # Load each seed's data and look up every mutant's runtimes exactly once.
    # Doing this inside the pair loop would re-read the JSON files and repeat
    # the lookups for every pair a seed takes part in.
    # runtimes_per_seed[k][m] holds the runtimes of mutant m under seed_list[k].
    runtimes_per_seed = []
    for seed in seed_list:
        mutantTuple_dict, runtimeList_dict = get_info(project, seed)
        runtimes_per_seed.append([
            get_runtimeList(mutant=mutant, mutantId_mutantTuple_dict=mutantTuple_dict, mutantId_runtimeList_dict=runtimeList_dict)
            for mutant in nonFlakyMutant_list
        ])
    for i in range(seed_num):
        seed_i = seed_list[i]
        # Start at i + 1 so each unordered seed pair is compared once.
        for j in range(i + 1, seed_num):
            seed_j = seed_list[j]
            df = pd.DataFrame(None, columns=['mutant', 'T-test', 'U-test'])
            significant_mutant_number = 0
            for mutant, i_array, j_array in zip(nonFlakyMutant_list, runtimes_per_seed[i], runtimes_per_seed[j]):
                _, t_p_value = ttest_ind(i_array, j_array)
                _, u_p_value = mannwhitneyu(i_array, j_array)
                # A NaN p-value compares False here and is not counted.
                if t_p_value < alpha or u_p_value < alpha:
                    significant_mutant_number += 1
                df.loc[len(df.index)] = [tuple(mutant[:-1]), t_p_value, u_p_value]
            print(f'{project} between {seed_i} and {seed_j}: {significant_mutant_number}/{len(nonFlakyMutant_list)}')
            df.to_csv(f'{significance_dir}/{project}_{seed_i}_{seed_j}.csv', sep=',', header=True, index=False)

# 3. Get the total running time, excluding flaky mutants

In [56]:
import pandas as pd
import json
from scipy.stats import ttest_ind, mannwhitneyu
from pitest_log_parser import project_list, round_number, seed_list, mutant_choice, test_choice, TIMED_OUT
# Cell configuration. The two boolean flags are used as keys into the
# `mutant_choice` / `test_choice` lookups imported from pitest_log_parser;
# the combined `choice` string is the sub-directory name used in every data
# path built in this cell.
seed_num = len(seed_list)
random_test = False
random_mutant = True
choice = f'{mutant_choice[random_mutant]}_{test_choice[random_test]}'

def get_info(file_path):
    """Load the mutant-tuple and runtime-list JSON files found in a directory.

    Args:
        file_path: Directory containing ``mutantId_mutantTuple.json`` and
            ``mutantId_runtimeList.json``.

    Returns:
        A pair ``(mutantId_mutantTuple, mutantId_runtimeList)`` with the
        decoded content of the two JSON files.
    """
    def load(json_path):
        # Read and decode a single JSON file.
        with open(json_path, 'r') as handle:
            return json.load(handle)

    tuples_by_id = load(f'{file_path}/mutantId_mutantTuple.json')
    runtimes_by_id = load(f'{file_path}/mutantId_runtimeList.json')
    return tuples_by_id, runtimes_by_id


def add_list(array1, array2):
    """Add the first ``round_number`` runtimes of ``array2`` onto ``array1``.

    ``array1`` is modified in place and nothing is returned.  An entry of
    ``array2`` equal to ``TIMED_OUT`` contributes 6000; every other entry is
    converted with ``int`` before being added.
    """
    for position in range(round_number):
        raw = array2[position]
        increment = 6000 if raw == TIMED_OUT else int(raw)
        array1[position] += increment


# Total running time over the non-flaky mutants.  For every project and seed,
# sum the per-round runtimes of all non-flaky mutants (via add_list), write
# one CSV of per-seed totals per project, then compare the totals of every
# unordered seed pair with `ttest_ind` and `mannwhitneyu` and collect the
# p-values of all projects into a single CSV.
up_dir = f'analyzed-data/{choice}/nonFlakiness-totalRunningTime'
significant_df = pd.DataFrame(None, columns=['project', 'seed-pair', 'T-test', 'U-test'])
columns = ['seed'] + [f'round-{i}' for i in range(round_number)]
for project in project_list:
    with open(f'analyzed-data/{choice}/nonFlakyMutant-list/{project}.json', 'r') as file:
        nonFlakyMutant_list = json.load(file)
    df = pd.DataFrame(None, columns=columns)
    seed_totalRuntimeList_dict = {}
    for seed in seed_list:
        runtime_list = [0 for _ in range(round_number)]
        mutantId_mutantTuple_dict, mutantId_runtimeList_dict = get_info(f'log-parsed-data/{choice}/{project}_{seed}')
        for mutant in nonFlakyMutant_list:
            # Match on everything except the last element of the mutant.
            for k_id, v_tuple in mutantId_mutantTuple_dict.items():
                if v_tuple[:-1] == mutant[:-1]:
                    mutant_id = k_id
                    break
            else:
                # Without this guard a missing mutant would reuse the id left
                # over from the previous iteration and silently add that
                # mutant's runtimes a second time.
                raise LookupError(
                    f'mutant {mutant[:-1]} not found for project {project}, seed {seed}'
                )
            add_list(runtime_list, mutantId_runtimeList_dict[mutant_id])
        seed_totalRuntimeList_dict[seed] = runtime_list
        df.loc[len(df.index)] = [seed] + runtime_list
    for i in range(seed_num):
        seed_i = seed_list[i]
        i_array = seed_totalRuntimeList_dict[seed_i]
        # Start at i + 1 so each unordered seed pair is compared once.
        for j in range(i + 1, seed_num):
            seed_j = seed_list[j]
            j_array = seed_totalRuntimeList_dict[seed_j]
            _, t_p_value = ttest_ind(i_array, j_array)
            _, u_p_value = mannwhitneyu(i_array, j_array)
            significant_df.loc[len(significant_df.index)] = [project, (seed_i, seed_j), t_p_value, u_p_value]
    df.to_csv(f'{up_dir}/{project}.csv', sep=',', header=True, index=False)
significant_df.to_csv(f'{up_dir}/significant-results.csv', sep=',', header=True, index=False)