In [21]:
import os.path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

In [22]:
# Absolute path of the 'results' directory inside the current working directory.
save_folder = os.path.abspath(os.path.join(os.path.curdir, 'results'))

In [28]:
def test_hypothesis(experiment_name: str, test_name: str, df: pd.DataFrame):
    """Plot the compression results in ``df`` and print paired t-tests between formats.

    Two figures are written to ``<save_folder>/<experiment_name>/``:

    * ``<test_name>.png`` - per-row compression ratios, i.e. ``OriginalSize`` divided by
      ``BxesPreprocessing``, ``BxesSize`` and ``ExiSize``;
    * ``FileSize<test_name>.png`` - ``OriginalSize`` next to ``BxesToXesSize``.

    Afterwards the result of ``scipy.stats.ttest_rel`` for every pair of ratio series
    and the mean of every ratio series are printed.

    Args:
        experiment_name: Sub-folder of ``save_folder`` that receives the figures
            (created when missing).
        test_name: Label printed in the report and used in the figure file names.
        df: Frame with the columns ``OriginalSize``, ``BxesSize``,
            ``BxesPreprocessing``, ``ExiSize`` and ``BxesToXesSize``.
    """
    # Function-scope import: a bare ``import scipy`` does not load the ``scipy.stats``
    # submodule on every SciPy release, so do not rely on it being an attribute.
    from scipy import stats

    original_size = df['OriginalSize'].to_numpy()
    row_positions = np.arange(len(df))

    bxes_line = original_size / df['BxesSize'].to_numpy()
    bxes_preprocessing = original_size / df['BxesPreprocessing'].to_numpy()
    exi = original_size / df['ExiSize'].to_numpy()

    experiment_folder_path = os.path.join(save_folder, experiment_name)
    # ``exist_ok`` has to be passed by keyword: the second positional parameter of
    # ``os.makedirs`` is ``mode``, so a positional ``True`` means permission bits 0o001.
    os.makedirs(experiment_folder_path, exist_ok=True)

    _save_line_plot(
        os.path.join(experiment_folder_path, f'{test_name}.png'),
        row_positions,
        [
            (bxes_preprocessing, 'BxesPreprocessing', {}),
            (bxes_line, 'Bxes', {'linestyle': '--'}),
            (exi, 'Exi', {'linestyle': ':'}),
        ],
        title=test_name,
        ylabel='Compression ratio (OriginalSize / compressed size)',
    )

    _save_line_plot(
        os.path.join(experiment_folder_path, f'FileSize{test_name}.png'),
        row_positions,
        [
            (df['OriginalSize'], 'OriginalSize', {}),
            (df['BxesToXesSize'], 'BxesToXesSize', {}),
        ],
        title=test_name,
        ylabel='Size',
    )

    print(f'TEST NAME: {test_name}')
    print('======================================')
    print(f'Exi-BxesPreprocessing {stats.ttest_rel(exi, bxes_preprocessing)}')
    print(f'Exi-Bxes {stats.ttest_rel(exi, bxes_line)}')
    # The result belongs inside the f-string; as a separate ``{...}`` argument it was
    # printed as a one-element set.
    print(f'Bxes-BxesPreprocessing {stats.ttest_rel(bxes_line, bxes_preprocessing)}')
    print(f'Bxes mean: {np.mean(bxes_line)}')
    print(f'BxesPreprocessing mean: {np.mean(bxes_preprocessing)}')
    print(f'Exi: {np.mean(exi)}')
    print('======================================')
    print()


def _save_line_plot(path, x, series, title, ylabel):
    """Plot every ``(values, label, plot_kwargs)`` entry of ``series`` against ``x`` and save the figure to ``path``."""
    fig, ax = plt.subplots()
    try:
        for values, label, plot_kwargs in series:
            ax.plot(x, values, label=label, **plot_kwargs)
        ax.set(title=title, xlabel='Row index', ylabel=ylabel)
        ax.legend()
        fig.savefig(path)
    finally:
        # Close the figure even when saving fails so figures do not pile up in the kernel.
        plt.close(fig)

def test_hypothesis_filtered_df(experiment_name, test_name: str, df, predicate):
    """Run ``test_hypothesis`` on the rows of ``df`` selected by ``predicate``.

    ``predicate`` is applied to every row (``axis=1``) and its result is used to index
    ``df``. The unique ``Name`` values of the selected rows are printed before the
    selection is handed to ``test_hypothesis`` together with ``experiment_name`` and
    ``test_name``.
    """
    row_mask = df.apply(predicate, axis=1)
    selected_rows = df[row_mask]
    print(selected_rows['Name'].unique())
    test_hypothesis(experiment_name, test_name, selected_rows)

def all_procfiler_logs_predicate(row):
    """Return True for a non-BPI row whose ``Name`` belongs to the 1, 25, 50 or 75 repeat group.

    The one-repeat group is recognised by the ``_1`` suffix, the same way
    ``one_repeat_procfiler_logs_predicate`` does it: a plain substring test for ``_1``
    would also accept any name that merely contains ``_1`` (for example ``..._100``).
    The 25/50/75 groups are recognised by the substrings ``_25``, ``_50`` and ``_75``.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with a string ``Name`` entry.

    Returns:
        bool: whether the row is selected.
    """
    log_name = row['Name']
    in_repeat_group = (
        log_name.endswith('_1')
        or any(marker in log_name for marker in ('_25', '_50', '_75'))
    )
    return in_repeat_group and 'BPI' not in log_name

def one_repeat_procfiler_logs_predicate(row):
    """Return True when the row's ``Name`` ends with ``_1`` and does not contain ``BPI``."""
    log_name = row['Name']
    if not log_name.endswith('_1'):
        return False
    return 'BPI' not in log_name

def twenty_five_repeat_procfiler_logs_predicate(row):
    """Return True when the row's ``Name`` contains ``_25`` and does not contain ``BPI``."""
    log_name = row['Name']
    if '_25' not in log_name:
        return False
    return 'BPI' not in log_name

def fifty_repeat_procfiler_logs_predicate(row):
    """Return True when the row's ``Name`` contains ``_50`` and does not contain ``BPI``."""
    log_name = row['Name']
    if '_50' not in log_name:
        return False
    return 'BPI' not in log_name

def seventy_five_repeat_procfiler_logs_predicate(row):
    """Return True when the row's ``Name`` contains ``_75`` and does not contain ``BPI``."""
    log_name = row['Name']
    if '_75' not in log_name:
        return False
    return 'BPI' not in log_name


def analyze_results_file(experiment_name: str, path: str):
    """Read a ``;``-separated results file and run every hypothesis test on it.

    ``test_hypothesis`` is run once on the whole frame (test name ``AllLogs``) and then
    once per Procfiler predicate on the rows that predicate selects.

    Args:
        experiment_name: Passed through to the tests as the experiment name.
        path: Path of the CSV file to read with ``sep=';'``.
    """
    df = pd.read_csv(path, sep=';')

    test_hypothesis(experiment_name, 'AllLogs', df)

    # (test name, row predicate) pairs, executed in this order.
    filtered_tests = (
        ('ProcfilerLogs', all_procfiler_logs_predicate),
        ('ProcfilerLogs1', one_repeat_procfiler_logs_predicate),
        ('ProcfilerLogs25', twenty_five_repeat_procfiler_logs_predicate),
        ('ProcfilerLogs50', fifty_repeat_procfiler_logs_predicate),
        ('ProcfilerLogs75', seventy_five_repeat_procfiler_logs_predicate),
    )
    for test_name, predicate in filtered_tests:
        test_hypothesis_filtered_df(experiment_name, test_name, df, predicate)

In [29]:
# Analyse results.csv under the experiment name 'OldResults'.
analyze_results_file(experiment_name='OldResults', path='results.csv')

TEST NAME: AllLogs
Exi-BxesPreprocessing TtestResult(statistic=-2.7926132251423224, pvalue=0.006254805652805675, df=101)
Exi-Bxes TtestResult(statistic=-2.596364163440163, pvalue=0.01082618740163833, df=101)
Bxes-BxesPreprocessing {TtestResult(statistic=-3.6934244567760683, pvalue=0.00035925895222195905, df=101)}
Bxes mean: 53.7781774467135
BxesPreprocessing mean: 54.46259362049429
Exi: 47.36671226539508

['not_existing_assembly_loading_50' 'finalizable_object_1'
 'exception_try_catch_finally_when_1' 'unsafe_fixed_50' 'array_pooling_75'
 'task_test_project_50' 'dynamic_assembly_loading_1'
 'finalizable_object_75' 'simple_async_await_1' 'console_app_1_1'
 'intensive_thread_pool_25' 'file_async_operations_1'
 'dynamic_assembly_loading_50' 'exception_try_catch_finally_1'
 'not_simple_async_await_75' 'console_app_1_75'
 'exception_try_catch_finally_when_25' 'intensive_thread_pool_50'
 'exception_try_catch_finally_async_1' 'task_test_project_25'
 'unsafe_fixed_25' 'yield_enumerator_75'
 'no

In [30]:
# Analyse results_no_lifecycle.csv under the experiment name 'NewResultsNoLifecycle'.
analyze_results_file(experiment_name='NewResultsNoLifecycle', path='results_no_lifecycle.csv')

TEST NAME: AllLogs
Exi-BxesPreprocessing TtestResult(statistic=-1.7450665372609482, pvalue=0.08401480209523475, df=101)
Exi-Bxes TtestResult(statistic=-1.5412546634866897, pvalue=0.12638218445849037, df=101)
Bxes-BxesPreprocessing {TtestResult(statistic=-4.559008207996465, pvalue=1.445169948523599e-05, df=101)}
Bxes mean: 51.179215192218805
BxesPreprocessing mean: 51.774749039354084
Exi: 47.36671226539508

['not_existing_assembly_loading_50' 'finalizable_object_1'
 'exception_try_catch_finally_when_1' 'unsafe_fixed_50' 'array_pooling_75'
 'task_test_project_50' 'dynamic_assembly_loading_1'
 'finalizable_object_75' 'simple_async_await_1' 'console_app_1_1'
 'intensive_thread_pool_25' 'file_async_operations_1'
 'dynamic_assembly_loading_50' 'exception_try_catch_finally_1'
 'not_simple_async_await_75' 'console_app_1_75'
 'exception_try_catch_finally_when_25' 'intensive_thread_pool_50'
 'exception_try_catch_finally_async_1' 'task_test_project_25'
 'unsafe_fixed_25' 'yield_enumerator_75'
 'n