In [42]:
import itertools  # iteration helpers (itertools.product)
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm  # statsmodels provides the Q-Q plots
from scipy.stats import shapiro

In [2]:
# Load the measurement table and define the groupings used by the cells below
df = pd.read_csv("run_table.csv")

# Distinct benchmark identifiers, in order of first appearance in the table
benchmark_list = df['benchmark'].drop_duplicates().tolist()
phase_list = ['cold', 'warm']
compilations_list = ['original', 'cython', 'numba']

# Metric columns: for each measurement, the cold-start column then the warm-start one
x_list = [
    f'{phase}_start_{measure}'
    for measure in ('energy', 'cpu_util', 'duration', 'memory_util')
    for phase in phase_list
]


In [5]:
# Descriptive statistics for every (benchmark, compilation) group
group_keys = ['benchmark', 'compilation']
grouped = df.groupby(by=group_keys)

# describe() reports count, mean, std, min, quartiles and max per numeric column
stats = grouped.describe()
print(stats)

# Keep a copy on disk next to the notebook
stats.to_csv(path_or_buf="grouped_stats.csv")

                                                    cold_start_energy  \
                                                                count   
benchmark                               compilation                     
binary_trees/original.py                cython                   15.0   
                                        numba                    15.0   
                                        original                 15.0   
dac_mergesort/original.py               cython                   15.0   
                                        numba                    15.0   
                                        original                 15.0   
dijkstra/original.py                    cython                   15.0   
                                        numba                    15.0   
                                        original                 15.0   
fannkuch/original.py                    cython                   15.0   
                                        numba      

In [33]:
def box_plot(benchmark_to_plot, phase, df):
    """Draw and save a 2x2 grid of box plots for one benchmark and phase.

    Each panel shows one ``{phase}_start_*`` column (energy, CPU utilization,
    duration, memory utilization) grouped by the ``compilation`` column.

    Parameters
    ----------
    benchmark_to_plot : str
        Value of the ``benchmark`` column to select, e.g.
        ``"binary_trees/original.py"``.
    phase : str
        Column prefix; the plotted columns are ``f'{phase}_start_<metric>'``.
    df : pandas.DataFrame
        Table containing ``benchmark``, ``compilation`` and the metric columns.

    Side effects
    ------------
    Writes ``plots/box_{phase}_{benchmark_name}.png`` (creating ``plots/`` if
    needed), shows the figure, then closes it.
    """
    # File-name-safe identifier: drop everything from the first '.', flatten '/'
    benchmark_name = benchmark_to_plot.split('.')[0].replace('/', '_')

    df_filtered = df[df['benchmark'] == benchmark_to_plot]

    # (column suffix, panel title, y label, x label) in row-major panel order.
    # Only the bottom row carries an x label; the top row leaves it blank.
    panels = [
        ('energy', f'{phase} start energy', 'Energy (J)', ''),
        ('cpu_util', f'{phase} start CPU utilization', 'CPU Utilization (%)', ''),
        ('duration', f'{phase} start duration', 'Duration (s)', 'Compilation Tool'),
        ('memory_util', f'{phase} start memory utilization', 'Memory (bytes)',
         'Compilation Tool'),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    for ax, (suffix, title, ylabel, xlabel) in zip(axes.flat, panels):
        sns.boxplot(data=df_filtered, x='compilation',
                    y=f'{phase}_start_{suffix}', ax=ax)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel(xlabel)

    fig.suptitle(f'{phase} start metrics for {benchmark_to_plot}', fontsize=16)
    fig.tight_layout()

    # savefig does not create missing directories; make sure the target exists
    os.makedirs('plots', exist_ok=True)
    fig.savefig(f'plots/box_{phase}_{benchmark_name}.png')
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate memory

In [34]:
def density_plot(benchmark_to_plot, phase, df):
    """Draw and save a 2x2 grid of kernel-density plots for one benchmark and phase.

    Each panel shows the estimated density of one ``{phase}_start_*`` column
    (energy, CPU utilization, duration, memory utilization), with one filled
    curve per value of the ``compilation`` column.

    Parameters
    ----------
    benchmark_to_plot : str
        Value of the ``benchmark`` column to select, e.g.
        ``"binary_trees/original.py"``.
    phase : str
        Column prefix; the plotted columns are ``f'{phase}_start_<metric>'``.
    df : pandas.DataFrame
        Table containing ``benchmark``, ``compilation`` and the metric columns.

    Side effects
    ------------
    Writes ``plots/density_{phase}_{benchmark_name}.png`` (creating ``plots/``
    if needed), shows the figure, then closes it.
    """
    # File-name-safe identifier: drop everything from the first '.', flatten '/'
    benchmark_name = benchmark_to_plot.split('.')[0].replace('/', '_')

    df_filtered = df[df['benchmark'] == benchmark_to_plot]

    # (column suffix, panel title, x label) in row-major panel order.
    # The duration label carries the unit used for the same column in box_plot.
    panels = [
        ('energy', f'{phase} start energy', 'Energy(J)'),
        ('cpu_util', f'{phase} start CPU utilization', 'CPU Utilization(%)'),
        ('duration', f'{phase} start duration', 'Duration (s)'),
        ('memory_util', f'{phase} start memory utilization',
         'Memory Utilization(bytes)'),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    for ax, (suffix, title, xlabel) in zip(axes.flat, panels):
        # Semi-transparent fill keeps overlapping curves readable
        sns.kdeplot(data=df_filtered, x=f'{phase}_start_{suffix}',
                    hue='compilation', fill=True, alpha=0.3, ax=ax)
        ax.set_title(title)
        ax.set_ylabel('Density')
        ax.set_xlabel(xlabel)

    fig.suptitle(f'{phase} start metrics density for {benchmark_to_plot}', fontsize=16)
    fig.tight_layout()

    # savefig does not create missing directories; make sure the target exists
    os.makedirs('plots', exist_ok=True)
    fig.savefig(f'plots/density_{phase}_{benchmark_name}.png')
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate memory




In [None]:
# Render the box plot and the density plot for every benchmark/phase pair
# (benchmarks vary slowest, phases fastest)
for benchmark_to_plot, phase in itertools.product(benchmark_list, phase_list):
    box_plot(benchmark_to_plot, phase, df)
    density_plot(benchmark_to_plot, phase, df)

In [41]:
# Q-Q plots: one figure per metric, one panel per (benchmark, compilation) subset

# savefig does not create missing directories; make sure the target exists
os.makedirs('plots', exist_ok=True)

# The subsets are identical for every metric, so build the list once
combinations = list(itertools.product(benchmark_list, compilations_list))

# Size the grid from the number of subsets instead of assuming exactly 30;
# 5 columns x 4-inch panels reproduces the 6x5 / (20, 24) layout for 30 subsets
n_cols = 5
n_rows = max(1, -(-len(combinations) // n_cols))  # ceiling division

for metric in x_list:

    print(f"qq plot for {metric} ...")

    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
    axes_flat = axes.flatten()

    for ax, (bench, comp) in zip(axes_flat, combinations):
        subset_mask = (df['benchmark'] == bench) & (df['compilation'] == comp)
        # Drop missing values so they cannot distort the sample quantiles
        data_to_plot = df.loc[subset_mask, metric].dropna()

        # line='s' draws the standardized reference line; skip empty subsets
        if not data_to_plot.empty:
            sm.qqplot(data_to_plot, line='s', ax=ax)

        benchmark_name = bench.split('.')[0]
        benchmark_name = benchmark_name.replace('/', '_')
        ax.set_title(f'{benchmark_name} | {comp}', fontsize=9)

        # Per-panel axis labels are blanked to keep the dense grid readable
        ax.set_xlabel('')
        ax.set_ylabel('')

    # Hide grid cells that have no subset assigned to them
    for unused_ax in axes_flat[len(combinations):]:
        unused_ax.set_visible(False)

    fig.suptitle(f'Q-Q Plots for Normality Check: {metric}', fontsize=24)
    fig.tight_layout()

    output_filename = f'plots/qq_plots_{metric}.png'
    fig.savefig(output_filename)
    print(f"saved: {output_filename}")

    plt.close(fig)  # release the figure before building the next one

print("\nfinished")

qq plot for cold_start_energy ...
saved: plots/qq_plots_cold_start_energy.png
qq plot for warm_start_energy ...
saved: plots/qq_plots_warm_start_energy.png
qq plot for cold_start_cpu_util ...
saved: plots/qq_plots_cold_start_cpu_util.png
qq plot for warm_start_cpu_util ...
saved: plots/qq_plots_warm_start_cpu_util.png
qq plot for cold_start_duration ...
saved: plots/qq_plots_cold_start_duration.png
qq plot for warm_start_duration ...
saved: plots/qq_plots_warm_start_duration.png
qq plot for cold_start_memory_util ...
saved: plots/qq_plots_cold_start_memory_util.png
qq plot for warm_start_memory_util ...
saved: plots/qq_plots_warm_start_memory_util.png

finished


In [43]:
# Shapiro-Wilk normality test for every (metric, benchmark, compilation) subset.
# One test is run per subset; no multiple-comparison correction is applied.
alpha = 0.05
normal_col = f'is_normal (p > {alpha})'
results_list = []
for metric in x_list:
    for bench in benchmark_list:
        for comp in compilations_list:
            subset_mask = (df['benchmark'] == bench) & (df['compilation'] == comp)
            # Missing values must not count toward the sample size or reach the test
            data_to_test = df.loc[subset_mask, metric].dropna()

            # The test needs at least 3 observations, and its statistic is
            # undefined for a constant sample (zero range)
            if len(data_to_test) >= 3 and data_to_test.nunique() > 1:
                stat, p_value = shapiro(data_to_test)
                # p < alpha rejects normality; cast so the column is plain bool
                is_normal = bool(p_value > alpha)
            else:
                # Not testable: no statistic, and normality is not claimed
                stat, p_value, is_normal = np.nan, np.nan, False

            results_list.append({
                'metric': metric,
                'benchmark': bench,
                'compilation': comp,
                'statistic': stat,
                'p_value': p_value,
                normal_col: is_normal
            })

SW_df = pd.DataFrame(results_list)

print("Shapiro-Wilk test result")
print(SW_df)

# A NaN p-value marks a subset that was not tested; keep those out of the
# "judged non-normal" listing and report them separately
tested = SW_df['p_value'].notna()

print("\n--- Subsets judged to be non-normally distributed ---")
print(SW_df[tested & ~SW_df[normal_col]])

if (~tested).any():
    print("\n--- Subsets without a valid test result ---")
    print(SW_df[~tested])

Shapiro-Wilk test result
                     metric                  benchmark compilation  statistic  \
0         cold_start_energy   binary_trees/original.py    original   0.978368   
1         cold_start_energy   binary_trees/original.py      cython   0.951322   
2         cold_start_energy   binary_trees/original.py       numba   0.964537   
3         cold_start_energy  dac_mergesort/original.py    original   0.871211   
4         cold_start_energy  dac_mergesort/original.py      cython   0.951367   
..                      ...                        ...         ...        ...   
235  warm_start_memory_util       richards/original.py      cython   0.934964   
236  warm_start_memory_util       richards/original.py       numba   0.918450   
237  warm_start_memory_util  spectral_norm/original.py    original   0.959344   
238  warm_start_memory_util  spectral_norm/original.py      cython   0.969639   
239  warm_start_memory_util  spectral_norm/original.py       numba   0.895398   

  