In [1]:
import scipy.stats as stats
import numpy as np
import scikit_posthocs as sp
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
conf_energy_dict = {}
conf_time_dict = {}
lang_energy_dict = {}
lang_time_dict = {}
lang_num_dict = {}

all_energy_list = []
avg_energy_list = []
lang_list = []

df_energy_dict = {}

with open("..\\example-data\\Data.csv", "r") as f:
    lines = f.readlines()
    for line in lines:
        if line.startswith("Group"):
            continue
        line = line.strip().split(",")
        if line[1] == "" and line[3] == "":
            continue
        if line[1] == "" and line[3] != "":
            continue
        lang = line[0]
        conf = line[1]
        energy = float(line[3])
        time = float(line[5])
        if conf not in conf_energy_dict:
            conf_energy_dict[conf] = []
        
        conf_energy_dict[conf].append(energy)
        if conf not in conf_time_dict:
            conf_time_dict[conf] = []        
        conf_time_dict[conf].append(time)
        
        if lang not in lang_list:
            lang_list.append(lang)

In [3]:
# Normality test

list_of_normality = []
list_of_non_normality = []
conf_normality_dict = {}


for config, data in conf_energy_dict.items():
    stat, p_value = stats.shapiro(data)
    conf_normality_dict[config] = p_value
    if p_value < 0.05:
        list_of_non_normality.append(config)
    else:
        list_of_normality.append(config)
print("Normality: ", list_of_normality)
print("Non Normality: ", list_of_non_normality)
print("Shapiro-Wilk test p-values: ", conf_normality_dict)

Normality:  ['rust-default', 'rust-jemalloc', 'rust-PGO', 'python-default', 'python-jemalloc', 'ruby-default', 'ruby-jemalloc', 'ruby-yjit', 'javascript-default', 'javascript-jemalloc', 'csharp-default', 'csharp-jemalloc', 'csharp-quick-jit-false', 'go-default', 'go-jemalloc', 'go-pgo']
Non Normality:  ['javascript-jitless', 'java-default', 'java-jemalloc']
Shapiro-Wilk test p-values:  {'rust-default': 0.8829871416091919, 'rust-jemalloc': 0.6482977867126465, 'rust-PGO': 0.7547478079795837, 'python-default': 0.43419596552848816, 'python-jemalloc': 0.06945698708295822, 'ruby-default': 0.6085011959075928, 'ruby-jemalloc': 0.5424874424934387, 'ruby-yjit': 0.8187602758407593, 'javascript-default': 0.8487940430641174, 'javascript-jemalloc': 0.842570424079895, 'javascript-jitless': 0.0011928870808333158, 'java-default': 0.0008717963937669992, 'java-jemalloc': 0.0007519629434682429, 'csharp-default': 0.4383963942527771, 'csharp-jemalloc': 0.5290932059288025, 'csharp-quick-jit-false': 0.6814511

In [4]:
k = 1
alpha = 0.05
beta = 0.2
z_beta = 0.84
z_alpha = 1.96
for lang in lang_list:
    lang_default = []
    for config, data in conf_energy_dict.items():
        # print("Config: ", config)
        # print("lang: ", lang)
        if str(lang) in str(config) and "default" in str(config):
            lang_default.extend(data)
            std_default = np.std(lang_default)
            mean_default = np.mean(lang_default)
            n_default = len(lang_default)
    for config, data in conf_energy_dict.items():
        # print("Config: ", conf, "lang: ", lang)
        if str(lang + "-") in str(config) and "default" not in str(config):
            if config in list_of_normality:
                mean_conf = np.mean(data)
                std_conf = np.std(data)
                n_conf = len(data)
                
                s_p = np.sqrt(((n_default - 1)*std_default**2 + (n_conf - 1)*std_conf**2) / (n_default + n_conf - 2))
                cohen_d = (mean_default - mean_conf) / s_p
                n = (2 * (z_alpha + z_beta) ** 2) / (cohen_d ** 2)
                print("Sample size for {}: {}".format(config, n))
                
                
            else:
                A = stats.norm.cdf(cohen_d / np.sqrt(2))
                n = ((z_alpha + z_beta)**2) / (12 * (A - 0.5)**2)
                print("Sample size for {}: {}".format(config, n))
                

Sample size for rust-jemalloc: 7.69699597964794
Sample size for rust-PGO: 146.42425649546288
Sample size for python-jemalloc: 36.121044098093876
Sample size for ruby-jemalloc: 0.14913929493948253
Sample size for ruby-yjit: 0.01805690703200167
Sample size for javascript-jemalloc: 13.140237646411789
Sample size for javascript-jitless: 8.329246933056625
Sample size for java-jemalloc: 8.329246933056625
Sample size for csharp-jemalloc: 3.8889211616063593
Sample size for csharp-quick-jit-false: 1.0388701776455025
Sample size for go-jemalloc: 83.3281673871065
Sample size for go-pgo: 4.848230118176141


In [7]:
# Pearson correlation
from scipy.stats import pearsonr
from scipy.stats import spearmanr

dict_energy = {}
dict_power = {}
dict_time = {}
with open("..\\example-data\\Data.csv", "r") as f:
    lines = f.readlines()
    x = 0
    for line in lines:
        if x == 0:
            x += 1
            continue
        line = line.strip()
        line = line.split(",")
        if (line[1] == ""):
            continue
        curr_line = line[0] + line[1].split("_")[0]
        if curr_line not in dict_power:
            dict_power[curr_line] = []
            dict_power[curr_line].append(float((line[4])))
        else:
            dict_power[curr_line].append(float((line[4])))
        if curr_line not in dict_energy:
            dict_energy[curr_line] = []
            dict_energy[curr_line].append(float((line[3])))
        else:
            dict_energy[curr_line].append(float((line[3])))
        if curr_line not in dict_time:
            dict_time[curr_line] = []
            dict_time[curr_line].append(float((line[5])))
        else:
            dict_time[curr_line].append(float((line[5])))

energy_all = []
power_all = []
time_all = []
for key in dict_power.keys():
    power = dict_power[key]
    time = dict_time[key]
    power_all.extend(power)
    time_all.extend(time)

corr, p_value = pearsonr(power_all, time_all)
print(f"Pearson correlation for all:")
print("Correlation:", corr)
print("P-value:", p_value)
    
for key in dict_power.keys():
    
    # energy_all = dict_energy[key]
    power = dict_power[key]
    time = dict_time[key]
    corr, p_value = pearsonr(power, time)
    print(f"Pearson correlation for {key}:")
    print("Correlation:", corr)
    print("P-value:", p_value)

Pearson correlation for all:
Correlation: 0.8980414633303895
P-value: 6.238158615850495e-35
Pearson correlation for rustrust-default:
Correlation: -0.8811109584466479
P-value: 0.048322129679221074
Pearson correlation for rustrust-jemalloc:
Correlation: -0.6864476881172816
P-value: 0.20055636034961816
Pearson correlation for rustrust-PGO:
Correlation: -0.7507173710381203
P-value: 0.1436898370639007
Pearson correlation for pythonpython-default:
Correlation: -0.720526682757158
P-value: 0.16972408046324386
Pearson correlation for pythonpython-jemalloc:
Correlation: -0.08158621302861507
P-value: 0.8962365641809368
Pearson correlation for rubyruby-default:
Correlation: -0.9149061006951091
P-value: 0.02941433648991627
Pearson correlation for rubyruby-jemalloc:
Correlation: -0.7486747507361642
P-value: 0.14541096248150054
Pearson correlation for rubyruby-yjit:
Correlation: -0.9867341247432865
P-value: 0.001830508026108779
Pearson correlation for javascriptjavascript-default:
Correlation: -0.45