In [3]:
from scipy.stats import ttest_rel
import pandas as pd
import math

In [4]:
def calculate_FTTSE(energy, time, is_training=True):
    if is_training:
        return energy * (time**2)
    else:
        return energy * math.exp(time)

def sign_test(data1, data2):
    n = len(data1)
    assert n == len(data2), "Data arrays must have the same length"

    # Calculate the difference between the two data arrays
    diff = data1 - data2

    # Count the number of positive and negative differences
    n_pos = sum(diff > 0)
    n_neg = sum(diff < 0)

    # Calculate the two-tailed p-value
    p_value = 2 * min(ttest_rel(data1, data2).pvalue, 0.5)

    return n_pos, n_neg, p_value

In [5]:
df = pd.read_csv("data/presentation.csv")

df['FTTSE_Training'] = df.apply(
    lambda row: calculate_FTTSE(row['Training_Energy_Joules'], row['Training_Time_Seconds'], is_training=True), axis=1)
df['FTTSE_Inference'] = df.apply(
    lambda row: calculate_FTTSE(row['Inference_Energy_Joules'], row['Inference_Time_Seconds'], is_training=False), axis=1)

In [6]:
# Perform the sign test to see whether there is a difference in energy efficiency among the classification models during training
models = df['Model'].unique()
sign_test_results_training = pd.DataFrame(index=models, columns=models)
for model1 in models:
    for model2 in models:
        if model1 == model2:
            sign_test_results_training.loc[model1, model2] = (0, 0, 1.0)
        else:
            data1 = df[df['Model'] == model1]['FTTSE_Training'].values
            data2 = df[df['Model'] == model2]['FTTSE_Training'].values
            n_pos, n_neg, p_value = sign_test(data1, data2)
            sign_test_results_training.loc[model1, model2] = (n_pos, n_neg, p_value)

print("Training")
print(sign_test_results_training)

Training
                                     Decision Tree  \
Decision Tree                          (0, 0, 1.0)   
Naive Bayes          (0, 3, 0.0005826531803877175)   
Logistic Regression  (0, 3, 0.0005878274873670021)   

                                       Naive Bayes  \
Decision Tree        (3, 0, 0.0005826531803877175)   
Naive Bayes                            (0, 0, 1.0)   
Logistic Regression   (3, 0, 0.003499942864720424)   

                               Logistic Regression  
Decision Tree        (3, 0, 0.0005878274873670021)  
Naive Bayes           (0, 3, 0.003499942864720424)  
Logistic Regression                    (0, 0, 1.0)  


In [7]:
# Perform the sign test to see whether there is a difference in energy efficiency among the classification models during inference
sign_test_results_inference = pd.DataFrame(index=models, columns=models)
for model1 in models:
    for model2 in models:
        if model1 == model2:
            sign_test_results_inference.loc[model1, model2] = (0, 0, 1.0)
        else:
            data1 = df[df['Model'] == model1]['FTTSE_Inference'].values
            data2 = df[df['Model'] == model2]['FTTSE_Inference'].values
            n_pos, n_neg, p_value = sign_test(data1, data2)
            sign_test_results_inference.loc[model1, model2] = (n_pos, n_neg, p_value)

print("Inference")
print(sign_test_results_inference)

Inference
                                     Decision Tree  \
Decision Tree                          (0, 0, 1.0)   
Naive Bayes          (3, 0, 0.0005781806676582863)   
Logistic Regression   (3, 0, 0.013792409162570152)   

                                       Naive Bayes  \
Decision Tree        (0, 3, 0.0005781806676582863)   
Naive Bayes                            (0, 0, 1.0)   
Logistic Regression   (0, 3, 0.000554567004837298)   

                              Logistic Regression  
Decision Tree        (0, 3, 0.013792409162570152)  
Naive Bayes          (3, 0, 0.000554567004837298)  
Logistic Regression                   (0, 0, 1.0)  
